Skip site navigation (1) | Skip section navigation (2)
Date:      Thu, 30 Jun 2011 14:10:49 +0000 (UTC)
From:      Gabor Kovesdan <gabor@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-user@freebsd.org
Subject:   svn commit: r223693 - in user/gabor/tre-integration: contrib/tre/lib include
Message-ID:  <201106301410.p5UEAnPJ046950@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: gabor
Date: Thu Jun 30 14:10:49 2011
New Revision: 223693
URL: http://svn.freebsd.org/changeset/base/223693

Log:
  - Plug in the fixed string matching code

Modified:
  user/gabor/tre-integration/contrib/tre/lib/fastmatch.c
  user/gabor/tre-integration/contrib/tre/lib/fastmatch.h
  user/gabor/tre-integration/contrib/tre/lib/regexec.c
  user/gabor/tre-integration/contrib/tre/lib/tre-compile.c
  user/gabor/tre-integration/contrib/tre/lib/tre.h
  user/gabor/tre-integration/include/regex.h

Modified: user/gabor/tre-integration/contrib/tre/lib/fastmatch.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/fastmatch.c	Thu Jun 30 10:56:02 2011	(r223692)
+++ user/gabor/tre-integration/contrib/tre/lib/fastmatch.c	Thu Jun 30 14:10:49 2011	(r223693)
@@ -235,7 +235,7 @@ tre_fastcomp(fastmatch_t *fg, const tre_
 
 int
 tre_fastexec(const fastmatch_t *fg, const tre_char_t *data, size_t len,
-    int nmatch, regmatch_t *pmatch)
+    int nmatch, regmatch_t pmatch[])
 {
   unsigned int j;
   int cnt = 0;
@@ -253,8 +253,10 @@ tre_fastexec(const fastmatch_t *fg, cons
       j = fg->eol ? len - fg->len : 0;
       if (fastcmp(fg->pattern, data + j,
 	  fg->len) == -1) {
-	pmatch->rm_so = j;
-	pmatch->rm_eo = j + fg->len;
+	if (!(fg->cflags & REG_NOSUB) || (nmatch < 1))
+	  return 0;
+	pmatch[cnt].rm_so = j;
+	pmatch[cnt].rm_eo = j + fg->len;
 	ret = 0;
       }
     }
@@ -264,7 +266,7 @@ tre_fastexec(const fastmatch_t *fg, cons
     do {
       if (fastcmp(fg->pattern, data + j - fg->len,
 	  fg->len) == -1) {
-	if (!(fg->cflags & REG_NOSUB))
+	if (!(fg->cflags & REG_NOSUB) || (nmatch < 1))
 	  return (0);
 	pmatch[cnt++].rm_so = j - fg->len;
 	pmatch[cnt++].rm_eo = j;
@@ -296,7 +298,7 @@ tre_fastexec(const fastmatch_t *fg, cons
     j = 0;
     do {
       if (fastcmp(fg->pattern, data + j, fg->len) == -1) {
-	if (!(fg->cflags & REG_NOSUB))
+	if (!(fg->cflags & REG_NOSUB) || (nmatch < 1))
 	  return (0);
 	pmatch[cnt++].rm_so = j;
 	pmatch[cnt++].rm_eo = j + fg->len;

Modified: user/gabor/tre-integration/contrib/tre/lib/fastmatch.h
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/fastmatch.h	Thu Jun 30 10:56:02 2011	(r223692)
+++ user/gabor/tre-integration/contrib/tre/lib/fastmatch.h	Thu Jun 30 14:10:49 2011	(r223693)
@@ -28,6 +28,8 @@
 #ifndef FASTMATCH_H
 #define FASTMATCH_H 1
 
+#include <stdbool.h>
+
 #include "hashtable.h"
 #include "tre.h"
 
@@ -53,7 +55,7 @@ int	tre_fastcomp_literal(fastmatch_t *pr
 int	tre_fastcomp(fastmatch_t *preg, const tre_char_t *regex, size_t,
 	    int cflags);
 int	tre_fastexec(const fastmatch_t *fg, const tre_char_t *data,
-	    size_t len, int nmatch, regmatch_t *pmatch);
+	    size_t len, int nmatch, regmatch_t pmatch[]);
 void	tre_fastfree(fastmatch_t *preg);
 
 #endif		/* FASTMATCH_H */

Modified: user/gabor/tre-integration/contrib/tre/lib/regexec.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/regexec.c	Thu Jun 30 10:56:02 2011	(r223692)
+++ user/gabor/tre-integration/contrib/tre/lib/regexec.c	Thu Jun 30 14:10:49 2011	(r223693)
@@ -44,6 +44,7 @@ char *alloca ();
 #endif /* HAVE_MALLOC_H */
 #include <limits.h>
 
+#include "fastmatch.h"
 #include "tre-internal.h"
 #include "tre.h"
 #include "xmalloc.h"
@@ -150,10 +151,16 @@ tre_have_approx(const regex_t *preg)
 static int
 tre_match(const tre_tnfa_t *tnfa, const void *string, size_t len,
 	  tre_str_type_t type, size_t nmatch, regmatch_t pmatch[],
-	  int eflags)
+	  int eflags, void *shortcut)
 {
   reg_errcode_t status;
   int *tags = NULL, eo;
+
+  /* Check if we can cheat with a fixed string */
+  if (shortcut != NULL)
+    return tre_fastexec((fastmatch_t *)shortcut, (const tre_char_t *)string,
+			len, nmatch, pmatch);
+
   if (tnfa->num_tags > 0 && nmatch > 0)
     {
 #ifdef TRE_USE_ALLOCA
@@ -222,7 +229,8 @@ tre_regnexec(const regex_t *preg, const 
     size_t slen = (size_t)(pmatch[0].rm_eo - pmatch[0].rm_so);
     size_t offset = pmatch[0].rm_so;
     str = &str[offset];
-    int ret = tre_match(tnfa, str, slen, type, nmatch, pmatch, eflags);
+    int ret = tre_match(tnfa, str, slen, type, nmatch, pmatch, eflags,
+			preg->shortcut);
     if (!(eflags & REG_NOSUB))
     {
       for (unsigned i = 0; i < nmatch; i++)
@@ -235,7 +243,8 @@ tre_regnexec(const regex_t *preg, const 
   }
   else
   {
-    return tre_match(tnfa, str, len, type, nmatch, pmatch, eflags);
+    return tre_match(tnfa, str, len, type, nmatch, pmatch, eflags,
+		     preg->shortcut);
   }
 }
 
@@ -260,7 +269,8 @@ tre_regwnexec(const regex_t *preg, const
     size_t slen = (size_t)(pmatch[0].rm_eo - pmatch[0].rm_so);
     size_t offset = pmatch[0].rm_so;
     str = &str[offset];
-    int ret = tre_match(tnfa, str, slen, STR_WIDE, nmatch, pmatch, eflags);
+    int ret = tre_match(tnfa, str, slen, STR_WIDE, nmatch, pmatch, eflags,
+			preg->shortcut);
     if (!(eflags & REG_NOSUB))
     {
       for (unsigned i = 0; i < nmatch; i++)
@@ -273,7 +283,8 @@ tre_regwnexec(const regex_t *preg, const
   }
   else
   {
-    return tre_match(tnfa, str, len, STR_WIDE, nmatch, pmatch, eflags);
+    return tre_match(tnfa, str, len, STR_WIDE, nmatch, pmatch, eflags,
+		     preg->shortcut);
   }
 }
 
@@ -291,7 +302,8 @@ tre_reguexec(const regex_t *preg, const 
 	 size_t nmatch, regmatch_t pmatch[], int eflags)
 {
   tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
-  return tre_match(tnfa, str, (unsigned)-1, STR_USER, nmatch, pmatch, eflags);
+  return tre_match(tnfa, str, (unsigned)-1, STR_USER, nmatch, pmatch, eflags,
+		   preg->shortcut);
 }
 
 
@@ -315,7 +327,7 @@ tre_match_approx(const tre_tnfa_t *tnfa,
   if (params.max_cost == 0 && !tnfa->have_approx
       && !(eflags & REG_APPROX_MATCHER))
     return tre_match(tnfa, string, len, type, match->nmatch, match->pmatch,
-		     eflags);
+		     eflags, NULL);
 
   /* Back references are not supported by the approximate matcher. */
   if (tnfa->have_backrefs)

Modified: user/gabor/tre-integration/contrib/tre/lib/tre-compile.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre-compile.c	Thu Jun 30 10:56:02 2011	(r223692)
+++ user/gabor/tre-integration/contrib/tre/lib/tre-compile.c	Thu Jun 30 14:10:49 2011	(r223693)
@@ -20,6 +20,7 @@
 #include <assert.h>
 #include <string.h>
 
+#include "fastmatch.h"
 #include "tre-internal.h"
 #include "tre-mem.h"
 #include "tre-stack.h"
@@ -1858,17 +1859,30 @@ tre_compile(regex_t *preg, const tre_cha
   tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
   tre_pos_and_tags_t *p;
   int *counts = NULL, *offs = NULL;
-  int i, add = 0;
+  int i, add = 0, ret;
   tre_tnfa_transition_t *transitions, *initial;
   tre_tnfa_t *tnfa = NULL;
   tre_submatch_data_t *submatch_data;
   tre_tag_direction_t *tag_directions = NULL;
   reg_errcode_t errcode;
   tre_mem_t mem;
+  fastmatch_t shortcut;
 
   /* Parse context. */
   tre_parse_ctx_t parse_ctx;
 
+  /* Check if we can cheat with a fixed string algorithm. */
+  ret = (cflags & REG_LITERAL)
+    ? tre_fastcomp_literal(&shortcut, regex, n, cflags)
+    : tre_fastcomp(&shortcut, regex, n, cflags);
+  if (!ret)
+    {
+      preg->shortcut = &shortcut;
+      return REG_OK;
+    }
+  else
+    preg->shortcut = NULL;
+
   /* Allocate a stack used throughout the compilation process for various
      purposes. */
   stack = tre_stack_new(512, 10240, 128);

Modified: user/gabor/tre-integration/contrib/tre/lib/tre.h
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/tre.h	Thu Jun 30 10:56:02 2011	(r223692)
+++ user/gabor/tre-integration/contrib/tre/lib/tre.h	Thu Jun 30 14:10:49 2011	(r223693)
@@ -48,6 +48,7 @@ typedef int regoff_t;
 typedef struct {
   size_t re_nsub;  /* Number of parenthesized subexpressions. */
   void *value;     /* For internal use only. */
+  void *shortcut;  /* For internal use only. */
   const char *re_endp;
 } regex_t;
 

Modified: user/gabor/tre-integration/include/regex.h
==============================================================================
--- user/gabor/tre-integration/include/regex.h	Thu Jun 30 10:56:02 2011	(r223692)
+++ user/gabor/tre-integration/include/regex.h	Thu Jun 30 14:10:49 2011	(r223693)
@@ -40,6 +40,7 @@ typedef int regoff_t;
 typedef struct {
   size_t re_nsub;  /* Number of parenthesized subexpressions. */
   void *value;	   /* For internal use only. */
+  void *shortcut;  /* For internal use only. */
   const char *re_endp;
 } regex_t;
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201106301410.p5UEAnPJ046950>