Skip site navigation (1) Skip section navigation (2)
Date:      Fri, 12 Aug 2011 20:02:47 +0000 (UTC)
From:      Gabor Kovesdan <gabor@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-user@freebsd.org
Subject:   svn commit: r224805 - user/gabor/tre-integration/contrib/tre/lib
Message-ID:  <201108122002.p7CK2lbH080766@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: gabor
Date: Fri Aug 12 20:02:47 2011
New Revision: 224805
URL: http://svn.freebsd.org/changeset/base/224805

Log:
  - Add support for TRE's REG_NEWLINE

Modified:
  user/gabor/tre-integration/contrib/tre/lib/fastmatch.c
  user/gabor/tre-integration/contrib/tre/lib/fastmatch.h

Modified: user/gabor/tre-integration/contrib/tre/lib/fastmatch.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/fastmatch.c	Fri Aug 12 19:51:28 2011	(r224804)
+++ user/gabor/tre-integration/contrib/tre/lib/fastmatch.c	Fri Aug 12 20:02:47 2011	(r224805)
@@ -44,7 +44,7 @@
 #include "xmalloc.h"
 
 static int	fastcmp(const void *, const void *, size_t,
-			tre_str_type_t, bool);
+			tre_str_type_t, bool, bool);
 
 /*
  * We will work with wide characters if they are supported
@@ -101,11 +101,11 @@ static int	fastcmp(const void *, const v
     {									\
       case STR_WIDE:							\
 	mismatch = fastcmp(fg->wpattern, startptr, fg->wlen, type,	\
-			   fg->icase);					\
+			   fg->icase, fg->newline);			\
 	break;								\
       default:								\
 	mismatch = fastcmp(fg->pattern, startptr, fg->len, type,	\
-			   fg->icase);					\
+			   fg->icase, fg->newline);			\
       }									\
 
 #define IS_OUT_OF_BOUNDS						\
@@ -337,6 +337,7 @@ static int	fastcmp(const void *, const v
   memset(fg, 0, sizeof(*fg));						\
   fg->icase = (cflags & REG_ICASE);					\
   fg->word = (cflags & REG_WORD);					\
+  fg->newline = (cflags & REG_NEWLINE);					\
 									\
   /* Cannot handle REG_ICASE with MB string */				\
   if (fg->icase && (MB_CUR_MAX > 1))					\
@@ -451,6 +452,13 @@ tre_fastcomp(fastmatch_t *fg, const tre_
   return REG_OK;
 }
 
+#define _SHIFT_ONE							\
+  {									\
+    shift = 1;								\
+    j += shift;								\
+    continue;								\
+  }
+
 #define CHECK_WORD_BOUNDARY						\
   {									\
     bool bbound, ebound;						\
@@ -472,13 +480,26 @@ tre_fastcomp(fastmatch_t *fg, const tre_
 	    (str_byte[j + fg->len] == '_'));				\
       }									\
     if (!bbound || !ebound)						\
-      {									\
-	shift = 1;							\
-	j += shift;							\
-	continue;							\
-      }									\
+      _SHIFT_ONE;							\
   }
 
+#define _BOL_COND	/* j is at input start or right after a '\n' */	\
+  ((j == 0) || ((type == STR_WIDE) ?					\
+    (str_wide[j - 1] == TRE_CHAR('\n')) : (str_byte[j - 1] == '\n')))
+
+#define CHECK_BOL_ANCHOR	/* reject candidates not at a line start */ \
+    if (!_BOL_COND)							\
+      _SHIFT_ONE;
+
+#define _EOL_COND	/* match ends at input end or right before '\n' */ \
+  ((type == STR_WIDE) ? ((j + fg->wlen == len) ||			\
+    (str_wide[j + fg->wlen] == TRE_CHAR('\n'))) :			\
+    ((j + fg->len == len) || (str_byte[j + fg->len] == '\n')))
+
+#define CHECK_EOL_ANCHOR	/* reject candidates not at a line end */ \
+    if (!_EOL_COND)							\
+      _SHIFT_ONE;
+
 /*
  * Executes matching of the precompiled pattern on the input string.
  * Returns REG_OK or REG_NOMATCH depending on if we find a match or not.
@@ -521,7 +542,7 @@ tre_fastexec(const fastmatch_t *fg, cons
 
   /* XXX: Fix with word boundaries */
   /* Only try once at the beginning or ending of the line. */
-  if (fg->bol || fg->eol)
+  if (!fg->newline && (fg->bol || fg->eol))
     {
       /* Simple text comparison. */
       if (!((fg->bol && fg->eol) &&
@@ -551,6 +572,10 @@ tre_fastexec(const fastmatch_t *fg, cons
 	    {
 	      if (fg->word)
 		CHECK_WORD_BOUNDARY;
+	      if (fg->bol)
+		CHECK_BOL_ANCHOR;
+	      if (fg->eol)
+		CHECK_EOL_ANCHOR;
 	      pmatch[0].rm_so = j;
 	      pmatch[0].rm_eo = j + ((type == STR_WIDE) ? fg->wlen : fg->len);
 	      return REG_OK;
@@ -582,7 +607,7 @@ tre_fastfree(fastmatch_t *fg)
  */
 static inline int
 fastcmp(const void *pat, const void *data, size_t len,
-	tre_str_type_t type, bool icase)
+	tre_str_type_t type, bool icase, bool newline)
 {
   const char *str_byte = data;
   const char *pat_byte = pat;
@@ -594,14 +619,16 @@ fastcmp(const void *pat, const void *dat
     switch (type)
       {
 	case STR_WIDE:
-	  if (pat_wide[i] == L'.')
+	  if (pat_wide[i] == TRE_CHAR('.') &&
+	      (!newline || (str_wide[i] != TRE_CHAR('\n'))))
 	    continue;
 	  if (icase ? (towlower(pat_wide[i]) == towlower(str_wide[i]))
 		    : (pat_wide[i] == str_wide[i]))
 	    continue;
 	  break;
 	default:
-	  if (pat_byte[i] == '.')
+	  if (pat_byte[i] == '.' &&
+	      (!newline || (str_byte[i] != '\n')))
 	    continue;
 	  if (icase ? (tolower(pat_byte[i]) == tolower(str_byte[i]))
 		    : (pat_byte[i] == str_byte[i]))

Modified: user/gabor/tre-integration/contrib/tre/lib/fastmatch.h
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/fastmatch.h	Fri Aug 12 19:51:28 2011	(r224804)
+++ user/gabor/tre-integration/contrib/tre/lib/fastmatch.h	Fri Aug 12 20:02:47 2011	(r224805)
@@ -55,6 +55,7 @@ typedef struct {
   bool eol;
   bool word;
   bool icase;
+  bool newline;
 } fastmatch_t;
 
 int	tre_fastcomp_literal(fastmatch_t *preg, const tre_char_t *regex,



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201108122002.p7CK2lbH080766>