Skip site navigation (1) | Skip section navigation (2)
Date:      Tue, 13 Sep 2011 23:28:04 +0000 (UTC)
From:      Gabor Kovesdan <gabor@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-user@freebsd.org
Subject:   svn commit: r225541 - user/gabor/grep/trunk/regex
Message-ID:  <201109132328.p8DNS4Uu001058@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: gabor
Date: Tue Sep 13 23:28:04 2011
New Revision: 225541
URL: http://svn.freebsd.org/changeset/base/225541

Log:
  - WCS dot position != MBS dot position (track the dot positions of the
    wide-character and multibyte patterns separately)

Modified:
  user/gabor/grep/trunk/regex/fastmatch.h
  user/gabor/grep/trunk/regex/tre-fastmatch.c

Modified: user/gabor/grep/trunk/regex/fastmatch.h
==============================================================================
--- user/gabor/grep/trunk/regex/fastmatch.h	Tue Sep 13 23:23:08 2011	(r225540)
+++ user/gabor/grep/trunk/regex/fastmatch.h	Tue Sep 13 23:28:04 2011	(r225541)
@@ -13,7 +13,6 @@ typedef struct {
   size_t	 len;
   wchar_t	*wpattern;
   bool		*wescmap;
-  unsigned int	 hasdot;
   unsigned int	 qsBc[UCHAR_MAX + 1];
   unsigned int	*bmGs;
   char		*pattern;
@@ -24,6 +23,7 @@ typedef struct {
   const char	*re_endp;
 
   /* flags */
+  bool		 hasdot;
   bool		 bol;
   bool		 eol;
   bool		 word;

Modified: user/gabor/grep/trunk/regex/tre-fastmatch.c
==============================================================================
--- user/gabor/grep/trunk/regex/tre-fastmatch.c	Tue Sep 13 23:23:08 2011	(r225540)
+++ user/gabor/grep/trunk/regex/tre-fastmatch.c	Tue Sep 13 23:28:04 2011	(r225541)
@@ -224,8 +224,8 @@ static int	fastcmp(const void *, const b
 
 #define _FILL_QSBC							\
   for (unsigned int i = 0; i <= UCHAR_MAX; i++)				\
-    fg->qsBc[i] = fg->len - fg->hasdot;					\
-  for (unsigned int i = fg->hasdot + 1; i < fg->len; i++)		\
+    fg->qsBc[i] = fg->len - hasdot;					\
+  for (unsigned int i = hasdot + 1; i < fg->len; i++)			\
     {									\
       fg->qsBc[(unsigned char)fg->pattern[i]] = fg->len - i;		\
       DPRINT(("BC shift for char %c is %zu\n", fg->pattern[i],		\
@@ -279,14 +279,14 @@ static int	fastcmp(const void *, const b
 
 #define _FILL_QSBC_WIDE							\
   /* Adjust the shift based on location of the last dot ('.'). */	\
-  fg->defBc = fg->wlen - fg->hasdot;					\
+  fg->defBc = fg->wlen - whasdot;					\
 									\
   /* Preprocess pattern. */						\
   fg->qsBc_table = hashtable_init(fg->wlen * (fg->icase ? 8 : 4),	\
 				  sizeof(tre_char_t), sizeof(int));	\
   if (!fg->qsBc_table)							\
     FAIL_COMP(REG_ESPACE);						\
-  for (unsigned int i = fg->hasdot + 1; i < fg->wlen; i++)		\
+  for (unsigned int i = whasdot + 1; i < fg->wlen; i++)			\
     {									\
       int k = fg->wlen - i;						\
       int r;								\
@@ -309,14 +309,14 @@ static int	fastcmp(const void *, const b
 
 #define _FILL_QSBC_WIDE_REVERSED					\
   /* Adjust the shift based on location of the last dot ('.'). */	\
-  fg->defBc = (size_t)firstdot;						\
+  fg->defBc = (size_t)wfirstdot;					\
 									\
   /* Preprocess pattern. */						\
   fg->qsBc_table = hashtable_init(fg->wlen * (fg->icase ? 8 : 4),	\
 				  sizeof(tre_char_t), sizeof(int));	\
   if (!fg->qsBc_table)							\
     FAIL_COMP(REG_ESPACE);						\
-  for (int i = firstdot - 1; i >= 0; i--)				\
+  for (int i = wfirstdot - 1; i >= 0; i--)				\
     {									\
       int k = i + 1;							\
       int r;								\
@@ -518,7 +518,8 @@ int
 tre_compile_literal(fastmatch_t *fg, const tre_char_t *pat, size_t n,
 		    int cflags)
 {
-  ssize_t firstdot = -1;
+  size_t hasdot = 0, whasdot = 0;
+  ssize_t firstdot = -1, wfirstdot = -1;
 
   INIT_COMP;
 
@@ -557,8 +558,8 @@ tre_compile_fast(fastmatch_t *fg, const 
 		 int cflags)
 {
   tre_char_t *tmp;
-  size_t pos = 0;
-  ssize_t firstdot = -1;
+  size_t pos = 0, hasdot = 0, whasdot = 0;;
+  ssize_t firstdot = -1, wfirstdot = -1;
   bool escaped = false;
   bool *_escmap = NULL;
 
@@ -647,9 +648,9 @@ tre_compile_fast(fastmatch_t *fg, const 
 	      }
 	    else
 	      {
-		fg->hasdot = i;
-		if (firstdot == -1)
-			firstdot = i;
+		whasdot = i;
+		if (wfirstdot == -1)
+			wfirstdot = i;
 		STORE_CHAR;
 	      }
 	    continue;
@@ -699,6 +700,8 @@ badpat:
       return REG_BADPAT;
     }
 
+  fg->hasdot = whasdot;
+
   /*
    * The pattern has been processed and copied to tmp as a literal string
    * with escapes, anchors (^$) and the word boundary match character
@@ -708,25 +711,33 @@ badpat:
   SAVE_PATTERN(tmp, pos, fg->wpattern, fg->wlen);
   fg->wescmap = _escmap;
   STORE_MBS_PAT;
-  if (fg->wescmap != NULL)
+  if (fg->hasdot || (fg->wescmap != NULL))
     {
-      escaped = false;
-
-      fg->escmap = xmalloc(fg->len * sizeof(bool));
-      if (!fg->escmap)
+      if (fg->wescmap != NULL)
 	{
-	  tre_free_fast(fg);
-	  return REG_ESPACE;
+	  fg->escmap = xmalloc(fg->len * sizeof(bool));
+	  if (!fg->escmap)
+	    {
+	      tre_free_fast(fg);
+	      return REG_ESPACE;
+	    }
 	}
 
+      escaped = false;
       for (unsigned int i = 0; i < fg->len; i++)
 	if (fg->pattern[i] == '\\')
-	  escaped = ! escaped;
+	  escaped = !escaped;
 	else if (fg->pattern[i] == '.' && escaped)
 	  {
 	    fg->escmap[i] = true;
 	    escaped = false;
 	  }
+	else if (fg->pattern[i] == '.' && !escaped)
+	  {
+	    hasdot = i;
+	    if (firstdot == -1)
+	      firstdot = i;
+	  }
 	else
 	  escaped = false;
     }
@@ -743,7 +754,7 @@ badpat:
 	 fg->icase ? 'y' : 'n', fg->word ? 'y' : 'n',
 	 fg->newline ? 'y' : 'n'));
 
-  if ((firstdot > -1) && (fg->len - fg->hasdot + 1 < (size_t)firstdot) &&
+  if ((wfirstdot > -1) && (fg->wlen - whasdot + 1 < (size_t)wfirstdot) &&
       fg->nosub)
     {
       fg->reversed = true;



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201109132328.p8DNS4Uu001058>