FreeBSD Mail Archives

Date:      Tue, 14 Feb 2012 12:13:04 +0000 (UTC)
From:      Gabor Kovesdan <gabor@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-user@freebsd.org
Subject:   svn commit: r231675 - user/gabor/tre-integration/contrib/tre/lib
Message-ID:  <201202141213.q1ECD4Eh097683@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help

Author: gabor
Date: Tue Feb 14 12:13:04 2012
New Revision: 231675
URL: http://svn.freebsd.org/changeset/base/231675

Log:
  - Add some more verbose comments about how this stuff works

Modified:
  user/gabor/tre-integration/contrib/tre/lib/regexec.c

Modified: user/gabor/tre-integration/contrib/tre/lib/regexec.c
==============================================================================
--- user/gabor/tre-integration/contrib/tre/lib/regexec.c	Tue Feb 14 12:06:56 2012	(r231674)
+++ user/gabor/tre-integration/contrib/tre/lib/regexec.c	Tue Feb 14 12:13:04 2012	(r231675)
@@ -191,6 +191,10 @@ tre_match(const tre_tnfa_t *tnfa, const 
       const char *data_byte = string;
       const tre_char_t *data_wide = string;
 
+      /*
+       * REG_NEWLINE: look for the longest fragment and then
+       * isolate the line and run the automaton.
+       */
       if (heur->type == HEUR_LONGEST)
 	{
 	  while (st < len)
@@ -198,11 +202,17 @@ tre_match(const tre_tnfa_t *tnfa, const 
 	      size_t eo, so;
 
 	      SEEK_TO(st);
+
+	      /* Match for heuristic */
 	      ret = tre_match_fast(heur->heurs[0], string, len - st, type, nmatch,
 				   pmatch, eflags);
 	      if (ret != REG_OK)
 		return ret;
 
+	      /*
+	       * If we do not know the length of the possibly matching part,
+	       * look for newlines.
+	       */
 	      if (heur->tlen == -1)
 		{
 		  for (so = st + pmatch[0].rm_so - 1; ; so--)
@@ -221,6 +231,11 @@ tre_match(const tre_tnfa_t *tnfa, const 
 		      break;
 		    }
 		}
+
+	      /*
+	       * If we know the possible match length, just check the narrowest
+	       * context that we can, without looking for explicit newlines.
+	       */
 	      else
 		{
 		  size_t rem = heur->tlen - (pmatch[0].rm_eo - pmatch[0].rm_so);
@@ -235,6 +250,14 @@ tre_match(const tre_tnfa_t *tnfa, const 
 	   }
 	   return REG_NOMATCH;
 	}
+
+      /*
+       * General case when REG_NEWLINE is not set.  Look for prefix,
+       * intermediate and suffix heuristics if available, to determine
+       * the context where the automaton will be invoked.  The start
+       * of the context is st and the relative end offset from st is
+       * stored in n.
+       */
       else
 	{
 	  while (st < len)
@@ -249,7 +272,7 @@ tre_match(const tre_tnfa_t *tnfa, const 
 	     st += pmatch[0].rm_so;
 	     n = pmatch[0].rm_eo - pmatch[0].rm_so;
 
-	     /* Intermediate heuristics */
+	     /* Intermediate heuristics (if any) */
 	     while (!(heur->heurs[i] == NULL) &&
 		   ((heur->heurs[i + 1] != NULL) ||
 		   ((heur->heurs[i + 1] == NULL) && (heur->type == HEUR_PREFIX_ARRAY))))

Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201202141213.q1ECD4Eh097683>

Header And Logo

Peripheral Links

Site Navigation

Header And Logo

Peripheral Links

Search

Site Navigation