Date: Tue, 14 Feb 2012 12:13:04 +0000 (UTC) From: Gabor Kovesdan <gabor@FreeBSD.org> To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r231675 - user/gabor/tre-integration/contrib/tre/lib Message-ID: <201202141213.q1ECD4Eh097683@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: gabor Date: Tue Feb 14 12:13:04 2012 New Revision: 231675 URL: http://svn.freebsd.org/changeset/base/231675 Log: - Add some more verbose comments about how this stuff works Modified: user/gabor/tre-integration/contrib/tre/lib/regexec.c Modified: user/gabor/tre-integration/contrib/tre/lib/regexec.c ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/regexec.c Tue Feb 14 12:06:56 2012 (r231674) +++ user/gabor/tre-integration/contrib/tre/lib/regexec.c Tue Feb 14 12:13:04 2012 (r231675) @@ -191,6 +191,10 @@ tre_match(const tre_tnfa_t *tnfa, const const char *data_byte = string; const tre_char_t *data_wide = string; + /* + * REG_NEWLINE: look for the longest fragment and then + * isolate the line and run the automaton. + */ if (heur->type == HEUR_LONGEST) { while (st < len) @@ -198,11 +202,17 @@ tre_match(const tre_tnfa_t *tnfa, const size_t eo, so; SEEK_TO(st); + + /* Match for heuristic */ ret = tre_match_fast(heur->heurs[0], string, len - st, type, nmatch, pmatch, eflags); if (ret != REG_OK) return ret; + /* + * If we do not know the length of the possibly matching part, + * look for newlines. + */ if (heur->tlen == -1) { for (so = st + pmatch[0].rm_so - 1; ; so--) @@ -221,6 +231,11 @@ tre_match(const tre_tnfa_t *tnfa, const break; } } + + /* + * If we know the possible match length, just check the narrowest + * context that we can, without looking for explicit newlines. + */ else { size_t rem = heur->tlen - (pmatch[0].rm_eo - pmatch[0].rm_so); @@ -235,6 +250,14 @@ tre_match(const tre_tnfa_t *tnfa, const } return REG_NOMATCH; } + + /* + * General case when REG_NEWLINE is not set. Look for prefix, + * intermediate and suffix heuristics, if available, to determine + * the context where the automaton will be invoked. The start + * of the context is st and the relative end offset from st is + * stored in n. 
+ */ else { while (st < len) @@ -249,7 +272,7 @@ tre_match(const tre_tnfa_t *tnfa, const st += pmatch[0].rm_so; n = pmatch[0].rm_eo - pmatch[0].rm_so; - /* Intermediate heuristics */ + /* Intermediate heuristics (if any) */ while (!(heur->heurs[i] == NULL) && ((heur->heurs[i + 1] != NULL) || ((heur->heurs[i + 1] == NULL) && (heur->type == HEUR_PREFIX_ARRAY))))
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201202141213.q1ECD4Eh097683>