Date: Sun, 10 Aug 2008 09:27:50 GMT
From: Gabor Kovesdan <gabor@FreeBSD.org>
To: Perforce Change Reviews <perforce@freebsd.org>
Subject: PERFORCE change 147062 for review
Message-ID: <200808100927.m7A9RoPk071000@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=147062 Change 147062 by gabor@gabor_server on 2008/08/10 09:27:08 - Revert the -w change as it broke fixed string handling Affected files ... .. //depot/projects/soc2008/gabor_textproc/grep/fastgrep.c#3 edit Differences ... ==== //depot/projects/soc2008/gabor_textproc/grep/fastgrep.c#3 (text+ko) ==== @@ -52,6 +52,7 @@ fg->patternLen = strlen(pattern); fg->bol = 0; fg->eol = 0; + fg->wmatch = wflag; fg->reversedSearch = 0; /* @@ -100,6 +101,7 @@ fg->patternLen = strlen(pattern); fg->bol = 0; fg->eol = 0; + fg->wmatch = 0; fg->reversedSearch = 0; /* Remove end-of-line character ('$'). */ @@ -116,13 +118,26 @@ fg->patternLen--; } + /* Remove enclosing [[:<:]] and [[:>:]] (word match). */ + if (wflag) { + /* basic re's use \( \), extended re's ( ) */ + int extra = (grepbehave == GREP_EXTENDED) ? 1 : 2; + fg->patternLen -= 14 + 2 * extra; + fg->wmatch = 7 + extra; + } else if (fg->patternLen >= 14 && + strncmp(pattern + fg->bol, "[[:<:]]", 7) == 0 && + strncmp(pattern + fg->bol + fg->patternLen - 7, "[[:>:]]", 7) == 0) { + fg->patternLen -= 14; + fg->wmatch = 7; + } + /* * Copy pattern minus '^' and '$' characters as well as word * match character classes at the beginning and ending of the * string respectively. */ fg->pattern = grep_malloc(fg->patternLen + 1); - memcpy(fg->pattern, pattern + bol, fg->patternLen); + memcpy(fg->pattern, pattern + bol + fg->wmatch, fg->patternLen); fg->pattern[fg->patternLen] = '\0'; /* Look for ways to cheat...er...avoid the full regex engine. */ @@ -215,6 +230,18 @@ return (0); } +/* + * Word boundaries using regular expressions are defined as the point + * of transition from a non-word char to a word char, or vice versa. + * This means that grep -w +a and grep -w a+ never match anything, + * because they lack a starting or ending transition, but grep -w a+b + * does match a line containing a+b. 
+ */ +#define isword(x) (isalnum(x) || (x) == '_') +#define wmatch(d, l, s, e) \ + ((s == 0 || !isword(d[s-1])) && (e == l || !isword(d[e])) && \ + e > s && isword(d[s]) && isword(d[e-1])) + int grep_search(fastgrep_t *fg, unsigned char *data, size_t dataLen, regmatch_t *pmatch) { @@ -243,6 +270,9 @@ fg->patternLen) == -1) { pmatch->rm_so = j; pmatch->rm_eo = j + fg->patternLen; + if (!fg->wmatch || wmatch(data, dataLen, + pmatch->rm_so, pmatch->rm_eo)) + rtrnVal = 0; } } } else if (fg->reversedSearch) { @@ -253,6 +283,11 @@ fg->patternLen) == -1) { pmatch->rm_so = j - fg->patternLen; pmatch->rm_eo = j; + if (!fg->wmatch || wmatch(data, dataLen, + pmatch->rm_so, pmatch->rm_eo)) { + rtrnVal = 0; + break; + } } /* Shift if within bounds, otherwise, we are done. */ if (j == fg->patternLen) @@ -266,6 +301,12 @@ if (grep_cmp(fg->pattern, data + j, fg->patternLen) == -1) { pmatch->rm_so = j; pmatch->rm_eo = j + fg->patternLen; + if (fg->patternLen == 0 || !fg->wmatch || + wmatch(data, dataLen, pmatch->rm_so, + pmatch->rm_eo)) { + rtrnVal = 0; + break; + } } /* Shift if within bounds, otherwise, we are done. */
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200808100927.m7A9RoPk071000>