Date: Sun, 10 Aug 2008 08:12:34 GMT
From: Gabor Kovesdan <gabor@FreeBSD.org>
To: Perforce Change Reviews <perforce@freebsd.org>
Subject: PERFORCE change 147060 for review
Message-ID: <200808100812.m7A8CYZb051864@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=147060

Change 147060 by gabor@gabor_server on 2008/08/10 08:11:47

	- Add back fixed string code. It is faster than the regex library. :(

Affected files ...

.. //depot/projects/soc2008/gabor_textproc/grep/Makefile#13 edit
.. //depot/projects/soc2008/gabor_textproc/grep/fastgrep.c#1 add
.. //depot/projects/soc2008/gabor_textproc/grep/grep.c#72 edit
.. //depot/projects/soc2008/gabor_textproc/grep/grep.h#40 edit
.. //depot/projects/soc2008/gabor_textproc/grep/util.c#71 edit

Differences ...

==== //depot/projects/soc2008/gabor_textproc/grep/Makefile#13 (text+ko) ====

@@ -2,7 +2,7 @@
 # $OpenBSD: Makefile,v 1.6 2003/06/25 15:00:04 millert Exp $
 
 PROG=	grep
-SRCS=	file.c grep.c queue.c util.c
+SRCS=	fastgrep.c file.c grep.c queue.c util.c
 LINKS=	${BINDIR}/grep ${BINDIR}/egrep \
 	${BINDIR}/grep ${BINDIR}/fgrep \
 	${BINDIR}/grep ${BINDIR}/zgrep \

==== //depot/projects/soc2008/gabor_textproc/grep/grep.c#72 (text+ko) ====

@@ -95,6 +95,7 @@
 int		 patterns, pattern_sz;
 char		**pattern;
 regex_t		*r_pattern;
+fastgrep_t	*fg_pattern;
 #ifdef WITH_PCRE
 pcre		**perl_pattern;
 #endif
@@ -590,14 +591,26 @@
 		usage();
 	}
 	if (grepbehave != GREP_PERL) {
-		/* Compile regexes with regcomp() */
+		fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
 		r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
+/*
+ * XXX: fgrepcomp() and fastcomp() are workarounds for regexec() performance.
+ * Optimizations should be done there.
+ */
 		for (i = 0; i < patterns; ++i) {
-			c = regcomp(&r_pattern[i], pattern[i], cflags);
-			if (c != 0) {
-				regerror(c, &r_pattern[i], re_error,
-				    RE_ERROR_BUF);
-				errx(2, "%s", re_error);
+			/* Check if cheating is allowed (always is for fgrep). */
+			if (grepbehave == GREP_FIXED)
+				fgrepcomp(&fg_pattern[i], pattern[i]);
+			else {
+				if (fastcomp(&fg_pattern[i], pattern[i])) {
+					/* Fall back to full regex library */
+					c = regcomp(&r_pattern[i], pattern[i], cflags);
+					if (c != 0) {
+						regerror(c, &r_pattern[i], re_error,
+						    RE_ERROR_BUF);
+						errx(2, "%s", re_error);
+					}
+				}
 			}
 		}
 	} else {

==== //depot/projects/soc2008/gabor_textproc/grep/grep.h#40 (text+ko) ====

@@ -27,6 +27,7 @@
  */
 
 #include <bzlib.h>
+#include <limits.h>
 #include <regex.h>
 #include <stdio.h>
 #include <zlib.h>
@@ -90,6 +91,17 @@
 	char		*dat;
 };
 
+typedef struct {
+	unsigned char	*pattern;
+	int		 patternLen;
+	int		 qsBc[UCHAR_MAX + 1];
+	/* flags */
+	int		 bol;
+	int		 eol;
+	int		 wmatch;
+	int		 reversedSearch;
+} fastgrep_t;
+
 /* Flags passed to regcomp() and regexec() */
 extern int	 cflags, eflags;
 
@@ -105,6 +117,7 @@
 extern int	 first, prev, matchall, patterns, epatterns, tail, notfound;
 extern char    **pattern, **epattern;
 extern regex_t	*r_pattern, *er_pattern;
+extern fastgrep_t *fg_pattern;
 
 #ifdef WITH_PCRE
 extern pcre	**perl_pattern;
@@ -138,3 +151,8 @@
 int		 grep_feof(struct file *f);
 int		 grep_fgetc(struct file *f);
 char		*grep_fgetln(struct file *f, size_t *len);
+
+/* fastgrep.c */
+int		 fastcomp(fastgrep_t *, const char *);
+void		 fgrepcomp(fastgrep_t *, const char *);
+int		 grep_search(fastgrep_t *, unsigned char *, size_t, regmatch_t *);

==== //depot/projects/soc2008/gabor_textproc/grep/util.c#71 (text+ko) ====

@@ -256,7 +256,14 @@
 
 		/* Loop to compare with all the patterns */
 		for (i = 0; i < patterns; i++) {
-			if (grepbehave != GREP_PERL) {
+/*
+ * XXX: grep_search() is a workaround for speed up and should be
+ * removed in the future. See fastgrep.c.
+ */
+			if (fg_pattern[i].pattern) {
+				r = grep_search(&fg_pattern[i], (unsigned char *)l->dat,
+				    l->len, &pmatch);
+			} else if (grepbehave != GREP_PERL) {
 				r = regexec(&r_pattern[i], l->dat, 1, &pmatch, eflags);
 				r = (r == 0) ? (vflag ? REG_NOMATCH : 0) : (vflag ? 0 : REG_NOMATCH);
 				st = pmatch.rm_eo;
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200808100812.m7A8CYZb051864>