Skip site navigation (1) Skip section navigation (2)
Date:      Sun, 10 Aug 2008 08:12:34 GMT
From:      Gabor Kovesdan <gabor@FreeBSD.org>
To:        Perforce Change Reviews <perforce@freebsd.org>
Subject:   PERFORCE change 147060 for review
Message-ID:  <200808100812.m7A8CYZb051864@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=147060

Change 147060 by gabor@gabor_server on 2008/08/10 08:11:47

	- Add back fixed string code.  It is faster than the regex library. :(

Affected files ...

.. //depot/projects/soc2008/gabor_textproc/grep/Makefile#13 edit
.. //depot/projects/soc2008/gabor_textproc/grep/fastgrep.c#1 add
.. //depot/projects/soc2008/gabor_textproc/grep/grep.c#72 edit
.. //depot/projects/soc2008/gabor_textproc/grep/grep.h#40 edit
.. //depot/projects/soc2008/gabor_textproc/grep/util.c#71 edit

Differences ...

==== //depot/projects/soc2008/gabor_textproc/grep/Makefile#13 (text+ko) ====

@@ -2,7 +2,7 @@
 #	$OpenBSD: Makefile,v 1.6 2003/06/25 15:00:04 millert Exp $
 
 PROG=	grep
-SRCS=	file.c grep.c queue.c util.c
+SRCS=	fastgrep.c file.c grep.c queue.c util.c
 LINKS=	${BINDIR}/grep ${BINDIR}/egrep \
 	${BINDIR}/grep ${BINDIR}/fgrep \
 	${BINDIR}/grep ${BINDIR}/zgrep \

==== //depot/projects/soc2008/gabor_textproc/grep/grep.c#72 (text+ko) ====

@@ -95,6 +95,7 @@
 int		 patterns, pattern_sz;
 char		**pattern;
 regex_t		*r_pattern;
+fastgrep_t	*fg_pattern;
 #ifdef WITH_PCRE
 pcre		**perl_pattern;
 #endif
@@ -590,14 +591,26 @@
 		usage();
 	}
 	if (grepbehave != GREP_PERL) {
-		/* Compile regexes with regcomp() */
+		fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
 		r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
+/*
+ * XXX: fgrepcomp() and fastcomp() are workarounds for regexec() performance.
+ * Optimizations should be done there.
+ */
 		for (i = 0; i < patterns; ++i) {
-			c = regcomp(&r_pattern[i], pattern[i], cflags);
-			if (c != 0) {
-				regerror(c, &r_pattern[i], re_error,
-				    RE_ERROR_BUF);
-				errx(2, "%s", re_error);
+			/* Check if cheating is allowed (always is for fgrep). */
+			if (grepbehave == GREP_FIXED)
+				fgrepcomp(&fg_pattern[i], pattern[i]);
+			else {
+				if (fastcomp(&fg_pattern[i], pattern[i])) {
+					/* Fall back to full regex library */
+					c = regcomp(&r_pattern[i], pattern[i], cflags);
+					if (c != 0) {
+						regerror(c, &r_pattern[i], re_error,
+						    RE_ERROR_BUF);
+						errx(2, "%s", re_error);
+					}
+				}
 			}
 		}
 	} else {

==== //depot/projects/soc2008/gabor_textproc/grep/grep.h#40 (text+ko) ====

@@ -27,6 +27,7 @@
  */
 
 #include <bzlib.h>
+#include <limits.h>
 #include <regex.h>
 #include <stdio.h>
 #include <zlib.h>
@@ -90,6 +91,17 @@
 	char		*dat;
 };
 
+typedef struct {	/* per-pattern state handed to fastcomp()/fgrepcomp() and grep_search() */
+	unsigned char	*pattern;	/* non-NULL makes util.c call grep_search() instead of regexec() */
+	int		 patternLen;	/* presumably the length of pattern -- confirm in fastgrep.c */
+	int		 qsBc[UCHAR_MAX + 1];	/* one slot per possible unsigned char value */
+	/* flags */
+	int		 bol;
+	int		 eol;
+	int		 wmatch;
+	int		 reversedSearch;
+} fastgrep_t;
+
 /* Flags passed to regcomp() and regexec() */
 extern int	 cflags, eflags;
 
@@ -105,6 +117,7 @@
 extern int	 first, prev, matchall, patterns, epatterns, tail, notfound;
 extern char    **pattern, **epattern;
 extern regex_t	*r_pattern, *er_pattern;
+extern fastgrep_t *fg_pattern;
 
 #ifdef WITH_PCRE
 extern pcre	**perl_pattern;
@@ -138,3 +151,8 @@
 int		 grep_feof(struct file *f);
 int		 grep_fgetc(struct file *f);
 char		*grep_fgetln(struct file *f, size_t *len);
+
+/* fastgrep.c */
+int		 fastcomp(fastgrep_t *, const char *);
+void		 fgrepcomp(fastgrep_t *, const char *);
+int		 grep_search(fastgrep_t *, unsigned char *, size_t, regmatch_t *);

==== //depot/projects/soc2008/gabor_textproc/grep/util.c#71 (text+ko) ====

@@ -256,7 +256,14 @@
 
 			/* Loop to compare with all the patterns */
 			for (i = 0; i < patterns; i++) {
-				if (grepbehave != GREP_PERL) {
+/*
+ * XXX: grep_search() is a workaround for speed up and should be
+ * removed in the future.  See fastgrep.c.
+ */
+				if (fg_pattern[i].pattern) {
+					r = grep_search(&fg_pattern[i], (unsigned char *)l->dat,
+					    l->len, &pmatch);
+				} else if (grepbehave != GREP_PERL) {
 					r = regexec(&r_pattern[i], l->dat, 1, &pmatch, eflags);
 					r = (r == 0) ? (vflag ? REG_NOMATCH : 0) : (vflag ? 0 : REG_NOMATCH);
 					st = pmatch.rm_eo;



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200808100812.m7A8CYZb051864>