Skip site navigation (1) Skip section navigation (2)
Date:      Sat, 16 Aug 2008 12:44:22 GMT
From:      Gabor Kovesdan <gabor@FreeBSD.org>
To:        Perforce Change Reviews <perforce@freebsd.org>
Subject:   PERFORCE change 147530 for review
Message-ID:  <200808161244.m7GCiMOu002623@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=147530

Change 147530 by gabor@gabor_server on 2008/08/16 12:43:58

	IFC

Affected files ...

.. //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/Makefile#3 integrate
.. //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/fastgrep.c#1 branch
.. //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/file.c#5 integrate
.. //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/grep.c#3 integrate
.. //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/grep.h#3 integrate
.. //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/util.c#3 integrate

Differences ...

==== //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/Makefile#3 (text+ko) ====

@@ -2,7 +2,7 @@
 #	$OpenBSD: Makefile,v 1.6 2003/06/25 15:00:04 millert Exp $
 
 PROG=	grep
-SRCS=	file.c grep.c queue.c util.c
+SRCS=	fastgrep.c file.c grep.c queue.c util.c
 LINKS=	${BINDIR}/grep ${BINDIR}/egrep \
 	${BINDIR}/grep ${BINDIR}/fgrep \
 	${BINDIR}/grep ${BINDIR}/zgrep \

==== //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/file.c#5 (text+ko) ====

@@ -119,36 +119,22 @@
 {
 	int		 i = 0;
 	char		 ch;
-	size_t		 size;
-	wchar_t		 wbinbuf[BUFSIZ];
-	const char	*src = binbuf;
-	mbstate_t	 mbs;
 
 	/* Fill in the buffer if it is empty. */
 	if (binbufptr == NULL) {
 		/* Only pre-read to the buffer if we need the binary check. */
 		if (binbehave != BINFILE_TEXT) {
-			for (; i < sizeof(wbinbuf) && !grep_feof(f); i++) {
+			for (; i < sizeof(binbuf) && !grep_feof(f); i++) {
 				ch = grep_fgetc(f);
-				binbuf[i] = ch;
+				if (ch != EOF)
+					binbuf[i] = ch;
+				else
+					break;
 			}
-			binbufsiz = i;
-			binbufptr = binbuf;
-
-			/* Convert at most (BUFSIZ * sizeof(wint_t)) characters or
-				(BUFSIZ - 1) bytes to wide character string. */
-			size = mbsnrtowcs(wbinbuf, &src, sizeof(wbinbuf), BUFSIZ - 1, &mbs);
-			f->binary = 0;
-			for (; size > 0; size--)
-				if (iswbinary(wbinbuf[size])) {
-					f->binary = 1;
-					break;
-				}
-			
-		} else {
-			binbufsiz = i;
-			binbufptr = binbuf;
+			f->binary = memchr(binbuf, (filebehave != FILE_GZIP) ? '\0' : '\200', i - 1) != 0;
 		}
+		binbufsiz = i;
+		binbufptr = binbuf;
 	}
 
 	/* Read a line whether from the buffer or from the file itself. */

==== //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/grep.c#3 (text+ko) ====

@@ -95,6 +95,7 @@
 int		 patterns, pattern_sz;
 char		**pattern;
 regex_t		*r_pattern;
+fastgrep_t	*fg_pattern;
 #ifdef WITH_PCRE
 pcre		**perl_pattern;
 #endif
@@ -590,14 +591,26 @@
 		usage();
 	}
 	if (grepbehave != GREP_PERL) {
-		/* Compile regexes with regcomp() */
+		fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
 		r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
+/*
+ * XXX: fgrepcomp() and fastcomp() are workarounds for regexec() performance.
+ * Optimizations should be done there.
+ */
 		for (i = 0; i < patterns; ++i) {
-			c = regcomp(&r_pattern[i], pattern[i], cflags);
-			if (c != 0) {
-				regerror(c, &r_pattern[i], re_error,
-				    RE_ERROR_BUF);
-				errx(2, "%s", re_error);
+			/* Check if cheating is allowed (always is for fgrep). */
+			if (grepbehave == GREP_FIXED)
+				fgrepcomp(&fg_pattern[i], pattern[i]);
+			else {
+				if (fastcomp(&fg_pattern[i], pattern[i])) {
+					/* Fall back to full regex library */
+					c = regcomp(&r_pattern[i], pattern[i], cflags);
+					if (c != 0) {
+						regerror(c, &r_pattern[i], re_error,
+						    RE_ERROR_BUF);
+						errx(2, "%s", re_error);
+					}
+				}
 			}
 		}
 	} else {

==== //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/grep.h#3 (text+ko) ====

@@ -27,6 +27,7 @@
  */
 
 #include <bzlib.h>
+#include <limits.h>
 #include <regex.h>
 #include <stdio.h>
 #include <zlib.h>
@@ -90,6 +91,16 @@
 	char		*dat;
 };
 
+typedef struct {
+	unsigned char	*pattern;
+	int		 len;
+	int		 qsBc[UCHAR_MAX + 1];
+	/* flags */
+	int		 bol;
+	int		 eol;
+	int		 reversed;
+} fastgrep_t;
+
 /* Flags passed to regcomp() and regexec() */
 extern int	 cflags, eflags;
 
@@ -105,6 +116,7 @@
 extern int	 first, prev, matchall, patterns, epatterns, tail, notfound;
 extern char    **pattern, **epattern;
 extern regex_t	*r_pattern, *er_pattern;
+extern fastgrep_t *fg_pattern;
 
 #ifdef WITH_PCRE
 extern pcre	**perl_pattern;
@@ -138,3 +150,8 @@
 int		 grep_feof(struct file *f);
 int		 grep_fgetc(struct file *f);
 char		*grep_fgetln(struct file *f, size_t *len);
+
+/* fastgrep.c */
+int		 fastcomp(fastgrep_t *, const char *);
+void		 fgrepcomp(fastgrep_t *, const char *);
+int		 grep_search(fastgrep_t *, unsigned char *, size_t, regmatch_t *);

==== //depot/projects/soc2008/gabor_textproc/src/usr.bin/grep/util.c#3 (text+ko) ====

@@ -243,14 +243,12 @@
 	regmatch_t	 pmatch;
 	regmatch_t	 matches[MAX_LINE_MATCHES];
 	regoff_t	 st = 0;
-	int		 c = 0, i, r = 0, m = 0, t;
+	int		 c = 0, i, r = 0, m = 0;
 #ifdef WITH_PCRE
 	int		 ovector[3];
 #endif
 
 	if (!matchall) {
-		t = vflag ? REG_NOMATCH : 0;
-
 		/* Loop to process the whole line */
 		while (st <= l->len) {
 			pmatch.rm_so = st;
@@ -258,8 +256,18 @@
 
 			/* Loop to compare with all the patterns */
 			for (i = 0; i < patterns; i++) {
-				if (grepbehave != GREP_PERL) {
+/*
+ * XXX: grep_search() is a workaround for speed up and should be
+ * removed in the future.  See fastgrep.c.
+ */
+				if (fg_pattern[i].pattern) {
+					r = grep_search(&fg_pattern[i], (unsigned char *)l->dat,
+					    l->len, &pmatch);
+					r = (r == 0) ? (vflag ? REG_NOMATCH : 0) : (vflag ? 0 : REG_NOMATCH);
+					st = pmatch.rm_eo;
+				} else if (grepbehave != GREP_PERL) {
 					r = regexec(&r_pattern[i], l->dat, 1, &pmatch, eflags);
+					r = (r == 0) ? (vflag ? REG_NOMATCH : 0) : (vflag ? 0 : REG_NOMATCH);
 					st = pmatch.rm_eo;
 				} else {
 #ifdef WITH_PCRE
@@ -271,7 +279,7 @@
 					;
 #endif
 				}
-				if (r == REG_NOMATCH && t == 0)
+				if (r == REG_NOMATCH)
 					continue;
 				/* Check for full match */
 				if (r == 0 && xflag)
@@ -290,7 +298,7 @@
 						r = REG_NOMATCH;
 					free(wbegin);
 				}
-				if (r == t) {
+				if (r == 0) {
 					if (m == 0)
 						c++;
 					if (m < MAX_LINE_MATCHES)
@@ -313,7 +321,7 @@
 		return (c); /* Binary file */
 
 	/* Dealing with the context */
-	if ((tail || (c && !vflag)) && !cflag && !qflag) {
+	if ((tail || c) && !cflag && !qflag) {
 		if (c) {
 			if (!first && !prev && !tail && Aflag)
 				printf("--\n");



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200808161244.m7GCiMOu002623>