Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 16 Aug 2017 18:00:32 +0000 (UTC)
From:      Kyle Evans <kevans@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org
Subject:   svn commit: r322587 - in stable/11: contrib/netbsd-tests/usr.bin/grep usr.bin/grep usr.bin/grep/tests
Message-ID:  <201708161800.v7GI0W8A059555@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: kevans
Date: Wed Aug 16 18:00:32 2017
New Revision: 322587
URL: https://svnweb.freebsd.org/changeset/base/322587

Log:
  bsdgrep: fix -w flag matching with an empty pattern
  
  MFC r317703: bsdgrep: fix -w flag matching with an empty pattern
  
  -w flag matching with an empty pattern was generally 'broken', allowing
  matches to occur on any line whether or not it actually matches -w
  criteria.
  
  This fix required a good amount of refactoring to address.  procline()
  is altered to *only* process the line and return whether it was a match
  or not, necessary to be able to short-circuit the whole function in case
  of this matchall flag. -m flag handling is moved out as well because it
  suffers from the same fate as context handling if we bypass any actual
  pattern matching.
  
  The matching context (matches, mostly) didn't previously exist outside
  of procline(), so we go ahead and create a context object for file
  processing bits to pass around.  grep_printline() was created due to
  this, for the scenarios where the matches don't actually matter and we
  just want to print a line or two, a la flushing the context queue and
  no -o or --color specified.
  
  Damage from this broken behavior would have been mitigated by the fact
  that it is unlikely users would invoke grep -w with an empty pattern.
  
  This was identified while checking PR 105221 for problems that it may
  cause in BSD grep, but PR 105221 is *not* a report of this behavior.
  
  MFC r317741: bsdgrep: correct uninitialized variable introduced in r317703
  
  MFC r317842: bsdgrep: don't output matches with -c, -l, -L
  
  Refactoring done in r317703 broke -c, -l, and -L flags implying
  suppression of match printing.  Fortunately this is just a matter of not
  doing any printing of the resulting matches and context printing was not
  broken in this refactoring.
  
  Add some regression tests since this area may still see further
  refactoring, include different context flags as well even though they
  were not broken in this case.
  
  PR:		219077
  Approved by:	emaste (mentor, blanket MFC)

Added:
  stable/11/contrib/netbsd-tests/usr.bin/grep/d_context_e.in
     - copied unchanged from r317703, head/contrib/netbsd-tests/usr.bin/grep/d_context_e.in
  stable/11/contrib/netbsd-tests/usr.bin/grep/d_context_e.out
     - copied unchanged from r317703, head/contrib/netbsd-tests/usr.bin/grep/d_context_e.out
  stable/11/contrib/netbsd-tests/usr.bin/grep/d_context_f.out
     - copied unchanged from r317703, head/contrib/netbsd-tests/usr.bin/grep/d_context_f.out
  stable/11/contrib/netbsd-tests/usr.bin/grep/d_context_g.out
     - copied unchanged from r317703, head/contrib/netbsd-tests/usr.bin/grep/d_context_g.out
Modified:
  stable/11/contrib/netbsd-tests/usr.bin/grep/t_grep.sh
  stable/11/usr.bin/grep/grep.c
  stable/11/usr.bin/grep/grep.h
  stable/11/usr.bin/grep/queue.c
  stable/11/usr.bin/grep/tests/Makefile
  stable/11/usr.bin/grep/util.c
Directory Properties:
  stable/11/   (props changed)

Copied: stable/11/contrib/netbsd-tests/usr.bin/grep/d_context_e.in (from r317703, head/contrib/netbsd-tests/usr.bin/grep/d_context_e.in)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ stable/11/contrib/netbsd-tests/usr.bin/grep/d_context_e.in	Wed Aug 16 18:00:32 2017	(r322587, copy of r317703, head/contrib/netbsd-tests/usr.bin/grep/d_context_e.in)
@@ -0,0 +1,10 @@
+monkey
+banana
+apple
+fruit
+monkey
+banna
+apple
+fruit
+apple
+monkey

Copied: stable/11/contrib/netbsd-tests/usr.bin/grep/d_context_e.out (from r317703, head/contrib/netbsd-tests/usr.bin/grep/d_context_e.out)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ stable/11/contrib/netbsd-tests/usr.bin/grep/d_context_e.out	Wed Aug 16 18:00:32 2017	(r322587, copy of r317703, head/contrib/netbsd-tests/usr.bin/grep/d_context_e.out)
@@ -0,0 +1,9 @@
+monkey
+banana
+apple
+fruit
+monkey
+banna
+--
+apple
+monkey

Copied: stable/11/contrib/netbsd-tests/usr.bin/grep/d_context_f.out (from r317703, head/contrib/netbsd-tests/usr.bin/grep/d_context_f.out)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ stable/11/contrib/netbsd-tests/usr.bin/grep/d_context_f.out	Wed Aug 16 18:00:32 2017	(r322587, copy of r317703, head/contrib/netbsd-tests/usr.bin/grep/d_context_f.out)
@@ -0,0 +1,9 @@
+monkey
+banana
+apple
+fruit
+monkey
+banna
+apple
+fruit
+apple

Copied: stable/11/contrib/netbsd-tests/usr.bin/grep/d_context_g.out (from r317703, head/contrib/netbsd-tests/usr.bin/grep/d_context_g.out)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ stable/11/contrib/netbsd-tests/usr.bin/grep/d_context_g.out	Wed Aug 16 18:00:32 2017	(r322587, copy of r317703, head/contrib/netbsd-tests/usr.bin/grep/d_context_g.out)
@@ -0,0 +1,8 @@
+apple
+fruit
+--
+banna
+apple
+fruit
+apple
+monkey

Modified: stable/11/contrib/netbsd-tests/usr.bin/grep/t_grep.sh
==============================================================================
--- stable/11/contrib/netbsd-tests/usr.bin/grep/t_grep.sh	Wed Aug 16 17:54:29 2017	(r322586)
+++ stable/11/contrib/netbsd-tests/usr.bin/grep/t_grep.sh	Wed Aug 16 18:00:32 2017	(r322587)
@@ -171,6 +171,12 @@ context_body()
 	atf_check -o file:d_context_b.out grep -A3 tilt d_context_a.in
 	atf_check -o file:d_context_c.out grep -B4 Whig d_context_a.in
 	atf_check -o file:d_context_d.out grep -C1 pig d_context_a.in d_context_b.in
+	atf_check -o file:d_context_e.out \
+	    grep -E -C1 '(banana|monkey)' d_context_e.in
+	atf_check -o file:d_context_f.out \
+	    grep -Ev -B2 '(banana|monkey|fruit)' d_context_e.in
+	atf_check -o file:d_context_g.out \
+	    grep -Ev -A1 '(banana|monkey|fruit)' d_context_e.in
 }
 
 atf_test_case file_exp
@@ -386,6 +392,32 @@ zerolen_body()
 	atf_check -o inline:"Eggs\nCheese\n" grep -v -e "^$" test1
 }
 
+atf_test_case wflag_emptypat
+wflag_emptypat_head()
+{
+	atf_set "descr" "Check for proper handling of -w with an empty pattern (PR 105221)"
+}
+wflag_emptypat_body()
+{
+	grep_type
+	if [ $? -eq $GREP_TYPE_GNU_FREEBSD ]; then
+		atf_expect_fail "this test does not pass with GNU grep in base"
+	fi
+
+	printf "" > test1
+	printf "\n" > test2
+	printf "qaz" > test3
+	printf " qaz\n" > test4
+
+	atf_check -s exit:1 -o empty grep -w -e "" test1
+
+	atf_check -o file:test2 grep -w -e "" test2
+
+	atf_check -s exit:1 -o empty grep -w -e "" test3
+
+	atf_check -o file:test4 grep -w -e "" test4
+}
+
 atf_test_case fgrep_sanity
 fgrep_sanity_head()
 {
@@ -462,6 +494,34 @@ wv_combo_break_body()
 	atf_check -s exit:1 grep -v -w "x" test1
 	atf_check -s exit:1 grep -v -w "x" test2
 }
+
+atf_test_case grep_nomatch_flags
+grep_nomatch_flags_head()
+{
+	atf_set "descr" "Check for no match (-c, -l, -L, -q) flags not producing line matches or context (PR 219077)"
+}
+
+grep_nomatch_flags_body()
+{
+	printf "A\nB\nC\n" > test1
+
+	atf_check -o inline:"1\n" grep -c -C 1 -e "B" test1
+	atf_check -o inline:"1\n" grep -c -B 1 -e "B" test1
+	atf_check -o inline:"1\n" grep -c -A 1 -e "B" test1
+	atf_check -o inline:"1\n" grep -c -C 1 -e "B" test1
+
+	atf_check -o inline:"test1\n" grep -l -e "B" test1
+	atf_check -o inline:"test1\n" grep -l -B 1 -e "B" test1
+	atf_check -o inline:"test1\n" grep -l -A 1 -e "B" test1
+	atf_check -o inline:"test1\n" grep -l -C 1 -e "B" test1
+
+	atf_check -s exit:1 -o inline:"test1\n" grep -L -e "D" test1
+
+	atf_check -o empty grep -q -e "B" test1
+	atf_check -o empty grep -q -B 1 -e "B" test1
+	atf_check -o empty grep -q -A 1 -e "B" test1
+	atf_check -o empty grep -q -C 1 -e "B" test1
+}
 # End FreeBSD
 
 atf_init_test_cases()
@@ -490,9 +550,11 @@ atf_init_test_cases()
 	atf_add_test_case escmap
 	atf_add_test_case egrep_empty_invalid
 	atf_add_test_case zerolen
+	atf_add_test_case wflag_emptypat
 	atf_add_test_case wv_combo_break
 	atf_add_test_case fgrep_sanity
 	atf_add_test_case egrep_sanity
 	atf_add_test_case grep_sanity
+	atf_add_test_case grep_nomatch_flags
 # End FreeBSD
 }

Modified: stable/11/usr.bin/grep/grep.c
==============================================================================
--- stable/11/usr.bin/grep/grep.c	Wed Aug 16 17:54:29 2017	(r322586)
+++ stable/11/usr.bin/grep/grep.c	Wed Aug 16 18:00:32 2017	(r322587)
@@ -82,7 +82,13 @@ const char	*errstr[] = {
 int		 cflags = REG_NOSUB;
 int		 eflags = REG_STARTEND;
 
-/* Shortcut for matching all cases like empty regex */
+/* XXX TODO: Get rid of this flag.
+ * matchall is a gross hack that means that an empty pattern was passed to us.
+ * It is a necessary evil at the moment because our regex(3) implementation
+ * does not allow for empty patterns, as supported by POSIX's definition of
+ * grammar for BREs/EREs. When libregex becomes available, it would be wise
+ * to remove this and let regex(3) handle the dirty details of empty patterns.
+ */
 bool		 matchall;
 
 /* Searching patterns */
@@ -154,9 +160,6 @@ enum {
 static inline const char	*init_color(const char *);
 
 /* Housekeeping */
-bool	 first = true;	/* flag whether we are processing the first match */
-bool	 prev;		/* flag whether or not the previous line matched */
-int	 tail;		/* lines left to print */
 bool	 file_err;	/* file reading error */
 
 /*
@@ -730,20 +733,25 @@ main(int argc, char *argv[])
 #endif
 	r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
 
-	/* Check if cheating is allowed (always is for fgrep). */
-	for (i = 0; i < patterns; ++i) {
+	/* Don't process any patterns if we have a blank one */
+	if (!matchall) {
+		/* Check if cheating is allowed (always is for fgrep). */
+		for (i = 0; i < patterns; ++i) {
 #ifndef WITHOUT_FASTMATCH
-		/* Attempt compilation with fastmatch regex and fallback to
-		   regex(3) if it fails. */
-		if (fastncomp(&fg_pattern[i], pattern[i].pat,
-		    pattern[i].len, cflags) == 0)
-			continue;
+			/*
+			 * Attempt compilation with fastmatch regex and
+			 * fallback to regex(3) if it fails.
+			 */
+			if (fastncomp(&fg_pattern[i], pattern[i].pat,
+			    pattern[i].len, cflags) == 0)
+				continue;
 #endif
-		c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
-		if (c != 0) {
-			regerror(c, &r_pattern[i], re_error,
-			    RE_ERROR_BUF);
-			errx(2, "%s", re_error);
+			c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
+			if (c != 0) {
+				regerror(c, &r_pattern[i], re_error,
+				    RE_ERROR_BUF);
+				errx(2, "%s", re_error);
+			}
 		}
 	}
 

Modified: stable/11/usr.bin/grep/grep.h
==============================================================================
--- stable/11/usr.bin/grep/grep.h	Wed Aug 16 17:54:29 2017	(r322586)
+++ stable/11/usr.bin/grep/grep.h	Wed Aug 16 18:00:32 2017	(r322587)
@@ -123,8 +123,7 @@ extern char	*label;
 extern const char *color;
 extern int	 binbehave, devbehave, dirbehave, filebehave, grepbehave, linkbehave;
 
-extern bool	 file_err, first, matchall, prev;
-extern int	 tail;
+extern bool	 file_err, matchall;
 extern unsigned int dpatterns, fpatterns, patterns;
 extern struct pat *pattern;
 extern struct epat *dpattern, *fpattern;
@@ -145,10 +144,10 @@ void	*grep_malloc(size_t size);
 void	*grep_calloc(size_t nmemb, size_t size);
 void	*grep_realloc(void *ptr, size_t size);
 char	*grep_strdup(const char *str);
-void	 printline(struct str *line, int sep, regmatch_t *matches, int m);
+void	 grep_printline(struct str *line, int sep);
 
 /* queue.c */
-void	 enqueue(struct str *x);
+bool	 enqueue(struct str *x);
 void	 printqueue(void);
 void	 clearqueue(void);
 

Modified: stable/11/usr.bin/grep/queue.c
==============================================================================
--- stable/11/usr.bin/grep/queue.c	Wed Aug 16 17:54:29 2017	(r322586)
+++ stable/11/usr.bin/grep/queue.c	Wed Aug 16 18:00:32 2017	(r322587)
@@ -53,7 +53,10 @@ static unsigned long long	count;
 
 static struct qentry	*dequeue(void);
 
-void
+/*
+ * Enqueue another line; return true if we've dequeued a line as a result
+ */
+bool
 enqueue(struct str *x)
 {
 	struct qentry *item;
@@ -72,7 +75,9 @@ enqueue(struct str *x)
 		item = dequeue();
 		free(item->data.dat);
 		free(item);
+		return (true);
 	}
+	return (false);
 }
 
 static struct qentry *
@@ -95,7 +100,7 @@ printqueue(void)
 	struct qentry *item;
 
 	while ((item = dequeue()) != NULL) {
-		printline(&item->data, '-', NULL, 0);
+		grep_printline(&item->data, '-');
 		free(item->data.dat);
 		free(item);
 	}

Modified: stable/11/usr.bin/grep/tests/Makefile
==============================================================================
--- stable/11/usr.bin/grep/tests/Makefile	Wed Aug 16 17:54:29 2017	(r322586)
+++ stable/11/usr.bin/grep/tests/Makefile	Wed Aug 16 18:00:32 2017	(r322587)
@@ -20,9 +20,13 @@ ${PACKAGE}FILES+=		d_context2_c.out
 ${PACKAGE}FILES+=		d_context_a.in
 ${PACKAGE}FILES+=		d_context_a.out
 ${PACKAGE}FILES+=		d_context_b.in
+${PACKAGE}FILES+=		d_context_e.in
 ${PACKAGE}FILES+=		d_context_b.out
 ${PACKAGE}FILES+=		d_context_c.out
 ${PACKAGE}FILES+=		d_context_d.out
+${PACKAGE}FILES+=		d_context_e.out
+${PACKAGE}FILES+=		d_context_f.out
+${PACKAGE}FILES+=		d_context_g.out
 ${PACKAGE}FILES+=		d_egrep.out
 ${PACKAGE}FILES+=		d_escmap.in
 ${PACKAGE}FILES+=		d_f_file_empty.in

Modified: stable/11/usr.bin/grep/util.c
==============================================================================
--- stable/11/usr.bin/grep/util.c	Wed Aug 16 17:54:29 2017	(r322586)
+++ stable/11/usr.bin/grep/util.c	Wed Aug 16 18:00:32 2017	(r322587)
@@ -54,12 +54,24 @@ __FBSDID("$FreeBSD$");
 #endif
 #include "grep.h"
 
-static int	 linesqueued;
-static int	 procline(struct str *l, int);
+static bool	 first_match = true;
 
-static int	 lasta;
-static bool	 ctxover;
+/*
+ * Parsing context; used to hold things like matches made and
+ * other useful bits
+ */
+struct parsec {
+	regmatch_t matches[MAX_LINE_MATCHES];	/* Matches made */
+	struct str ln;				/* Current line */
+	size_t matchidx;			/* Latest used match index */
+	bool binary;				/* Binary file? */
+};
 
+
+static int procline(struct parsec *pc);
+static void printline(struct parsec *pc, int sep);
+static void printline_metadata(struct str *line, int sep);
+
 bool
 file_matching(const char *fname)
 {
@@ -183,14 +195,14 @@ grep_tree(char **argv)
 int
 procfile(const char *fn)
 {
+	struct parsec pc;
 	struct file *f;
 	struct stat sb;
-	struct str ln;
+	struct str *ln;
 	mode_t s;
-	int c, t;
+	int c, last_outed, t, tail;
+	bool doctx, printmatch, same_file;
 
-	mcount = mlimit;
-
 	if (strcmp(fn, "-") == 0) {
 		fn = label != NULL ? label : getstr(1);
 		f = grep_open(NULL);
@@ -213,57 +225,99 @@ procfile(const char *fn)
 		return (0);
 	}
 
-	ln.file = grep_malloc(strlen(fn) + 1);
-	strcpy(ln.file, fn);
-	ln.line_no = 0;
-	ln.len = 0;
-	ctxover = false;
-	linesqueued = 0;
+	/* Convenience */
+	ln = &pc.ln;
+	pc.ln.file = grep_malloc(strlen(fn) + 1);
+	strcpy(pc.ln.file, fn);
+	pc.ln.line_no = 0;
+	pc.ln.len = 0;
+	pc.ln.off = -1;
+	pc.binary = f->binary;
 	tail = 0;
-	lasta = 0;
-	ln.off = -1;
+	last_outed = 0;
+	same_file = false;
+	doctx = false;
+	printmatch = true;
+	if ((pc.binary && binbehave == BINFILE_BIN) || cflag || qflag ||
+	    lflag || Lflag)
+		printmatch = false;
+	if (printmatch && (Aflag != 0 || Bflag != 0))
+		doctx = true;
+	mcount = mlimit;
 
 	for (c = 0;  c == 0 || !(lflag || qflag); ) {
-		ln.off += ln.len + 1;
-		if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL || ln.len == 0) {
-			if (ln.line_no == 0 && matchall)
-				exit(0);
+		/* Reset match count for every line processed */
+		pc.matchidx = 0;
+		pc.ln.off += pc.ln.len + 1;
+		if ((pc.ln.dat = grep_fgetln(f, &pc.ln.len)) == NULL ||
+		    pc.ln.len == 0) {
+			if (pc.ln.line_no == 0 && matchall)
+				/*
+				 * An empty file with an empty pattern and the
+				 * -w flag does not match
+				 */
+				exit(matchall && wflag ? 1 : 0);
 			else
 				break;
 		}
-		if (ln.len > 0 && ln.dat[ln.len - 1] == fileeol)
-			--ln.len;
-		ln.line_no++;
 
+		if (pc.ln.len > 0 && pc.ln.dat[pc.ln.len - 1] == fileeol)
+			--pc.ln.len;
+		pc.ln.line_no++;
+
 		/* Return if we need to skip a binary file */
-		if (f->binary && binbehave == BINFILE_SKIP) {
+		if (pc.binary && binbehave == BINFILE_SKIP) {
 			grep_close(f);
-			free(ln.file);
+			free(pc.ln.file);
 			free(f);
 			return (0);
 		}
 
-		/* Process the file line-by-line, enqueue non-matching lines */
-		if ((t = procline(&ln, f->binary)) == 0 && Bflag > 0) {
-			/* Except don't enqueue lines that appear in -A ctx */
-			if (ln.line_no == 0 || lasta != ln.line_no) {
-				/* queue is maxed to Bflag number of lines */
-				enqueue(&ln);
-				linesqueued++;
-				ctxover = false;
+		if ((t = procline(&pc)) == 0)
+			++c;
+
+		/* Deal with any -B context or context separators */
+		if (t == 0 && doctx) {
+			if (!first_match && (!same_file || last_outed > 0))
+				printf("--\n");
+			if (Bflag > 0)
+				printqueue();
+			tail = Aflag;
+		}
+		/* Print the matching line, but only if not quiet/binary */
+		if (t == 0 && printmatch) {
+			printline(&pc, ':');
+			first_match = false;
+			same_file = true;
+			last_outed = 0;
+		}
+		if (t != 0 && doctx) {
+			/* Deal with any -A context */
+			if (tail > 0) {
+				printline(&pc, '-');
+				tail--;
+				if (Bflag > 0)
+					clearqueue();
 			} else {
 				/*
-				 * Indicate to procline() that we have ctx
-				 * overlap and make sure queue is empty.
+				 * Enqueue non-matching lines for -B context.
+				 * If we're not actually doing -B context or if
+				 * the enqueue resulted in a line being rotated
+				 * out, then go ahead and increment last_outed
+				 * to signify a gap between context/match.
 				 */
-				if (!ctxover)
-					clearqueue();
-				ctxover = true;
+				if (Bflag == 0 || (Bflag > 0 && enqueue(ln)))
+					++last_outed;
 			}
 		}
-		c += t;
-		if (mflag && mcount <= 0)
-			break;
+
+		/* Count the matches if we have a match limit */
+		if (t == 0 && mflag) {
+			--mcount;
+			if (mflag && mcount <= 0)
+				break;
+		}
+
 	}
 	if (Bflag > 0)
 		clearqueue();
@@ -271,7 +325,7 @@ procfile(const char *fn)
 
 	if (cflag) {
 		if (!hflag)
-			printf("%s:", ln.file);
+			printf("%s:", pc.ln.file);
 		printf("%u\n", c);
 	}
 	if (lflag && !qflag && c != 0)
@@ -282,7 +336,7 @@ procfile(const char *fn)
 	    binbehave == BINFILE_BIN && f->binary && !qflag)
 		printf(getstr(8), fn);
 
-	free(ln.file);
+	free(pc.ln.file);
 	free(f);
 	return (c);
 }
@@ -297,62 +351,72 @@ procfile(const char *fn)
  * appropriate output.
  */
 static int
-procline(struct str *l, int nottext)
+procline(struct parsec *pc)
 {
-	regmatch_t matches[MAX_LINE_MATCHES];
-	regmatch_t pmatch, lastmatch;
+	regmatch_t pmatch, lastmatch, chkmatch;
+	wchar_t wbegin, wend;
 	size_t st = 0, nst = 0;
 	unsigned int i;
-	int c = 0, m = 0, r = 0, lastmatches = 0, leflags = eflags;
-	int startm = 0;
+	int c = 0, r = 0, lastmatches = 0, leflags = eflags;
+	size_t startm = 0, matchidx;
 	int retry;
 
+	matchidx = pc->matchidx;
+
+	/* Special case: empty pattern with -w flag, check first character */
+	if (matchall && wflag) {
+		if (pc->ln.len == 0)
+			return (0);
+		wend = L' ';
+		if (sscanf(&pc->ln.dat[0], "%lc", &wend) != 1 || iswword(wend))
+			return (1);
+		else
+			return (0);
+	} else if (matchall)
+		return (0);
+
 	/* Initialize to avoid a false positive warning from GCC. */
 	lastmatch.rm_so = lastmatch.rm_eo = 0;
 
 	/* Loop to process the whole line */
-	while (st <= l->len) {
+	while (st <= pc->ln.len) {
 		lastmatches = 0;
-		startm = m;
+		startm = matchidx;
 		retry = 0;
 		if (st > 0)
 			leflags |= REG_NOTBOL;
 		/* Loop to compare with all the patterns */
 		for (i = 0; i < patterns; i++) {
 			pmatch.rm_so = st;
-			pmatch.rm_eo = l->len;
+			pmatch.rm_eo = pc->ln.len;
 #ifndef WITHOUT_FASTMATCH
 			if (fg_pattern[i].pattern)
 				r = fastexec(&fg_pattern[i],
-				    l->dat, 1, &pmatch, leflags);
+				    pc->ln.dat, 1, &pmatch, leflags);
 			else
 #endif
-				r = regexec(&r_pattern[i], l->dat, 1,
+				r = regexec(&r_pattern[i], pc->ln.dat, 1,
 				    &pmatch, leflags);
-			r = (r == 0) ? 0 : REG_NOMATCH;
-			if (r == REG_NOMATCH)
+			if (r != 0)
 				continue;
 			/* Check for full match */
-			if (r == 0 && xflag)
-				if (pmatch.rm_so != 0 ||
-				    (size_t)pmatch.rm_eo != l->len)
-					r = REG_NOMATCH;
+			if (xflag && (pmatch.rm_so != 0 ||
+			    (size_t)pmatch.rm_eo != pc->ln.len))
+				continue;
 			/* Check for whole word match */
 #ifndef WITHOUT_FASTMATCH
-			if (r == 0 && (wflag || fg_pattern[i].word)) {
+			if (wflag || fg_pattern[i].word) {
 #else
-			if (r == 0 && wflag) {
+			if (wflag) {
 #endif
-				wchar_t wbegin, wend;
-
 				wbegin = wend = L' ';
 				if (pmatch.rm_so != 0 &&
-				    sscanf(&l->dat[pmatch.rm_so - 1],
+				    sscanf(&pc->ln.dat[pmatch.rm_so - 1],
 				    "%lc", &wbegin) != 1)
 					r = REG_NOMATCH;
 				else if ((size_t)pmatch.rm_eo !=
-				    l->len &&
-				    sscanf(&l->dat[pmatch.rm_eo],
+				    pc->ln.len &&
+				    sscanf(&pc->ln.dat[pmatch.rm_eo],
 				    "%lc", &wend) != 1)
 					r = REG_NOMATCH;
 				else if (iswword(wbegin) ||
@@ -361,7 +425,7 @@ procline(struct str *l, int nottext)
 				/*
 				 * If we're doing whole word matching and we
 				 * matched once, then we should try the pattern
-				 * again after advancing just past the start  of
+				 * again after advancing just past the start of
 				 * the earliest match. This allows the pattern
 				 * to  match later on in the line and possibly
 				 * still match a whole word.
@@ -369,33 +433,40 @@ procline(struct str *l, int nottext)
 				if (r == REG_NOMATCH &&
 				    (retry == 0 || pmatch.rm_so + 1 < retry))
 					retry = pmatch.rm_so + 1;
+				if (r == REG_NOMATCH)
+					continue;
 			}
-			if (r == 0) {
-				lastmatches++;
-				lastmatch = pmatch;
-				if (m == 0)
-					c++;
 
-				if (m < MAX_LINE_MATCHES) {
-					/* Replace previous match if the new one is earlier and/or longer */
-					if (m > startm) {
-						if (pmatch.rm_so < matches[m-1].rm_so ||
-						    (pmatch.rm_so == matches[m-1].rm_so && (pmatch.rm_eo - pmatch.rm_so) > (matches[m-1].rm_eo - matches[m-1].rm_so))) {
-							matches[m-1] = pmatch;
-							nst = pmatch.rm_eo;
-						}
-					} else {
-						/* Advance as normal if not */
-						matches[m++] = pmatch;
-						nst = pmatch.rm_eo;
-					}
-				}
+			lastmatches++;
+			lastmatch = pmatch;
 
-				/* matches - skip further patterns */
-				if ((color == NULL && !oflag) ||
-				    qflag || lflag)
-					break;
+			if (matchidx == 0)
+				c++;
+
+			/*
+			 * Replace previous match if the new one is earlier
+			 * and/or longer. This will lead to some amount of
+			 * extra work if -o/--color are specified, but it's
+			 * worth it from a correctness point of view.
+			 */
+			if (matchidx > startm) {
+				chkmatch = pc->matches[matchidx - 1];
+				if (pmatch.rm_so < chkmatch.rm_so ||
+				    (pmatch.rm_so == chkmatch.rm_so &&
+				    (pmatch.rm_eo - pmatch.rm_so) >
+				    (chkmatch.rm_eo - chkmatch.rm_so))) {
+					pc->matches[matchidx - 1] = pmatch;
+					nst = pmatch.rm_eo;
+				}
+			} else {
+				/* Advance as normal if not */
+				pc->matches[matchidx++] = pmatch;
+				nst = pmatch.rm_eo;
 			}
+			/* avoid excessive matching - skip further patterns */
+			if ((color == NULL && !oflag) || qflag || lflag ||
+			    matchidx >= MAX_LINE_MATCHES)
+				break;
 		}
 
 		/*
@@ -414,7 +485,7 @@ procline(struct str *l, int nottext)
 
 		/* If we didn't have any matches or REG_NOSUB set */
 		if (lastmatches == 0 || (cflags & REG_NOSUB))
-			nst = l->len;
+			nst = pc->ln.len;
 
 		if (lastmatches == 0)
 			/* No matches */
@@ -427,45 +498,11 @@ procline(struct str *l, int nottext)
 		st = nst;
 	}
 
-
+	/* Reflect the new matchidx in the context */
+	pc->matchidx = matchidx;
 	if (vflag)
 		c = !c;
-
-	/* Count the matches if we have a match limit */
-	if (mflag)
-		mcount -= c;
-
-	if (c && binbehave == BINFILE_BIN && nottext)
-		return (c); /* Binary file */
-
-	/* Dealing with the context */
-	if ((tail || c) && !cflag && !qflag && !lflag && !Lflag) {
-		if (c) {
-			if (!first && !prev && !tail && (Bflag || Aflag) &&
-			    !ctxover)
-				printf("--\n");
-			tail = Aflag;
-			if (Bflag > 0) {
-				printqueue();
-				ctxover = false;
-			}
-			linesqueued = 0;
-			printline(l, ':', matches, m);
-		} else {
-			/* Print -A lines following matches */
-			lasta = l->line_no;
-			printline(l, '-', matches, m);
-			tail--;
-		}
-	}
-
-	if (c) {
-		prev = true;
-		first = false;
-	} else
-		prev = false;
-
-	return (c);
+	return (c ? 0 : 1);
 }
 
 /*
@@ -520,69 +557,89 @@ grep_strdup(const char *str)
 }
 
 /*
- * Prints a matching line according to the command line options.
+ * Print an entire line as-is, there are no inline matches to consider. This is
+ * used for printing context.
  */
-void
-printline(struct str *line, int sep, regmatch_t *matches, int m)
+void grep_printline(struct str *line, int sep) {
+	printline_metadata(line, sep);
+	fwrite(line->dat, line->len, 1, stdout);
+	putchar(fileeol);
+}
+
+static void
+printline_metadata(struct str *line, int sep)
 {
-	size_t a = 0;
-	int i, n = 0;
+	bool printsep;
 
-	/* If matchall, everything matches but don't actually print for -o */
-	if (oflag && matchall)
-		return;
-
+	printsep = false;
 	if (!hflag) {
 		if (!nullflag) {
 			fputs(line->file, stdout);
-			++n;
+			printsep = true;
 		} else {
 			printf("%s", line->file);
 			putchar(0);
 		}
 	}
 	if (nflag) {
-		if (n > 0)
+		if (printsep)
 			putchar(sep);
 		printf("%d", line->line_no);
-		++n;
+		printsep = true;
 	}
 	if (bflag) {
-		if (n > 0)
+		if (printsep)
 			putchar(sep);
 		printf("%lld", (long long)line->off);
-		++n;
+		printsep = true;
 	}
-	if (n)
+	if (printsep)
 		putchar(sep);
+}
+
+/*
+ * Prints a matching line according to the command line options.
+ */
+static void
+printline(struct parsec *pc, int sep)
+{
+	size_t a = 0;
+	size_t i, matchidx;
+	regmatch_t match;
+
+	/* If matchall, everything matches but don't actually print for -o */
+	if (oflag && matchall)
+		return;
+
+	matchidx = pc->matchidx;
+
 	/* --color and -o */
-	if ((oflag || color) && m > 0) {
-		for (i = 0; i < m; i++) {
+	if ((oflag || color) && matchidx > 0) {
+		printline_metadata(&pc->ln, sep);
+		for (i = 0; i < matchidx; i++) {
+			match = pc->matches[i];
 			/* Don't output zero length matches */
-			if (matches[i].rm_so == matches[i].rm_eo)
+			if (match.rm_so == match.rm_eo)
 				continue;
 			if (!oflag)
-				fwrite(line->dat + a, matches[i].rm_so - a, 1,
+				fwrite(pc->ln.dat + a, match.rm_so - a, 1,
 				    stdout);
-			if (color) 
+			if (color)
 				fprintf(stdout, "\33[%sm\33[K", color);
-
-				fwrite(line->dat + matches[i].rm_so, 
-				    matches[i].rm_eo - matches[i].rm_so, 1,
-				    stdout);
-			if (color) 
+			fwrite(pc->ln.dat + match.rm_so,
+			    match.rm_eo - match.rm_so, 1, stdout);
+			if (color)
 				fprintf(stdout, "\33[m\33[K");
-			a = matches[i].rm_eo;
+			a = match.rm_eo;
 			if (oflag)
 				putchar('\n');
 		}
 		if (!oflag) {
-			if (line->len - a > 0)
-				fwrite(line->dat + a, line->len - a, 1, stdout);
+			if (pc->ln.len - a > 0)
+				fwrite(pc->ln.dat + a, pc->ln.len - a, 1,
+				    stdout);
 			putchar('\n');
 		}
-	} else {
-		fwrite(line->dat, line->len, 1, stdout);
-		putchar(fileeol);
-	}
+	} else
+		grep_printline(&pc->ln, sep);
 }



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201708161800.v7GI0W8A059555>