Skip site navigation (1) Skip section navigation (2)
Date:      Sat, 20 May 2017 03:51:31 +0000 (UTC)
From:      Ed Maste <emaste@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r318571 - in head: contrib/netbsd-tests/usr.bin/grep usr.bin/grep
Message-ID:  <201705200351.v4K3pV3g090643@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: emaste
Date: Sat May 20 03:51:31 2017
New Revision: 318571
URL: https://svnweb.freebsd.org/changeset/base/318571

Log:
  bsdgrep: emit more than MAX_LINE_MATCHES per line
  
  We should not set an arbitrary cap on the number of matches on a line,
  and in any case MAX_LINE_MATCHES of 32 is much too low.  Instead, if we
  match more than MAX_LINE_MATCHES, keep processing and matching from the
  last match until all are found.
  
  For the regression test, we produce 4096 matches (larger than we expect
  we'll ever set MAX_LINE_MATCHES) and make sure we actually get 4096
  lines of output with the -o flag.
  
  We'll also make sure that every line of output carries its own line
  number, so that we detect line metadata not being printed as
  appropriate along the way.
  
  PR:		218811
  Submitted by:	Kyle Evans <kevans91@ksu.edu>
  Reported by:	jbeich
  Reviewed by:	cem
  Differential Revision:	https://reviews.freebsd.org/D10577

Modified:
  head/contrib/netbsd-tests/usr.bin/grep/t_grep.sh
  head/usr.bin/grep/util.c

Modified: head/contrib/netbsd-tests/usr.bin/grep/t_grep.sh
==============================================================================
--- head/contrib/netbsd-tests/usr.bin/grep/t_grep.sh	Sat May 20 01:04:47 2017	(r318570)
+++ head/contrib/netbsd-tests/usr.bin/grep/t_grep.sh	Sat May 20 03:51:31 2017	(r318571)
@@ -413,6 +413,26 @@ wflag_emptypat_body()
 	atf_check -o file:test4 grep -w -e "" test4
 }
 
+atf_test_case excessive_matches
+excessive_matches_head()
+{
+	atf_set "descr" "Check for proper handling of lines with excessive matches (PR 218811)"
+}
+excessive_matches_body()
+{
+	grep_type
+	if [ $? -eq $GREP_TYPE_GNU_FREEBSD ]; then
+		atf_expect_fail "this test does not pass with GNU grep in base"
+	fi
+
+	for i in $(jot 4096); do
+		printf "x" >> test.in
+	done
+
+	atf_check -s exit:0 -x '[ $(grep -o x test.in | wc -l) -eq 4096 ]'
+	#atf_check -s exit:1 -x 'grep -on x test.in | grep -v "1:x"'
+}
+
 atf_test_case fgrep_sanity
 fgrep_sanity_head()
 {
@@ -603,6 +623,7 @@ atf_init_test_cases()
 	atf_add_test_case egrep_empty_invalid
 	atf_add_test_case zerolen
 	atf_add_test_case wflag_emptypat
+	atf_add_test_case excessive_matches
 	atf_add_test_case wv_combo_break
 	atf_add_test_case fgrep_sanity
 	atf_add_test_case egrep_sanity

Modified: head/usr.bin/grep/util.c
==============================================================================
--- head/usr.bin/grep/util.c	Sat May 20 01:04:47 2017	(r318570)
+++ head/usr.bin/grep/util.c	Sat May 20 03:51:31 2017	(r318571)
@@ -63,6 +63,7 @@ static bool	 first_match = true;
 struct parsec {
 	regmatch_t matches[MAX_LINE_MATCHES];	/* Matches made */
 	struct str ln;				/* Current line */
+	size_t lnstart;				/* Start of line processing */
 	size_t matchidx;			/* Latest used match index */
 	bool binary;				/* Binary file? */
 };
@@ -247,8 +248,9 @@ procfile(const char *fn)
 	mcount = mlimit;
 
 	for (c = 0;  c == 0 || !(lflag || qflag); ) {
-		/* Reset match count for every line processed */
+		/* Reset match count and line start for every line processed */
 		pc.matchidx = 0;
+		pc.lnstart = 0;
 		pc.ln.off += pc.ln.len + 1;
 		if ((pc.ln.dat = grep_fgetln(f, &pc.ln.len)) == NULL ||
 		    pc.ln.len == 0) {
@@ -288,6 +290,14 @@ procfile(const char *fn)
 		/* Print the matching line, but only if not quiet/binary */
 		if (t == 0 && printmatch) {
 			printline(&pc, ':');
+			while (pc.matchidx >= MAX_LINE_MATCHES) {
+				/* Reset matchidx and try again */
+				pc.matchidx = 0;
+				if (procline(&pc) == 0)
+					printline(&pc, ':');
+				else
+					break;
+			}
 			first_match = false;
 			same_file = true;
 			last_outed = 0;
@@ -356,11 +366,11 @@ procline(struct parsec *pc)
 {
 	regmatch_t pmatch, lastmatch, chkmatch;
 	wchar_t wbegin, wend;
-	size_t st = 0, nst = 0;
+	size_t st, nst;
 	unsigned int i;
 	int c = 0, r = 0, lastmatches = 0, leflags = eflags;
 	size_t startm = 0, matchidx;
-	int retry;
+	unsigned int retry;
 
 	matchidx = pc->matchidx;
 
@@ -376,6 +386,8 @@ procline(struct parsec *pc)
 	} else if (matchall)
 		return (0);
 
+	st = pc->lnstart;
+	nst = 0;
 	/* Initialize to avoid a false positive warning from GCC. */
 	lastmatch.rm_so = lastmatch.rm_eo = 0;
 
@@ -432,12 +444,12 @@ procline(struct parsec *pc)
 				 * still match a whole word.
 				 */
 				if (r == REG_NOMATCH &&
-				    (retry == 0 || pmatch.rm_so + 1 < retry))
+				    (retry == pc->lnstart ||
+				    pmatch.rm_so + 1 < retry))
 					retry = pmatch.rm_so + 1;
 				if (r == REG_NOMATCH)
 					continue;
 			}
-
 			lastmatches++;
 			lastmatch = pmatch;
 
@@ -466,8 +478,11 @@ procline(struct parsec *pc)
 			}
 			/* avoid excessive matching - skip further patterns */
 			if ((color == NULL && !oflag) || qflag || lflag ||
-			    matchidx >= MAX_LINE_MATCHES)
+			    matchidx >= MAX_LINE_MATCHES) {
+				pc->lnstart = nst;
+				lastmatches = 0;
 				break;
+			}
 		}
 
 		/*
@@ -475,7 +490,7 @@ procline(struct parsec *pc)
 		 * again just in case we still have a chance to match later in
 		 * the string.
 		 */
-		if (lastmatches == 0 && retry > 0) {
+		if (lastmatches == 0 && retry > pc->lnstart) {
 			st = retry;
 			continue;
 		}
@@ -497,6 +512,7 @@ procline(struct parsec *pc)
 
 		/* Advance st based on previous matches */
 		st = nst;
+		pc->lnstart = st;
 	}
 
 	/* Reflect the new matchidx in the context */



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201705200351.v4K3pV3g090643>