Skip site navigation (1) | Skip section navigation (2)
Date:      Mon, 11 Sep 2017 15:52:24 +0000 (UTC)
From:      Kyle Evans <kevans@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org
Subject:   svn commit: r323443 - in stable/11: contrib/netbsd-tests/usr.bin/grep usr.bin/grep
Message-ID:  <201709111552.v8BFqOAi005604@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: kevans
Date: Mon Sep 11 15:52:24 2017
New Revision: 323443
URL: https://svnweb.freebsd.org/changeset/base/323443

Log:
  bsdgrep: add a primitive literal matcher to unbreak fgrep in some scenarios
  
  MFC r322825: bsdgrep: add some additional tests for fgrep
  
  Previously added tests only check that fgrep is somewhat sane and works. Add
  some more tests that check that the implementation is basically functional
  and not producing incorrect results with various flags.
  
  MFC r322826: bsdgrep: add a primitive literal matcher
  
  fgrep/grep -F will error out at runtime if compiled with a regex(3)
  that does not define REG_NOSPEC or REG_LITERAL. glibc is one such regex(3)
  implementation, and as it turns out they don't support literal matching at
  all.
  
  Provide a primitive literal matcher for use with glibc and other
  implementations that don't support literal matching so that we don't
  completely lose fgrep/grep -F if compiled against libgnuregex on stable/10,
  stable/11, or other systems that we don't necessarily support.
  
  This is a wholly unoptimized implementation with no plans to optimize it as
  of now. This is due both to its use-case being primarily on unsupported
  systems in the not-too-distant future and to its reinventing a wheel that
  we already have available as a feature of regex(3).
  
  PR:		222201
  Approved by:	emaste (mentor, blanket MFC)

Modified:
  stable/11/contrib/netbsd-tests/usr.bin/grep/t_grep.sh
  stable/11/usr.bin/grep/grep.c
  stable/11/usr.bin/grep/grep.h
  stable/11/usr.bin/grep/util.c
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/contrib/netbsd-tests/usr.bin/grep/t_grep.sh
==============================================================================
--- stable/11/contrib/netbsd-tests/usr.bin/grep/t_grep.sh	Mon Sep 11 15:38:51 2017	(r323442)
+++ stable/11/contrib/netbsd-tests/usr.bin/grep/t_grep.sh	Mon Sep 11 15:52:24 2017	(r323443)
@@ -685,6 +685,59 @@ matchall_body()
 
 	atf_check -s exit:1 grep "" test1
 }
+
+atf_test_case fgrep_multipattern
+fgrep_multipattern_head()
+{
+	atf_set "descr" "Check proper behavior with multiple patterns supplied to fgrep"
+}
+fgrep_multipattern_body()
+{
+	printf "Foo\nBar\nBaz" > test1
+
+	atf_check -o inline:"Foo\nBaz\n" grep -F -e "Foo" -e "Baz" test1
+	atf_check -o inline:"Foo\nBaz\n" grep -F -e "Baz" -e "Foo" test1
+	atf_check -o inline:"Bar\nBaz\n" grep -F -e "Bar" -e "Baz" test1
+}
+
+atf_test_case fgrep_icase
+fgrep_icase_head()
+{
+	atf_set "descr" "Check proper handling of -i supplied to fgrep"
+}
+fgrep_icase_body()
+{
+	printf "Foo\nBar\nBaz" > test1
+
+	atf_check -o inline:"Foo\nBaz\n" grep -Fi -e "foo" -e "baz" test1
+	atf_check -o inline:"Foo\nBaz\n" grep -Fi -e "baz" -e "foo" test1
+	atf_check -o inline:"Bar\nBaz\n" grep -Fi -e "bar" -e "baz" test1
+	atf_check -o inline:"Bar\nBaz\n" grep -Fi -e "BAR" -e "bAz" test1
+}
+
+atf_test_case fgrep_oflag
+fgrep_oflag_head()
+{
+	atf_set "descr" "Check proper handling of -o supplied to fgrep"
+}
+fgrep_oflag_body()
+{
+	printf "abcdefghi\n" > test1
+
+	atf_check -o inline:"a\n" grep -Fo "a" test1
+	atf_check -o inline:"i\n" grep -Fo "i" test1
+	atf_check -o inline:"abc\n" grep -Fo "abc" test1
+	atf_check -o inline:"fgh\n" grep -Fo "fgh" test1
+	atf_check -o inline:"cde\n" grep -Fo "cde" test1
+	atf_check -o inline:"bcd\n" grep -Fo -e "bcd" -e "cde" test1
+	atf_check -o inline:"bcd\nefg\n" grep -Fo -e "bcd" -e "efg" test1
+
+	atf_check -s exit:1 grep -Fo "xabc" test1
+	atf_check -s exit:1 grep -Fo "abcx" test1
+	atf_check -s exit:1 grep -Fo "xghi" test1
+	atf_check -s exit:1 grep -Fo "ghix" test1
+	atf_check -s exit:1 grep -Fo "abcdefghiklmnopqrstuvwxyz" test1
+}
 # End FreeBSD
 
 atf_init_test_cases()
@@ -726,5 +779,8 @@ atf_init_test_cases()
 	atf_add_test_case mmap
 	atf_add_test_case mmap_eof_not_eol
 	atf_add_test_case matchall
+	atf_add_test_case fgrep_multipattern
+	atf_add_test_case fgrep_icase
+	atf_add_test_case fgrep_oflag
 # End FreeBSD
 }

Modified: stable/11/usr.bin/grep/grep.c
==============================================================================
--- stable/11/usr.bin/grep/grep.c	Mon Sep 11 15:38:51 2017	(r323442)
+++ stable/11/usr.bin/grep/grep.c	Mon Sep 11 15:52:24 2017	(r323443)
@@ -721,12 +721,19 @@ main(int argc, char *argv[])
 	case GREP_BASIC:
 		break;
 	case GREP_FIXED:
+		/*
+		 * regex(3) implementations that support fixed-string searches generally
+		 * define either REG_NOSPEC or REG_LITERAL. Set the appropriate flag
+		 * here. If neither are defined, GREP_FIXED later implies that the
+		 * internal literal matcher should be used. Other cflags that have
+		 * the same interpretation as REG_NOSPEC and REG_LITERAL should be
+		 * similarly added here, and grep.h should be amended to take this into
+		 * consideration when defining WITH_INTERNAL_NOSPEC.
+		 */
 #if defined(REG_NOSPEC)
 		cflags |= REG_NOSPEC;
 #elif defined(REG_LITERAL)
 		cflags |= REG_LITERAL;
-#else
-		errx(2, "literal expressions not supported at compile time");
 #endif
 		break;
 	case GREP_EXTENDED:
@@ -743,7 +750,11 @@ main(int argc, char *argv[])
 	r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
 
 	/* Don't process any patterns if we have a blank one */
+#ifdef WITH_INTERNAL_NOSPEC
+	if (!matchall && grepbehave != GREP_FIXED) {
+#else
 	if (!matchall) {
+#endif
 		/* Check if cheating is allowed (always is for fgrep). */
 		for (i = 0; i < patterns; ++i) {
 #ifndef WITHOUT_FASTMATCH

Modified: stable/11/usr.bin/grep/grep.h
==============================================================================
--- stable/11/usr.bin/grep/grep.h	Mon Sep 11 15:38:51 2017	(r323442)
+++ stable/11/usr.bin/grep/grep.h	Mon Sep 11 15:52:24 2017	(r323443)
@@ -57,6 +57,10 @@ extern const char		*errstr[];
 #define	GREP_BASIC	1
 #define	GREP_EXTENDED	2
 
+#if !defined(REG_NOSPEC) && !defined(REG_LITERAL)
+#define WITH_INTERNAL_NOSPEC
+#endif
+
 #define	BINFILE_BIN	0
 #define	BINFILE_SKIP	1
 #define	BINFILE_TEXT	2

Modified: stable/11/usr.bin/grep/util.c
==============================================================================
--- stable/11/usr.bin/grep/util.c	Mon Sep 11 15:38:51 2017	(r323442)
+++ stable/11/usr.bin/grep/util.c	Mon Sep 11 15:52:24 2017	(r323443)
@@ -70,7 +70,10 @@ struct parsec {
 	bool		binary;				/* Binary file? */
 };
 
-
+#ifdef WITH_INTERNAL_NOSPEC
+static int litexec(const struct pat *pat, const char *string,
+    size_t nmatch, regmatch_t pmatch[]);
+#endif
 static int procline(struct parsec *pc);
 static void printline(struct parsec *pc, int sep);
 static void printline_metadata(struct str *line, int sep);
@@ -350,6 +353,67 @@ procfile(const char *fn)
 	return (c);
 }
 
+#ifdef WITH_INTERNAL_NOSPEC
+/*
+ * Internal implementation of literal string search within a string, modeled
+ * after regexec(3), for use when the regex(3) implementation doesn't offer
+ * either REG_NOSPEC or REG_LITERAL. This does not apply in the default FreeBSD
+ * config, but in other scenarios such as building against libgnuregex or on
+ * some non-FreeBSD OSes.
+ */
+static int
+litexec(const struct pat *pat, const char *string, size_t nmatch,
+    regmatch_t pmatch[])
+{
+	char *(*strstr_fn)(const char *, const char *);
+	char *sub, *subject;
+	const char *search;
+	size_t idx, n, ofs, stringlen;
+
+	if (cflags & REG_ICASE)
+		strstr_fn = strcasestr;
+	else
+		strstr_fn = strstr;
+	idx = 0;
+	ofs = pmatch[0].rm_so;
+	stringlen = pmatch[0].rm_eo;
+	if (ofs >= stringlen)
+		return (REG_NOMATCH);
+	subject = strndup(string, stringlen);
+	if (subject == NULL)
+		return (REG_ESPACE);
+	for (n = 0; ofs < stringlen;) {
+		search = (subject + ofs);
+		if ((unsigned long)pat->len > strlen(search))
+			break;
+		sub = strstr_fn(search, pat->pat);
+		/*
+		 * Ignoring the empty string possibility due to context: grep optimizes
+		 * for empty patterns and will never reach this point.
+		 */
+		if (sub == NULL)
+			break;
+		++n;
+		/* Fill in pmatch if necessary */
+		if (nmatch > 0) {
+			pmatch[idx].rm_so = ofs + (sub - search);
+			pmatch[idx].rm_eo = pmatch[idx].rm_so + pat->len;
+			if (++idx == nmatch)
+				break;
+			ofs = pmatch[idx].rm_so + 1;
+		} else
+			/* We only needed to know if we match or not */
+			break;
+	}
+	free(subject);
+	if (n > 0 && nmatch > 0)
+		for (n = idx; n < nmatch; ++n)
+			pmatch[n].rm_so = pmatch[n].rm_eo = -1;
+
+	return (n > 0 ? 0 : REG_NOMATCH);
+}
+#endif /* WITH_INTERNAL_NOSPEC */
+
 #define iswword(x)	(iswalnum((x)) || (x) == L'_')
 
 /*
@@ -400,6 +464,11 @@ procline(struct parsec *pc)
 		for (i = 0; i < patterns; i++) {
 			pmatch.rm_so = st;
 			pmatch.rm_eo = pc->ln.len;
+#ifdef WITH_INTERNAL_NOSPEC
+			if (grepbehave == GREP_FIXED)
+				r = litexec(&pattern[i], pc->ln.dat, 1, &pmatch);
+			else
+#endif
 #ifndef WITHOUT_FASTMATCH
 			if (fg_pattern[i].pattern)
 				r = fastexec(&fg_pattern[i],



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201709111552.v8BFqOAi005604>