Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 17 Aug 2010 15:07:23 +0000 (UTC)
From:      Jaakko Heinonen <jh@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org
Subject:   svn commit: r211429 - stable/8/usr.bin/uniq
Message-ID:  <201008171507.o7HF7NiU036279@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jh
Date: Tue Aug 17 15:07:23 2010
New Revision: 211429
URL: http://svn.freebsd.org/changeset/base/211429

Log:
  MFC r200633:
  
  Sync getline() with comm(1):
  
   - Prevent overflowing of the buffer length variable in getline() by
     limiting its maximum value.
   - Exit if reallocf(3) fails in getline(). Failure was silently
     considered as end-of-file.
  
  MFC r204803 by ache:
  
  1) Rewrite input processing to not exit with an error on the first EILSEQ found
  in the input data, but fall back to a "binary equal" check instead.
  
  POSIX says: "The input file shall be a text file", nothing more,
  so a text file containing an illegal sequence is valid input.
  BTW, GNU sort does not fail on EILSEQ either.
  
  2) Speed up input processing a bit in complex cases like skipping fields,
  chars or ignoring case.
  
  3) Enforce the implied LINE_MAX limit (from POSIX definition of "text file"
  and POSIX uniq(1) description).
  
  MFC r204811 by ache:
  
  Remove vestiges of the old %-format which prevent the build on amd64
  
  MFC r204876 by ache:
  
  1) Reimplement (differently) the unlimited line length that was restricted
  in the previous commit.
  
  2) Honor a missing final \n on output (do not add it if the input lacks it).
  
  MFC r204927 by ache:
  
  Add SIZE_MAX overflow check

Modified:
  stable/8/usr.bin/uniq/uniq.c
Directory Properties:
  stable/8/usr.bin/uniq/   (props changed)

Modified: stable/8/usr.bin/uniq/uniq.c
==============================================================================
--- stable/8/usr.bin/uniq/uniq.c	Tue Aug 17 15:02:33 2010	(r211428)
+++ stable/8/usr.bin/uniq/uniq.c	Tue Aug 17 15:07:23 2010	(r211429)
@@ -52,6 +52,8 @@ static const char rcsid[] =
 #include <err.h>
 #include <limits.h>
 #include <locale.h>
+#include <stdint.h>
+#define _WITH_GETLINE
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -59,30 +61,26 @@ static const char rcsid[] =
 #include <wchar.h>
 #include <wctype.h>
 
-#define	MAXLINELEN	(LINE_MAX + 1)
-
-int cflag, dflag, uflag;
+int cflag, dflag, uflag, iflag;
 int numchars, numfields, repeats;
 
 FILE	*file(const char *, const char *);
-wchar_t	*getline(wchar_t *, size_t *, FILE *);
-void	 show(FILE *, wchar_t *);
+wchar_t	*convert(const char *);
+int	 inlcmp(const char *, const char *);
+void	 show(FILE *, const char *);
 wchar_t	*skip(wchar_t *);
 void	 obsolete(char *[]);
 static void	 usage(void);
-int      wcsicoll(wchar_t *, wchar_t *);
 
 int
 main (int argc, char *argv[])
 {
-	wchar_t *t1, *t2;
+	wchar_t *tprev, *tthis;
 	FILE *ifp, *ofp;
-	int ch, b1;
-	size_t prevbuflen, thisbuflen;
-	wchar_t *prevline, *thisline;
-	char *p;
+	int ch, comp;
+	size_t prevbuflen, thisbuflen, b1;
+	char *prevline, *thisline, *p;
 	const char *ifn;
-	int iflag = 0, comp;
 
 	(void) setlocale(LC_ALL, "");
 
@@ -137,48 +135,48 @@ main (int argc, char *argv[])
 	if (argc > 1)
 		ofp = file(argv[1], "w");
 
- 	prevbuflen = MAXLINELEN;
- 	thisbuflen = MAXLINELEN;
- 	prevline = malloc(prevbuflen * sizeof(*prevline));
- 	thisline = malloc(thisbuflen * sizeof(*thisline));
-	if (prevline == NULL || thisline == NULL)
-		err(1, "malloc");
+	prevbuflen = thisbuflen = 0;
+	prevline = thisline = NULL;
 
-	if ((prevline = getline(prevline, &prevbuflen, ifp)) == NULL) {
+	if (getline(&prevline, &prevbuflen, ifp) < 0) {
 		if (ferror(ifp))
 			err(1, "%s", ifn);
 		exit(0);
 	}
+	tprev = convert(prevline);
+
 	if (!cflag && uflag && dflag)
 		show(ofp, prevline);
 
-	while ((thisline = getline(thisline, &thisbuflen, ifp)) != NULL) {
-		/* If requested get the chosen fields + character offsets. */
-		if (numfields || numchars) {
-			t1 = skip(thisline);
-			t2 = skip(prevline);
-		} else {
-			t1 = thisline;
-			t2 = prevline;
-		}
-
-		/* If different, print; set previous to new value. */
-		if (iflag)
-			comp = wcsicoll(t1, t2);
+	tthis = NULL;
+	while (getline(&thisline, &thisbuflen, ifp) >= 0) {
+		if (tthis != NULL)
+			free(tthis);
+		tthis = convert(thisline);
+
+		if (tthis == NULL && tprev == NULL)
+			comp = inlcmp(thisline, prevline);
+		else if (tthis == NULL || tprev == NULL)
+			comp = 1;
 		else
-			comp = wcscoll(t1, t2);
+			comp = wcscoll(tthis, tprev);
 
 		if (comp) {
+			/* If different, print; set previous to new value. */
 			if (cflag || !dflag || !uflag)
 				show(ofp, prevline);
-			t1 = prevline;
+			p = prevline;
 			b1 = prevbuflen;
 			prevline = thisline;
 			prevbuflen = thisbuflen;
+			if (tprev != NULL)
+				free(tprev);
+			tprev = tthis;
 			if (!cflag && uflag && dflag)
 				show(ofp, prevline);
-			thisline = t1;
+			thisline = p;
 			thisbuflen = b1;
+			tthis = NULL;
 			repeats = 0;
 		} else
 			++repeats;
@@ -191,25 +189,55 @@ main (int argc, char *argv[])
 }
 
 wchar_t *
-getline(wchar_t *buf, size_t *buflen, FILE *fp)
+convert(const char *str)
 {
-	size_t bufpos;
-	wint_t ch;
+	size_t n;
+	wchar_t *buf, *ret, *p;
 
-	bufpos = 0;
-	while ((ch = getwc(fp)) != WEOF && ch != '\n') {
-		if (bufpos + 2 >= *buflen) {
-			*buflen = *buflen * 2;
-			buf = reallocf(buf, *buflen * sizeof(*buf));
-			if (buf == NULL)
-				return (NULL);
-		}
-		buf[bufpos++] = ch;
+	if ((n = mbstowcs(NULL, str, 0)) == (size_t)-1)
+		return (NULL);
+	if (SIZE_MAX / sizeof(*buf) < n + 1)
+		errx(1, "conversion buffer length overflow");
+	if ((buf = malloc((n + 1) * sizeof(*buf))) == NULL)
+		err(1, "malloc");
+	if (mbstowcs(buf, str, n + 1) != n)
+		errx(1, "internal mbstowcs() error");
+	/* The last line may not end with \n. */
+	if (n > 0 && buf[n - 1] == L'\n')
+		buf[n - 1] = L'\0';
+
+	/* If requested get the chosen fields + character offsets. */
+	if (numfields || numchars) {
+		if ((ret = wcsdup(skip(buf))) == NULL)
+			err(1, "wcsdup");
+		free(buf);
+	} else
+		ret = buf;
+
+	if (iflag) {
+		for (p = ret; *p != L'\0'; p++)
+			*p = towlower(*p);
 	}
-	if (bufpos + 1 != *buflen)
-		buf[bufpos] = '\0';
 
-	return (bufpos != 0 || ch == '\n' ? buf : NULL);
+	return (ret);
+}
+
+int
+inlcmp(const char *s1, const char *s2)
+{
+	int c1, c2;
+
+	while (*s1 == *s2++)
+		if (*s1++ == '\0')
+			return (0);
+	c1 = (unsigned char)*s1;
+	c2 = (unsigned char)*(s2 - 1);
+	/* The last line may not end with \n. */
+	if (c1 == '\n')
+		c1 = '\0';
+	if (c2 == '\n')
+		c2 = '\0';
+	return (c1 - c2);
 }
 
 /*
@@ -218,13 +246,13 @@ getline(wchar_t *buf, size_t *buflen, FI
  *	of the line.
  */
 void
-show(FILE *ofp, wchar_t *str)
+show(FILE *ofp, const char *str)
 {
 
 	if (cflag)
-		(void)fprintf(ofp, "%4d %ls\n", repeats + 1, str);
+		(void)fprintf(ofp, "%4d %s", repeats + 1, str);
 	if ((dflag && repeats) || (uflag && !repeats))
-		(void)fprintf(ofp, "%ls\n", str);
+		(void)fprintf(ofp, "%s", str);
 }
 
 wchar_t *
@@ -232,13 +260,14 @@ skip(wchar_t *str)
 {
 	int nchars, nfields;
 
-	for (nfields = 0; *str != '\0' && nfields++ != numfields; ) {
+	for (nfields = 0; *str != L'\0' && nfields++ != numfields; ) {
 		while (iswblank(*str))
 			str++;
-		while (*str != '\0' && !iswblank(*str))
+		while (*str != L'\0' && !iswblank(*str))
 			str++;
 	}
-	for (nchars = numchars; nchars-- && *str; ++str);
+	for (nchars = numchars; nchars-- && *str != L'\0'; ++str)
+		;
 	return(str);
 }
 
@@ -288,52 +317,3 @@ usage(void)
 "usage: uniq [-c | -d | -u] [-i] [-f fields] [-s chars] [input [output]]\n");
 	exit(1);
 }
-
-static size_t wcsicoll_l1_buflen = 0, wcsicoll_l2_buflen = 0;
-static wchar_t *wcsicoll_l1_buf = NULL, *wcsicoll_l2_buf = NULL;
-
-int
-wcsicoll(wchar_t *s1, wchar_t *s2)
-{
-	wchar_t *p;
-	size_t l1, l2;
-	size_t new_l1_buflen, new_l2_buflen;
-
-	l1 = wcslen(s1) + 1;
-	l2 = wcslen(s2) + 1;
-	new_l1_buflen = wcsicoll_l1_buflen;
-	new_l2_buflen = wcsicoll_l2_buflen;
-	while (new_l1_buflen < l1) {
-		if (new_l1_buflen == 0)
-			new_l1_buflen = MAXLINELEN;
-		else
-			new_l1_buflen *= 2;
-	}
-	while (new_l2_buflen < l2) {
-		if (new_l2_buflen == 0)
-			new_l2_buflen = MAXLINELEN;
-		else
-			new_l2_buflen *= 2;
-	}
-	if (new_l1_buflen > wcsicoll_l1_buflen) {
-		wcsicoll_l1_buf = reallocf(wcsicoll_l1_buf, new_l1_buflen * sizeof(*wcsicoll_l1_buf));
-		if (wcsicoll_l1_buf == NULL)
-                	err(1, "reallocf");
-		wcsicoll_l1_buflen = new_l1_buflen;
-	}
-	if (new_l2_buflen > wcsicoll_l2_buflen) {
-		wcsicoll_l2_buf = reallocf(wcsicoll_l2_buf, new_l2_buflen * sizeof(*wcsicoll_l2_buf));
-		if (wcsicoll_l2_buf == NULL)
-                	err(1, "reallocf");
-		wcsicoll_l2_buflen = new_l2_buflen;
-	}
-
-	for (p = wcsicoll_l1_buf; *s1; s1++)
-		*p++ = towlower(*s1);
-	*p = '\0';
-	for (p = wcsicoll_l2_buf; *s2; s2++)
-		*p++ = towlower(*s2);
-	*p = '\0';
-
-	return (wcscoll(wcsicoll_l1_buf, wcsicoll_l2_buf));
-}



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201008171507.o7HF7NiU036279>