Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 18 Sep 2012 14:33:27 +0000 (UTC)
From:      Brooks Davis <brooks@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r240654 - in projects/mtree: include lib/libc/gen
Message-ID:  <201209181433.q8IEXRGn010268@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: brooks
Date: Tue Sep 18 14:33:27 2012
New Revision: 240654
URL: http://svn.freebsd.org/changeset/base/240654

Log:
  Replace our (un)vis(3) implementation with the one from NetBSD.  The NetBSD
  versions are:
  
  vis.c 1.44
  vis.3 1.27
  unvis.c 1.39
  unvis.3 1.23
  vis.h 1.19
  
  This version breaks the build due to use of VIS_GLOB in our mtree.

Modified:
  projects/mtree/include/vis.h
  projects/mtree/lib/libc/gen/unvis.3
  projects/mtree/lib/libc/gen/unvis.c
  projects/mtree/lib/libc/gen/vis.3
  projects/mtree/lib/libc/gen/vis.c

Modified: projects/mtree/include/vis.h
==============================================================================
--- projects/mtree/include/vis.h	Tue Sep 18 14:05:35 2012	(r240653)
+++ projects/mtree/include/vis.h	Tue Sep 18 14:33:27 2012	(r240654)
@@ -1,3 +1,5 @@
+/*	$NetBSD: vis.h,v 1.19 2011/03/12 19:52:45 christos Exp $	*/
+
 /*-
  * Copyright (c) 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -27,41 +29,39 @@
  * SUCH DAMAGE.
  *
  *	@(#)vis.h	8.1 (Berkeley) 6/2/93
- * $FreeBSD$
  */
 
 #ifndef _VIS_H_
 #define	_VIS_H_
 
-#include <sys/_types.h>
-
-#ifndef _SIZE_T_DECLARED
-typedef	__size_t	size_t;
-#define	_SIZE_T_DECLARED
-#endif
+#include <sys/types.h>
 
 /*
  * to select alternate encoding format
  */
-#define	VIS_OCTAL	0x01	/* use octal \ddd format */
-#define	VIS_CSTYLE	0x02	/* use \[nrft0..] where appropriate */
+#define	VIS_OCTAL	0x001	/* use octal \ddd format */
+#define	VIS_CSTYLE	0x002	/* use \[nrft0..] where appropriate */
 
 /*
  * to alter set of characters encoded (default is to encode all
  * non-graphic except space, tab, and newline).
  */
-#define	VIS_SP		0x04	/* also encode space */
-#define	VIS_TAB		0x08	/* also encode tab */
-#define	VIS_NL		0x10	/* also encode newline */
+#define	VIS_SP		0x004	/* also encode space */
+#define	VIS_TAB		0x008	/* also encode tab */
+#define	VIS_NL		0x010	/* also encode newline */
 #define	VIS_WHITE	(VIS_SP | VIS_TAB | VIS_NL)
-#define	VIS_SAFE	0x20	/* only encode "unsafe" characters */
+#define	VIS_SAFE	0x020	/* only encode "unsafe" characters */
 
 /*
  * other
  */
-#define	VIS_NOSLASH	0x40	/* inhibit printing '\' */
-#define	VIS_HTTPSTYLE	0x80	/* http-style escape % HEX HEX */
-#define	VIS_GLOB	0x100	/* encode glob(3) magics */
+#define	VIS_NOSLASH	0x040	/* inhibit printing '\' */
+#define	VIS_HTTP1808	0x080	/* http-style escape % hex hex */
+#define	VIS_HTTPSTYLE	0x080	/* http-style escape % hex hex */
+#define	VIS_MIMESTYLE	0x100	/* mime-style escape = HEX HEX */
+#define	VIS_HTTP1866	0x200	/* http-style &#num; or &string; */
+#define	VIS_NOESCAPE	0x400	/* don't decode `\' */
+#define	_VIS_END	0x800	/* for unvis */
 
 /*
  * unvis return codes
@@ -75,17 +75,38 @@ typedef	__size_t	size_t;
 /*
  * unvis flags
  */
-#define	UNVIS_END	1	/* no more characters */
+#define	UNVIS_END	_VIS_END	/* no more characters */
 
 #include <sys/cdefs.h>
 
 __BEGIN_DECLS
 char	*vis(char *, int, int, int);
+char	*nvis(char *, size_t, int, int, int);
+
+char	*svis(char *, int, int, int, const char *);
+char	*snvis(char *, size_t, int, int, int, const char *);
+
 int	strvis(char *, const char *, int);
+int	strnvis(char *, size_t, const char *, int);
+
+int	strsvis(char *, const char *, int, const char *);
+int	strsnvis(char *, size_t, const char *, int, const char *);
+
 int	strvisx(char *, const char *, size_t, int);
+int	strnvisx(char *, size_t, const char *, size_t, int);
+
+int	strsvisx(char *, const char *, size_t, int, const char *);
+int	strsnvisx(char *, size_t, const char *, size_t, int, const char *);
+
 int	strunvis(char *, const char *);
+int	strnunvis(char *, size_t, const char *);
+
 int	strunvisx(char *, const char *, int);
-int	unvis(char *, int, int *, int);
+int	strnunvisx(char *, size_t, const char *, int);
+
+#ifndef __LIBC12_SOURCE__
+int	unvis(char *, int, int *, int) __RENAME(__unvis50);
+#endif
 __END_DECLS
 
 #endif /* !_VIS_H_ */

Modified: projects/mtree/lib/libc/gen/unvis.3
==============================================================================
--- projects/mtree/lib/libc/gen/unvis.3	Tue Sep 18 14:05:35 2012	(r240653)
+++ projects/mtree/lib/libc/gen/unvis.3	Tue Sep 18 14:33:27 2012	(r240654)
@@ -1,3 +1,5 @@
+.\"	$NetBSD: unvis.3,v 1.23 2011/03/17 14:06:29 wiz Exp $
+.\"
 .\" Copyright (c) 1989, 1991, 1993
 .\"	The Regents of the University of California.  All rights reserved.
 .\"
@@ -9,7 +11,7 @@
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
-.\" 4. Neither the name of the University nor the names of its contributors
+.\" 3. Neither the name of the University nor the names of its contributors
 .\"    may be used to endorse or promote products derived from this software
 .\"    without specific prior written permission.
 .\"
@@ -26,9 +28,8 @@
 .\" SUCH DAMAGE.
 .\"
 .\"     @(#)unvis.3	8.2 (Berkeley) 12/11/93
-.\" $FreeBSD$
 .\"
-.Dd December 11, 1993
+.Dd March 12, 2011
 .Dt UNVIS 3
 .Os
 .Sh NAME
@@ -44,7 +45,11 @@
 .Ft int
 .Fn strunvis "char *dst" "const char *src"
 .Ft int
+.Fn strnunvis "char *dst" "size_t dlen" "const char *src"
+.Ft int
 .Fn strunvisx "char *dst" "const char *src" "int flag"
+.Ft int
+.Fn strnunvisx "char *dst" "size_t dlen" "const char *src" "int flag"
 .Sh DESCRIPTION
 The
 .Fn unvis ,
@@ -57,37 +62,37 @@ by the
 .Xr vis 3
 function, back into
 the original form.
-Unvis is called with successive characters in
-.Fa c
-until a valid
-sequence is recognized, at which time the decoded character is
-available at the character pointed to by
-.Fa cp .
-Strunvis decodes the
-characters pointed to by
-.Fa src
-into the buffer pointed to by
-.Fa dst .
+.Pp
+The
+.Fn unvis
+function is called with successive characters in
+.Ar c
+until a valid sequence is recognized, at which time the decoded
+character is available at the character pointed to by
+.Ar cp .
 .Pp
 The
 .Fn strunvis
-function
-simply copies
-.Fa src
+function decodes the characters pointed to by
+.Ar src
+into the buffer pointed to by
+.Ar dst .
+The
+.Fn strunvis
+function simply copies
+.Ar src
 to
-.Fa dst ,
+.Ar dst ,
 decoding any escape sequences along the way,
 and returns the number of characters placed into
-.Fa dst ,
+.Ar dst ,
 or \-1 if an
 invalid escape sequence was detected.
 The size of
-.Fa dst
-should be
-equal to the size of
-.Fa src
-(that is, no expansion takes place during
-decoding).
+.Ar dst
+should be equal to the size of
+.Ar src
+(that is, no expansion takes place during decoding).
 .Pp
 The
 .Fn strunvisx
@@ -95,32 +100,29 @@ function does the same as the
 .Fn strunvis
 function,
 but it allows you to add a flag that specifies the style the string
-.Fa src
+.Ar src
 is encoded with.
-Currently, the only supported flag is
-.Dv VIS_HTTPSTYLE .
+Currently, the supported flags are:
+.Dv VIS_HTTPSTYLE
+and
+.Dv VIS_MIMESTYLE .
 .Pp
 The
 .Fn unvis
-function
-implements a state machine that can be used to decode an arbitrary
-stream of bytes.
-All state associated with the bytes being decoded
-is stored outside the
+function implements a state machine that can be used to decode an
+arbitrary stream of bytes.
+All state associated with the bytes being decoded is stored outside the
 .Fn unvis
 function (that is, a pointer to the state is passed in), so
 calls decoding different streams can be freely intermixed.
-To
-start decoding a stream of bytes, first initialize an integer
-to zero.
+To start decoding a stream of bytes, first initialize an integer to zero.
 Call
 .Fn unvis
 with each successive byte, along with a pointer
 to this integer, and a pointer to a destination character.
 The
 .Fn unvis
-function
-has several return codes that must be handled properly.
+function has several return codes that must be handled properly.
 They are:
 .Bl -tag -width UNVIS_VALIDPUSH
 .It Li \&0 (zero)
@@ -134,29 +136,39 @@ pointed to by cp; however, the character
 be passed in again.
 .It Dv UNVIS_NOCHAR
 A valid sequence was detected, but no character was produced.
-This
-return code is necessary to indicate a logical break between characters.
+This return code is necessary to indicate a logical break between characters.
 .It Dv UNVIS_SYNBAD
-An invalid escape sequence was detected, or the decoder is in an
-unknown state.
+An invalid escape sequence was detected, or the decoder is in an unknown state.
 The decoder is placed into the starting state.
 .El
 .Pp
 When all bytes in the stream have been processed, call
 .Fn unvis
-one more time with
-.Fa flag
-set to
+one more time with flag set to
 .Dv UNVIS_END
 to extract any remaining character (the character passed in is ignored).
 .Pp
 The
-.Fa flag
+.Ar flag
 argument is also used to specify the encoding style of the source.
 If set to
-.Dv VIS_HTTPSTYLE ,
+.Dv VIS_HTTPSTYLE
+or
+.Dv VIS_HTTP1808 ,
 .Fn unvis
 will decode URI strings as specified in RFC 1808.
+If set to
+.Dv VIS_HTTP1866 ,
+.Fn unvis
+will decode URI strings as specified in RFC 1866.
+If set to
+.Dv VIS_MIMESTYLE ,
+.Fn unvis
+will decode MIME Quoted-Printable strings as specified in RFC 2045.
+If set to
+.Dv VIS_NOESCAPE ,
+.Fn unvis
+will not decode \e quoted characters.
 .Pp
 The following code fragment illustrates a proper use of
 .Fn unvis .
@@ -166,25 +178,51 @@ char out;
 
 while ((ch = getchar()) != EOF) {
 again:
-	switch(unvis(&out, ch, &state, 0)) {
+	switch(unvis(\*[Am]out, ch, \*[Am]state, 0)) {
 	case 0:
 	case UNVIS_NOCHAR:
 		break;
 	case UNVIS_VALID:
-		(void) putchar(out);
+		(void)putchar(out);
 		break;
 	case UNVIS_VALIDPUSH:
-		(void) putchar(out);
+		(void)putchar(out);
 		goto again;
 	case UNVIS_SYNBAD:
-		(void)fprintf(stderr, "bad sequence!\en");
-	exit(1);
+		errx(EXIT_FAILURE, "Bad character sequence!");
 	}
 }
-if (unvis(&out, (char)0, &state, UNVIS_END) == UNVIS_VALID)
-	(void) putchar(out);
+if (unvis(\*[Am]out, '\e0', \*[Am]state, UNVIS_END) == UNVIS_VALID)
+	(void)putchar(out);
 .Ed
+.Sh ERRORS
+The functions
+.Fn strunvis ,
+.Fn strnunvis ,
+.Fn strunvisx ,
+and
+.Fn strnunvisx
+will return \-1 on error and set
+.Va errno 
+to:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+An invalid escape sequence was detected, or the decoder is in an unknown state.
+.El
+.Pp
+In addition the functions
+.Fn strnunvis 
+and
+.Fn strnunvisx
+can also set
+.Va errno
+on error to:
+.Bl -tag -width Er
+.It Bq Er ENOSPC
+Not enough space to perform the conversion.
+.El
 .Sh SEE ALSO
+.Xr unvis 1 ,
 .Xr vis 1 ,
 .Xr vis 3
 .Rs
@@ -198,3 +236,9 @@ The
 function
 first appeared in
 .Bx 4.4 .
+The
+.Fn strnunvis
+and
+.Fn strnunvisx
+functions appeared in
+.Nx 6.0 .

Modified: projects/mtree/lib/libc/gen/unvis.c
==============================================================================
--- projects/mtree/lib/libc/gen/unvis.c	Tue Sep 18 14:05:35 2012	(r240653)
+++ projects/mtree/lib/libc/gen/unvis.c	Tue Sep 18 14:33:27 2012	(r240654)
@@ -1,3 +1,5 @@
+/*	$NetBSD: unvis.c,v 1.39 2012/03/13 21:13:37 christos Exp $	*/
+
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -10,7 +12,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
@@ -27,16 +29,30 @@
  * SUCH DAMAGE.
  */
 
+#include <sys/cdefs.h>
 #if defined(LIBC_SCCS) && !defined(lint)
+#if 0
 static char sccsid[] = "@(#)unvis.c	8.1 (Berkeley) 6/4/93";
+#else
+__RCSID("$NetBSD: unvis.c,v 1.39 2012/03/13 21:13:37 christos Exp $");
+#endif
 #endif /* LIBC_SCCS and not lint */
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
 
+#include "namespace.h"
 #include <sys/types.h>
+
+#include <assert.h>
 #include <ctype.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <errno.h>
 #include <vis.h>
 
+#ifdef __weak_alias
+__weak_alias(strnunvisx,_strnunvisx)
+#endif
+
+#if !HAVE_VIS
 /*
  * decode driven by state machine
  */
@@ -47,12 +63,128 @@ __FBSDID("$FreeBSD$");
 #define	S_CTRL		4	/* control char started (^) */
 #define	S_OCTAL2	5	/* octal digit 2 */
 #define	S_OCTAL3	6	/* octal digit 3 */
-#define	S_HEX2		7	/* hex digit 2 */
-
-#define	S_HTTP		0x080	/* %HEXHEX escape */
+#define	S_HEX		7	/* mandatory hex digit */
+#define	S_HEX1		8	/* http hex digit */
+#define	S_HEX2		9	/* http hex digit 2 */
+#define	S_MIME1		10	/* mime hex digit 1 */
+#define	S_MIME2		11	/* mime hex digit 2 */
+#define	S_EATCRNL	12	/* mime eating CRNL */
+#define	S_AMP		13	/* seen & */
+#define	S_NUMBER	14	/* collecting number */
+#define	S_STRING	15	/* collecting string */
 
 #define	isoctal(c)	(((u_char)(c)) >= '0' && ((u_char)(c)) <= '7')
-#define	ishex(c)	((((u_char)(c)) >= '0' && ((u_char)(c)) <= '9') || (((u_char)(c)) >= 'a' && ((u_char)(c)) <= 'f'))
+#define	xtod(c)		(isdigit(c) ? (c - '0') : ((tolower(c) - 'a') + 10))
+#define	XTOD(c)		(isdigit(c) ? (c - '0') : ((c - 'A') + 10))
+
+/*
+ * RFC 1866
+ */
+static const struct nv {
+	const char *name;
+	uint8_t value;
+} nv[] = {
+	{ "AElig",	198 }, /* capital AE diphthong (ligature)  */
+	{ "Aacute",	193 }, /* capital A, acute accent  */
+	{ "Acirc",	194 }, /* capital A, circumflex accent  */
+	{ "Agrave",	192 }, /* capital A, grave accent  */
+	{ "Aring",	197 }, /* capital A, ring  */
+	{ "Atilde",	195 }, /* capital A, tilde  */
+	{ "Auml",	196 }, /* capital A, dieresis or umlaut mark  */
+	{ "Ccedil",	199 }, /* capital C, cedilla  */
+	{ "ETH",	208 }, /* capital Eth, Icelandic  */
+	{ "Eacute",	201 }, /* capital E, acute accent  */
+	{ "Ecirc",	202 }, /* capital E, circumflex accent  */
+	{ "Egrave",	200 }, /* capital E, grave accent  */
+	{ "Euml",	203 }, /* capital E, dieresis or umlaut mark  */
+	{ "Iacute",	205 }, /* capital I, acute accent  */
+	{ "Icirc",	206 }, /* capital I, circumflex accent  */
+	{ "Igrave",	204 }, /* capital I, grave accent  */
+	{ "Iuml",	207 }, /* capital I, dieresis or umlaut mark  */
+	{ "Ntilde",	209 }, /* capital N, tilde  */
+	{ "Oacute",	211 }, /* capital O, acute accent  */
+	{ "Ocirc",	212 }, /* capital O, circumflex accent  */
+	{ "Ograve",	210 }, /* capital O, grave accent  */
+	{ "Oslash",	216 }, /* capital O, slash  */
+	{ "Otilde",	213 }, /* capital O, tilde  */
+	{ "Ouml",	214 }, /* capital O, dieresis or umlaut mark  */
+	{ "THORN",	222 }, /* capital THORN, Icelandic  */
+	{ "Uacute",	218 }, /* capital U, acute accent  */
+	{ "Ucirc",	219 }, /* capital U, circumflex accent  */
+	{ "Ugrave",	217 }, /* capital U, grave accent  */
+	{ "Uuml",	220 }, /* capital U, dieresis or umlaut mark  */
+	{ "Yacute",	221 }, /* capital Y, acute accent  */
+	{ "aacute",	225 }, /* small a, acute accent  */
+	{ "acirc",	226 }, /* small a, circumflex accent  */
+	{ "acute",	180 }, /* acute accent  */
+	{ "aelig",	230 }, /* small ae diphthong (ligature)  */
+	{ "agrave",	224 }, /* small a, grave accent  */
+	{ "amp",	 38 }, /* ampersand  */
+	{ "aring",	229 }, /* small a, ring  */
+	{ "atilde",	227 }, /* small a, tilde  */
+	{ "auml",	228 }, /* small a, dieresis or umlaut mark  */
+	{ "brvbar",	166 }, /* broken (vertical) bar  */
+	{ "ccedil",	231 }, /* small c, cedilla  */
+	{ "cedil",	184 }, /* cedilla  */
+	{ "cent",	162 }, /* cent sign  */
+	{ "copy",	169 }, /* copyright sign  */
+	{ "curren",	164 }, /* general currency sign  */
+	{ "deg",	176 }, /* degree sign  */
+	{ "divide",	247 }, /* divide sign  */
+	{ "eacute",	233 }, /* small e, acute accent  */
+	{ "ecirc",	234 }, /* small e, circumflex accent  */
+	{ "egrave",	232 }, /* small e, grave accent  */
+	{ "eth",	240 }, /* small eth, Icelandic  */
+	{ "euml",	235 }, /* small e, dieresis or umlaut mark  */
+	{ "frac12",	189 }, /* fraction one-half  */
+	{ "frac14",	188 }, /* fraction one-quarter  */
+	{ "frac34",	190 }, /* fraction three-quarters  */
+	{ "gt",		 62 }, /* greater than  */
+	{ "iacute",	237 }, /* small i, acute accent  */
+	{ "icirc",	238 }, /* small i, circumflex accent  */
+	{ "iexcl",	161 }, /* inverted exclamation mark  */
+	{ "igrave",	236 }, /* small i, grave accent  */
+	{ "iquest",	191 }, /* inverted question mark  */
+	{ "iuml",	239 }, /* small i, dieresis or umlaut mark  */
+	{ "laquo",	171 }, /* angle quotation mark, left  */
+	{ "lt",		 60 }, /* less than  */
+	{ "macr",	175 }, /* macron  */
+	{ "micro",	181 }, /* micro sign  */
+	{ "middot",	183 }, /* middle dot  */
+	{ "nbsp",	160 }, /* no-break space  */
+	{ "not",	172 }, /* not sign  */
+	{ "ntilde",	241 }, /* small n, tilde  */
+	{ "oacute",	243 }, /* small o, acute accent  */
+	{ "ocirc",	244 }, /* small o, circumflex accent  */
+	{ "ograve",	242 }, /* small o, grave accent  */
+	{ "ordf",	170 }, /* ordinal indicator, feminine  */
+	{ "ordm",	186 }, /* ordinal indicator, masculine  */
+	{ "oslash",	248 }, /* small o, slash  */
+	{ "otilde",	245 }, /* small o, tilde  */
+	{ "ouml",	246 }, /* small o, dieresis or umlaut mark  */
+	{ "para",	182 }, /* pilcrow (paragraph sign)  */
+	{ "plusmn",	177 }, /* plus-or-minus sign  */
+	{ "pound",	163 }, /* pound sterling sign  */
+	{ "quot",	 34 }, /* double quote  */
+	{ "raquo",	187 }, /* angle quotation mark, right  */
+	{ "reg",	174 }, /* registered sign  */
+	{ "sect",	167 }, /* section sign  */
+	{ "shy",	173 }, /* soft hyphen  */
+	{ "sup1",	185 }, /* superscript one  */
+	{ "sup2",	178 }, /* superscript two  */
+	{ "sup3",	179 }, /* superscript three  */
+	{ "szlig",	223 }, /* small sharp s, German (sz ligature)  */
+	{ "thorn",	254 }, /* small thorn, Icelandic  */
+	{ "times",	215 }, /* multiply sign  */
+	{ "uacute",	250 }, /* small u, acute accent  */
+	{ "ucirc",	251 }, /* small u, circumflex accent  */
+	{ "ugrave",	249 }, /* small u, grave accent  */
+	{ "uml",	168 }, /* umlaut (dieresis)  */
+	{ "uuml",	252 }, /* small u, dieresis or umlaut mark  */
+	{ "yacute",	253 }, /* small y, acute accent  */
+	{ "yen",	165 }, /* yen sign  */
+	{ "yuml",	255 }, /* small y, dieresis or umlaut mark  */
+};
 
 /*
  * unvis - decode characters previously encoded by vis
@@ -60,234 +192,362 @@ __FBSDID("$FreeBSD$");
 int
 unvis(char *cp, int c, int *astate, int flag)
 {
+	unsigned char uc = (unsigned char)c;
+	unsigned char st, ia, is, lc;
+
+/*
+ * Bottom 8 bits of astate hold the state machine state.
+ * Top 8 bits hold the current character in the http 1866 nv string decoding
+ */
+#define GS(a)		((a) & 0xff)
+#define SS(a, b)	(((uint32_t)(a) << 24) | (b))
+#define GI(a)		((uint32_t)(a) >> 24)
+
+	_DIAGASSERT(cp != NULL);
+	_DIAGASSERT(astate != NULL);
+	st = GS(*astate);
 
 	if (flag & UNVIS_END) {
-		if (*astate == S_OCTAL2 || *astate == S_OCTAL3) {
-			*astate = S_GROUND;
-			return (UNVIS_VALID);
+		switch (st) {
+		case S_OCTAL2:
+		case S_OCTAL3:
+		case S_HEX2:
+			*astate = SS(0, S_GROUND);
+			return UNVIS_VALID;
+		case S_GROUND:
+			return UNVIS_NOCHAR;
+		default:
+			return UNVIS_SYNBAD;
 		}
-		return (*astate == S_GROUND ? UNVIS_NOCHAR : UNVIS_SYNBAD);
 	}
 
-	switch (*astate & ~S_HTTP) {
+	switch (st) {
 
 	case S_GROUND:
 		*cp = 0;
-		if (c == '\\') {
-			*astate = S_START;
-			return (0);
-		}
-		if (flag & VIS_HTTPSTYLE && c == '%') {
-			*astate = S_START | S_HTTP;
-			return (0);
+		if ((flag & VIS_NOESCAPE) == 0 && c == '\\') {
+			*astate = SS(0, S_START);
+			return UNVIS_NOCHAR;
+		}
+		if ((flag & VIS_HTTP1808) && c == '%') {
+			*astate = SS(0, S_HEX1);
+			return UNVIS_NOCHAR;
+		}
+		if ((flag & VIS_HTTP1866) && c == '&') {
+			*astate = SS(0, S_AMP);
+			return UNVIS_NOCHAR;
+		}
+		if ((flag & VIS_MIMESTYLE) && c == '=') {
+			*astate = SS(0, S_MIME1);
+			return UNVIS_NOCHAR;
 		}
 		*cp = c;
-		return (UNVIS_VALID);
+		return UNVIS_VALID;
 
 	case S_START:
-		if (*astate & S_HTTP) {
-		    if (ishex(tolower(c))) {
-			*cp = isdigit(c) ? (c - '0') : (tolower(c) - 'a');
-			*astate = S_HEX2;
-			return (0);
-		    }
-		}
 		switch(c) {
 		case '\\':
 			*cp = c;
-			*astate = S_GROUND;
-			return (UNVIS_VALID);
+			*astate = SS(0, S_GROUND);
+			return UNVIS_VALID;
 		case '0': case '1': case '2': case '3':
 		case '4': case '5': case '6': case '7':
 			*cp = (c - '0');
-			*astate = S_OCTAL2;
-			return (0);
+			*astate = SS(0, S_OCTAL2);
+			return UNVIS_NOCHAR;
 		case 'M':
-			*cp = 0200;
-			*astate = S_META;
-			return (0);
+			*cp = (char)0200;
+			*astate = SS(0, S_META);
+			return UNVIS_NOCHAR;
 		case '^':
-			*astate = S_CTRL;
-			return (0);
+			*astate = SS(0, S_CTRL);
+			return UNVIS_NOCHAR;
 		case 'n':
 			*cp = '\n';
-			*astate = S_GROUND;
-			return (UNVIS_VALID);
+			*astate = SS(0, S_GROUND);
+			return UNVIS_VALID;
 		case 'r':
 			*cp = '\r';
-			*astate = S_GROUND;
-			return (UNVIS_VALID);
+			*astate = SS(0, S_GROUND);
+			return UNVIS_VALID;
 		case 'b':
 			*cp = '\b';
-			*astate = S_GROUND;
-			return (UNVIS_VALID);
+			*astate = SS(0, S_GROUND);
+			return UNVIS_VALID;
 		case 'a':
 			*cp = '\007';
-			*astate = S_GROUND;
-			return (UNVIS_VALID);
+			*astate = SS(0, S_GROUND);
+			return UNVIS_VALID;
 		case 'v':
 			*cp = '\v';
-			*astate = S_GROUND;
-			return (UNVIS_VALID);
+			*astate = SS(0, S_GROUND);
+			return UNVIS_VALID;
 		case 't':
 			*cp = '\t';
-			*astate = S_GROUND;
-			return (UNVIS_VALID);
+			*astate = SS(0, S_GROUND);
+			return UNVIS_VALID;
 		case 'f':
 			*cp = '\f';
-			*astate = S_GROUND;
-			return (UNVIS_VALID);
+			*astate = SS(0, S_GROUND);
+			return UNVIS_VALID;
 		case 's':
 			*cp = ' ';
-			*astate = S_GROUND;
-			return (UNVIS_VALID);
+			*astate = SS(0, S_GROUND);
+			return UNVIS_VALID;
 		case 'E':
 			*cp = '\033';
-			*astate = S_GROUND;
-			return (UNVIS_VALID);
+			*astate = SS(0, S_GROUND);
+			return UNVIS_VALID;
+		case 'x':
+			*astate = SS(0, S_HEX);
+			return UNVIS_NOCHAR;
 		case '\n':
 			/*
 			 * hidden newline
 			 */
-			*astate = S_GROUND;
-			return (UNVIS_NOCHAR);
+			*astate = SS(0, S_GROUND);
+			return UNVIS_NOCHAR;
 		case '$':
 			/*
 			 * hidden marker
 			 */
-			*astate = S_GROUND;
-			return (UNVIS_NOCHAR);
+			*astate = SS(0, S_GROUND);
+			return UNVIS_NOCHAR;
 		}
-		*astate = S_GROUND;
-		return (UNVIS_SYNBAD);
+		goto bad;
 
 	case S_META:
 		if (c == '-')
-			*astate = S_META1;
+			*astate = SS(0, S_META1);
 		else if (c == '^')
-			*astate = S_CTRL;
-		else {
-			*astate = S_GROUND;
-			return (UNVIS_SYNBAD);
-		}
-		return (0);
+			*astate = SS(0, S_CTRL);
+		else 
+			goto bad;
+		return UNVIS_NOCHAR;
 
 	case S_META1:
-		*astate = S_GROUND;
+		*astate = SS(0, S_GROUND);
 		*cp |= c;
-		return (UNVIS_VALID);
+		return UNVIS_VALID;
 
 	case S_CTRL:
 		if (c == '?')
 			*cp |= 0177;
 		else
 			*cp |= c & 037;
-		*astate = S_GROUND;
-		return (UNVIS_VALID);
+		*astate = SS(0, S_GROUND);
+		return UNVIS_VALID;
 
 	case S_OCTAL2:	/* second possible octal digit */
-		if (isoctal(c)) {
+		if (isoctal(uc)) {
 			/*
 			 * yes - and maybe a third
 			 */
 			*cp = (*cp << 3) + (c - '0');
-			*astate = S_OCTAL3;
-			return (0);
+			*astate = SS(0, S_OCTAL3);
+			return UNVIS_NOCHAR;
 		}
 		/*
 		 * no - done with current sequence, push back passed char
 		 */
-		*astate = S_GROUND;
-		return (UNVIS_VALIDPUSH);
+		*astate = SS(0, S_GROUND);
+		return UNVIS_VALIDPUSH;
 
 	case S_OCTAL3:	/* third possible octal digit */
-		*astate = S_GROUND;
-		if (isoctal(c)) {
+		*astate = SS(0, S_GROUND);
+		if (isoctal(uc)) {
 			*cp = (*cp << 3) + (c - '0');
-			return (UNVIS_VALID);
+			return UNVIS_VALID;
 		}
 		/*
 		 * we were done, push back passed char
 		 */
-		return (UNVIS_VALIDPUSH);
+		return UNVIS_VALIDPUSH;
 
-	case S_HEX2:	/* second mandatory hex digit */
-		if (ishex(tolower(c))) {
-			*cp = (isdigit(c) ? (*cp << 4) + (c - '0') : (*cp << 4) + (tolower(c) - 'a' + 10));
+	case S_HEX:
+		if (!isxdigit(uc))
+			goto bad;
+		/*FALLTHROUGH*/
+	case S_HEX1:
+		if (isxdigit(uc)) {
+			*cp = xtod(uc);
+			*astate = SS(0, S_HEX2);
+			return UNVIS_NOCHAR;
 		}
+		/*
+		 * no - done with current sequence, push back passed char
+		 */
+		*astate = SS(0, S_GROUND);
+		return UNVIS_VALIDPUSH;
+
+	case S_HEX2:
 		*astate = S_GROUND;
-		return (UNVIS_VALID);
+		if (isxdigit(uc)) {
+			*cp = xtod(uc) | (*cp << 4);
+			return UNVIS_VALID;
+		}
+		return UNVIS_VALIDPUSH;
+
+	case S_MIME1:
+		if (uc == '\n' || uc == '\r') {
+			*astate = SS(0, S_EATCRNL);
+			return UNVIS_NOCHAR;
+		}
+		if (isxdigit(uc) && (isdigit(uc) || isupper(uc))) {
+			*cp = XTOD(uc);
+			*astate = SS(0, S_MIME2);
+			return UNVIS_NOCHAR;
+		}
+		goto bad;
+
+	case S_MIME2:
+		if (isxdigit(uc) && (isdigit(uc) || isupper(uc))) {
+			*astate = SS(0, S_GROUND);
+			*cp = XTOD(uc) | (*cp << 4);
+			return UNVIS_VALID;
+		}
+		goto bad;
+
+	case S_EATCRNL:
+		switch (uc) {
+		case '\r':
+		case '\n':
+			return UNVIS_NOCHAR;
+		case '=':
+			*astate = SS(0, S_MIME1);
+			return UNVIS_NOCHAR;
+		default:
+			*cp = uc;
+			*astate = SS(0, S_GROUND);
+			return UNVIS_VALID;
+		}
+
+	case S_AMP:
+		*cp = 0;
+		if (uc == '#') {
+			*astate = SS(0, S_NUMBER);
+			return UNVIS_NOCHAR;
+		}
+		*astate = SS(0, S_STRING);
+		/*FALLTHROUGH*/
+
+	case S_STRING:
+		ia = *cp;		/* index in the array */
+		is = GI(*astate);	/* index in the string */
+		lc = is == 0 ? 0 : nv[ia].name[is - 1];	/* last character */
+
+		if (uc == ';')
+			uc = '\0';
+
+		for (; ia < __arraycount(nv); ia++) {
+			if (is != 0 && nv[ia].name[is - 1] != lc)
+				goto bad;
+			if (nv[ia].name[is] == uc)
+				break;
+		}
+
+		if (ia == __arraycount(nv))
+			goto bad;
+
+		if (uc != 0) {
+			*cp = ia;
+			*astate = SS(is + 1, S_STRING);
+			return UNVIS_NOCHAR;
+		}
+
+		*cp = nv[ia].value;
+		*astate = SS(0, S_GROUND);
+		return UNVIS_VALID;
+
+	case S_NUMBER:
+		if (uc == ';')
+			return UNVIS_VALID;
+		if (!isdigit(uc))
+			goto bad;
+		*cp += (*cp * 10) + uc - '0';
+		return UNVIS_NOCHAR;
 
 	default:
+	bad:
 		/*
 		 * decoder in unknown state - (probably uninitialized)
 		 */
-		*astate = S_GROUND;
-		return (UNVIS_SYNBAD);
+		*astate = SS(0, S_GROUND);
+		return UNVIS_SYNBAD;
 	}
 }
 
 /*
- * strunvis - decode src into dst
+ * strnunvisx - decode src into dst
  *
  *	Number of chars decoded into dst is returned, -1 on error.
  *	Dst is null terminated.
  */
 
 int
-strunvis(char *dst, const char *src)
+strnunvisx(char *dst, size_t dlen, const char *src, int flag)
 {
 	char c;
-	char *start = dst;
+	char t = '\0', *start = dst;
 	int state = 0;
 
-	while ( (c = *src++) ) {
-	again:
-		switch (unvis(dst, c, &state, 0)) {
+	_DIAGASSERT(src != NULL);
+	_DIAGASSERT(dst != NULL);
+#define CHECKSPACE() \
+	do { \
+		if (dlen-- == 0) { \
+			errno = ENOSPC; \
+			return -1; \
+		} \
+	} while (/*CONSTCOND*/0)
+
+	while ((c = *src++) != '\0') {
+ again:
+		switch (unvis(&t, c, &state, flag)) {
 		case UNVIS_VALID:
-			dst++;
+			CHECKSPACE();
+			*dst++ = t;
 			break;
 		case UNVIS_VALIDPUSH:
-			dst++;
+			CHECKSPACE();
+			*dst++ = t;
 			goto again;
 		case 0:
 		case UNVIS_NOCHAR:
 			break;
+		case UNVIS_SYNBAD:
+			errno = EINVAL;
+			return -1;
 		default:
-			return (-1);
+			_DIAGASSERT(/*CONSTCOND*/0);
+			errno = EINVAL;
+			return -1;
 		}
 	}
-	if (unvis(dst, c, &state, UNVIS_END) == UNVIS_VALID)
-		dst++;
+	if (unvis(&t, c, &state, UNVIS_END) == UNVIS_VALID) {
+		CHECKSPACE();
+		*dst++ = t;
+	}
+	CHECKSPACE();
 	*dst = '\0';
-	return (dst - start);
+	return (int)(dst - start);
 }
 
 int
 strunvisx(char *dst, const char *src, int flag)
 {
-	char c;
-	char *start = dst;
-	int state = 0;
-    
-	while ( (c = *src++) ) {
-	again:
-		switch (unvis(dst, c, &state, flag)) {
-		case UNVIS_VALID:
-			dst++;
-			break;
-		case UNVIS_VALIDPUSH:
-			dst++;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201209181433.q8IEXRGn010268>