Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 4 Jun 2013 00:49:54 GMT
From:      Dave Vasilevsky <dave@vasilevsky.ca>
To:        freebsd-gnats-submit@FreeBSD.org
Subject:   bin/179285: [patch] [ndis] Remove dependency of ndiscvt on libiconv port
Message-ID:  <201306040049.r540nsfr020400@oldred.freebsd.org>
Resent-Message-ID: <201306040100.r54102La090670@freefall.freebsd.org>

next in thread | raw e-mail | index | archive | help

>Number:         179285
>Category:       bin
>Synopsis:       [patch] [ndis] Remove dependency of ndiscvt on libiconv port
>Confidential:   no
>Severity:       non-critical
>Priority:       low
>Responsible:    freebsd-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          change-request
>Submitter-Id:   current-users
>Arrival-Date:   Tue Jun 04 01:00:01 UTC 2013
>Closed-Date:
>Last-Modified:
>Originator:     Dave Vasilevsky
>Release:        FreeBSD 10.0-CURRENT amd64
>Organization:
>Environment:
System: FreeBSD current 10.0-CURRENT FreeBSD 10.0-CURRENT #0 r251265: Sun Jun 2 19:49:56 EDT 2013 root@current:/usr/obj/usr/src/sys/PRODUCTION amd64
>Description:
To use ndisgen/ndiscvt, the user needs to supply a .inf file as input. However, many .inf files use the UTF-16 character encoding, in which case ndiscvt requires the libiconv port to be installed. This presents a chicken-and-egg problem for end users, as they can't install libiconv without internet access, and they can't get internet access without ndis.

>How-To-Repeat:
1. Ensure you have FreeBSD 9.x or CURRENT, for i386 or amd64.
2. Ensure you do not have the libiconv port installed.
3. Download the driver "sp36684.exe" from here: http://h10025.www1.hp.com/ewfrf/wc/softwareDownloadIndex?cc=us&lc=en&dlc=en&softwareitem=ob-53245-1
4. Unpack the .exe file with "unzip". Change into the new directory.
5. Run "ndisgen bcmwl5.inf bcmwl5.sys".

Notice the error message:
"This .INF file appears to be Unicode.
The iconv(1) utility does not appear to be installed.
Please install this utility or convert the .INF file
to ASCII and run this utility again."

>Fix:
The attached patch causes ndiscvt to filter its input, and convert it from UTF-16 if necessary. After applying the patch and rebuilding ndiscvt and ndisgen, running "ndisgen bcmwl5.inf bcmwl5.sys" works.

The patch uses both the Unicode BOM and a simple heuristic to attempt to detect UTF-16. It's very unlikely that the detection heuristic could go wrong. Both big- and little-endian UTF-16 are supported, as well as surrogate pair characters.

Patch attached with submission follows:

Index: usr.sbin/ndiscvt/inf-token.l
===================================================================
--- usr.sbin/ndiscvt/inf-token.l	(revision 251347)
+++ usr.sbin/ndiscvt/inf-token.l	(working copy)
@@ -55,6 +55,9 @@
 			lineno++;
 }
 
+static void in_buf(char *buf, yy_size_t *result, int max_size);
+#define YY_INPUT(buf, result, max_size) (in_buf(buf, &result, max_size))
+
 %}
 
 %option nounput
@@ -129,3 +132,142 @@
 {
 	errx(1, "line %d: %s%s %s.", lineno, yytext, yytext?":":"", s);
 }
+
+
+#define UTF8_BYTES 4 /* max bytes in UTF-8 encoding */
+#define BOM_BYTES 2 /* bytes in a UTF-16 byte order mark */
+static const int utf8_starts[] = { 0x80, 0x800, 0x10000 }; /* size thresholds */
+
+#define SURR1 0xD800 /* first lead (high) surrogate */
+#define SURR2 0xDC00 /* first trail (low) surrogate */
+#define SURR_END (SURR2 - SURR1 + SURR2) /* one past the last surrogate */
+#define SURR_BASE 0x10000 /* codepoint encoded by the lowest pair */
+
+enum encoding { ENC_UNKNOWN, ASCII, UTFBE = 0xfeff, UTFLE = 0xfffe };
+#define DETECT_BYTES 10 /* bytes sampled by in_detect() */
+
+/* Read a 16-bit unit from yyin (big-endian iff enc == UTFBE); EOF at end */
+static int
+in_u16(enum encoding enc) {
+	int first, second;
+
+	/* Both bytes are fetched before either one is examined */
+	first = getc(yyin);
+	second = getc(yyin);
+	if (first == EOF)
+		return EOF;
+	if (second != EOF)
+		return (enc == UTFBE ? (first << 8) + second :
+		    (second << 8) + first);
+
+	YY_FATAL_ERROR("Truncated UTF-16 character");
+}
+
+/* Decode the next codepoint from yyin, combining UTF-16 surrogate pairs */
+static int
+in_uchar(enum encoding enc) {
+	int hi, lo;
+
+	hi = in_u16(enc);
+	if (!(hi >= SURR1 && hi < SURR_END))
+		return (hi); /* not a surrogate; EOF also leaves here */
+
+	/* hi must be a lead surrogate and lo a trail surrogate */
+	lo = in_u16(enc);
+	if (hi >= SURR2 || !(lo >= SURR2 && lo < SURR_END))
+		YY_FATAL_ERROR("Malformed UTF-16 surrogate pair");
+	return (SURR_BASE + (hi - SURR1) * (SURR2 - SURR1) + (lo - SURR2));
+}
+
+/* Encode codepoint cp as UTF-8 into buf; returns bytes written (1..4) */
+static int
+in_utf8(char *buf, int cp)
+{
+	const size_t nstarts = sizeof(utf8_starts) / sizeof(utf8_starts[0]);
+	int i, ipref, shift;
+	size_t bs, cs; /* total bytes, continuation bytes */
+
+	/* Bound by element count; a bare sizeof counts bytes and overruns */
+	for (cs = 0; cs < nstarts && cp >= utf8_starts[cs]; cs++)
+		; /* pass */
+
+	bs = cs + 1;
+	if (cs == 0) { /* ascii */
+		buf[0] = cp;
+	} else {
+		/* Lead byte: bs high one-bits, then the top payload bits */
+		ipref = 0xff - (0xff >> bs);
+		buf[0] = ipref + (cp >> (6 * cs));
+		for (i = 1, shift = 6 * (cs - 1); shift >= 0; shift -= 6, i++)
+			buf[i] = 0x80 + ((cp >> shift) & 0x3f); /* continuation */
+	}
+	return (bs);
+}
+
+/* Try to detect if yyin is UTF-16 (big- or little-endian) or ASCII */
+static enum encoding
+in_detect(void)
+{
+	unsigned char buf[DETECT_BYTES];
+	int sums[2] = {0, 0};
+	int bom, bytes, i;
+
+	bytes = fread((char*)buf, 1, BOM_BYTES, yyin);
+	if (bytes < BOM_BYTES) {
+		/* Under two bytes: too short to be UTF-16 */
+		rewind(yyin);
+		return (ASCII);
+	}
+	bom = (buf[0] << 8) + buf[1];
+	if (bom == UTFBE || bom == UTFLE)
+		return ((enum encoding)bom); /* discard BOM */
+
+	/* No BOM: UTF-16 text in the ASCII range has one all-zero byte lane */
+	bytes += fread((char*)buf + BOM_BYTES, 1,
+	    DETECT_BYTES - BOM_BYTES, yyin);
+	rewind(yyin);
+	for (i = 0; i < bytes; ++i)
+		sums[i % 2] += buf[i];
+	if (sums[0] == 0)
+		return (UTFBE);
+	if (sums[1] == 0)
+		return (UTFLE);
+	return (ASCII);
+}
+
+/*
+ * YY_INPUT backend: fill buf with at most max_size bytes and store the count
+ * in *result. ASCII input is passed through; UTF-16 is re-encoded as UTF-8.
+ */
+static void
+in_buf(char *buf, yy_size_t *result, int max_size)
+{
+	static char outbuf[UTF8_BYTES];
+	static int outsize = 0;
+	static enum encoding enc = ENC_UNKNOWN;
+	int cp, ncopy;
+
+	if (enc == ENC_UNKNOWN)
+		enc = in_detect();
+
+	if (outsize <= 0) { /* nothing pending: fetch fresh input */
+		if (enc == ASCII) {
+			*result = fread(buf, 1, max_size, yyin);
+			return;
+		}
+		cp = in_uchar(enc);
+		if (cp == EOF) {
+			*result = YY_NULL;
+			return;
+		}
+		outsize = in_utf8(outbuf, cp);
+	}
+
+	/* Deliver as much of the pending UTF-8 sequence as fits */
+	ncopy = outsize < max_size ? outsize : max_size;
+	memcpy(buf, outbuf, ncopy);
+	if (ncopy < outsize)
+		memmove(outbuf, outbuf + ncopy, outsize - ncopy);
+	outsize -= ncopy;
+	*result = ncopy;
+}
Index: usr.sbin/ndiscvt/ndisgen.sh
===================================================================
--- usr.sbin/ndiscvt/ndisgen.sh	(revision 251347)
+++ usr.sbin/ndiscvt/ndisgen.sh	(working copy)
@@ -119,14 +119,11 @@
 echo "	the installer what devices the driver supports and what registry keys"
 echo "	should be created to control driver configuration. The .SYS file"
 echo "	is the actual driver executable code in Windows(r) Portable Executable"
-echo "	(PE) format. Note that sometimes the .INF file is supplied in Unicode"
-echo "	format. Unicode .INF files must be converted to ASCII form with the"
-echo "	iconv(1) utility before this installer script can use them."
-echo "	Occasionally, a driver may require firmware or register setup"
-echo "	files that are external to the main .SYS file. These are provided"
-echo "	on the same CD with the driver itself, and sometimes have a .BIN"
-echo "	extension, though they can be named almost anything. You will need"
-echo "	these additional files to make your device work with the NDIS"
+echo "	(PE) format. Occasionally, a driver may require firmware or register"
+echo "	setup files that are external to the main .SYS file. These are"
+echo "	provided on the same CD with the driver itself, and sometimes have a"
+echo "	.BIN extension, though they can be named almost anything. You will"
+echo "	need these additional files to make your device work with the NDIS"
 echo "	compatibility system as well."
 echo ""
 echo -n "	Press return to continue... "
@@ -170,15 +167,6 @@
 echo "	- The objcopy(1) utility (part of the base install)."
 echo "	- The ndiscvt(1) utility (part of the base install)."
 echo ""
-echo "	If you happen to end up with a .INF file that's in Unicode format,"
-echo "	then you'll also need:"
-echo ""
-echo "	- The iconv(1) utility."
-echo ""
-echo "	If you have installed the X Window system or some sort of desktop"
-echo "	environment, then iconv(1) should already be present. If not, you"
-echo "	will need to install the libiconv package or port."
-echo ""
 echo -n "	Press return to continue... "
 read KEYPRESS
 return
@@ -210,45 +198,7 @@
 		INFPATH=""
 		return
 	fi
-
-	INFTYPE=`${EGREP} -i -c "Class.*=.*Net" ${INFPATH}`
-	if [ ${INFTYPE} -gt 0 ]; then
-		echo ""
-		echo "	This .INF file appears to be ASCII."
-		echo ""
-		echo -n "	Press return to continue... "
-		read KEYPRESS
-		return
-	fi
-
-	INFTYPE=`${EGREP} -i -c ".C.l.a.s.s.*=.*N.e.t" ${INFPATH}`
-	if [ ${INFTYPE} -gt 0 ]; then
-		echo ""
-		echo "	This .INF file appears to be Unicode."
-		if [ -e ${ICONVPATH} ]; then
-			echo "	Trying to convert to ASCII..."
-			${ICONVPATH} -f utf-16 -t utf-8 ${INFPATH} > ${INFFILE}
-			INFPATH=${INFFILE}
-			echo "	Done."
-			echo ""
-			echo -n "	Press return to continue... "
-			read KEYPRESS
-		else
-			echo "	The iconv(1) utility does not appear to be installed."
-			echo "	Please install this utility or convert the .INF file"
-			echo "	to ASCII and run this utility again."
-			echo ""
-			exit
-		fi
-		return
-	fi
-
-	echo ""
-	echo "	I don't recognize this file format. It may not be a valid .INF file."
-	echo ""
-	echo -n "	Press enter to try again, or ^C to quit. "
-	read KEYPRESS
-	INFPATH=""
+	return
 else
 	echo ""
 	echo "	The file '${INFPATH}' was not found."
@@ -493,7 +443,6 @@
 	return
 }
 
-ICONVPATH=/usr/local/bin/iconv
 NDISCVT=/usr/sbin/ndiscvt
 STUBPATH=/usr/share/misc
 STUBFILE=windrv_stub.c


>Release-Note:
>Audit-Trail:
>Unformatted:



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201306040049.r540nsfr020400>