Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 29 Jan 2002 21:31:13 -0800
From:      Bill Fenner <fenner@research.att.com>
To:        standards@freebsd.org
Subject:   scanf(3) patches for review
Message-ID:  <200201300531.g0U5VEh48095@stash.attlabs.att.com>

next in thread | raw e-mail | index | archive | help

--19701020
Content-Type: text/plain; charset=US-ASCII
Content-Disposition: inline


Here are some long-standing scanf(3) patches, which add the new c99
size modifiers to scanf(3).  They've been languishing in my tree waiting
for me to get around to implementing %n$, but I am clearly not getting
to it so there's nothing to win by sitting on 'em.

One thing that I haven't decided yet is whether it makes sense to
rewrite the size modifier narrative to a table in the same way as
I did for printf(3).  Input is solicited.

Thanks,
  Bill


--19701020
Content-Type: text/plain; name="scanf.diff"; x-unix-mode=0664
Content-Disposition: attachment; filename="scanf.diff"

Index: scanf.3
===================================================================
RCS file: /home/ncvs/src/lib/libc/stdio/scanf.3,v
retrieving revision 1.13
diff -u -r1.13 scanf.3
--- scanf.3	1 Oct 2001 16:08:59 -0000	1.13
+++ scanf.3	30 Jan 2002 05:04:55 -0000
@@ -137,11 +137,20 @@
 there may be a number of
 .Em flag
 characters, as follows:
-.Bl -tag -width indent
+.Bl -tag -width ".Cm l No (ell)"
 .It Cm *
 Suppresses assignment.
 The conversion that follows occurs as usual, but no pointer is used;
 the result of the conversion is simply discarded.
+.It Cm hh
+Indicates that the conversion will be one of
+.Cm dioux
+or
+.Cm n
+and the next pointer is a pointer to a
+.Em char
+(rather than
+.Em int ) .
 .It Cm h
 Indicates that the conversion will be one of
 .Cm dioux
@@ -151,7 +160,7 @@
 .Em short  int
 (rather than
 .Em int ) .
-.It Cm l
+.It Cm l No (ell)
 Indicates either that the conversion will be one of
 .Cm dioux
 or
@@ -160,21 +169,66 @@
 .Em long  int
 (rather than
 .Em int ) ,
-or that the conversion will be one of
-.Cm efg
+that the conversion will be one of
+.Cm aefg
 and the next pointer is a pointer to
 .Em double
 (rather than
-.Em float ) .
+.Em float ) ,
+or that the conversion will be one of
+.Cm c
+or
+.Cm s
+and the next pointer is a pointer to an array of
+.Em wchar_t
+(rather than
+.Em char ) .
+.It Cm ll No (ell ell)
+Indicates that the conversion will be one of
+.Cm dioux
+or
+.Cm n
+and the next pointer is a pointer to a
+.Em long long int
+(rather than
+.Em int ) .
 .It Cm L
 Indicates that the conversion will be
-.Cm efg
+.Cm aefg
 and the next pointer is a pointer to
 .Em long double .
 (This type is not implemented; the
 .Cm L
 flag is currently ignored.)
+.It Cm j
+Indicates that the conversion will be one of
+.Cm dioux
+or
+.Cm n
+and the next pointer is a pointer to a
+.Em intmax_t
+(rather than
+.Em int ) .
+.It Cm t
+Indicates that the conversion will be one of
+.Cm dioux
+or
+.Cm n
+and the next pointer is a pointer to a
+.Em ptrdiff_t
+(rather than
+.Em int ) .
+.It Cm z
+Indicates that the conversion will be one of
+.Cm dioux
+or
+.Cm n
+and the next pointer is a pointer to a
+.Em size_t
+(rather than
+.Em int ) .
 .It Cm q
+(deprecated.)
 Indicates either that the conversion will be one of
 .Cm dioux
 or
@@ -182,7 +236,7 @@
 and the next pointer is a pointer to a
 .Em long long int
 (rather than
-.Em int ) ,
+.Em int ) .
 .El
 .Pp
 In addition to these flags,
@@ -210,10 +264,6 @@
 Matches an optionally signed decimal integer;
 the next pointer must be a pointer to
 .Em int .
-.It Cm D
-Equivalent to
-.Cm ld ;
-this exists only for backwards compatibility.
 .It Cm i
 Matches an optionally signed integer;
 the next pointer must be a pointer to
@@ -231,48 +281,37 @@
 Matches an octal integer;
 the next pointer must be a pointer to
 .Em unsigned int .
-.It Cm O
-Equivalent to
-.Cm lo ;
-this exists for backwards compatibility.
 .It Cm u
 Matches an optionally signed decimal integer;
 the next pointer must be a pointer to
 .Em unsigned int .
-.It Cm x
+.It Cm x , X
 Matches an optionally signed hexadecimal integer;
 the next pointer must be a pointer to
 .Em unsigned int .
-.It Cm X
-Equivalent to
-.Cm lx ;
-this violates the
-.St -isoC ,
-but is backwards compatible with previous
-.Ux
-systems.
-.It Cm f
+.It Cm e , E , f , F , g , G
 Matches an optionally signed floating-point number;
 the next pointer must be a pointer to
 .Em float .
-.It Cm e
-Equivalent to
-.Cm f .
-.It Cm g
-Equivalent to
-.Cm f .
-.It Cm E
-Equivalent to
-.Cm lf ;
-this violates the
-.St -isoC ,
-but is backwards compatible with previous
-.Ux
-systems.
-.It Cm F
-Equivalent to
-.Cm lf ;
-this exists only for backwards compatibility.
+.It Cm a , A
+Matches a hexadecimal number represented in the style
+.Sm off
+.Oo \- Oc Li 0x Ar h Li \&. Ar hhhp Oo \\*[Pm] Oc Ar d .
+.Sm on
+This is an exact conversion of the mantissa+exponent internal
+floating point representation; the
+.Sm off
+.Oo \- Oc Li 0x Ar h Li \&. Ar hhh
+.Sm on
+portion represents exactly the mantissa; only denormalized
+mantissas have a zero value to the left of the hexadecimal
+point.
+The
+.Cm p
+is a literal character
+.Ql p ;
+the exponent is preceded by a positive or negative sign
+and is represented in decimal.
 .It Cm s
 Matches a sequence of non-white-space characters;
 the next pointer must be a pointer to
@@ -283,6 +322,9 @@
 character.
 The input string stops at white space
 or at the maximum field width, whichever occurs first.
+.It Cm S
+The same as
+.Cm ls .
 .It Cm c
 Matches a sequence of
 .Em width
@@ -296,6 +338,9 @@
 is added).
 The usual skip of leading white space is suppressed.
 To skip white space first, use an explicit space in the format.
+.It Cm C
+The same as
+.Cm lc .
 .It Cm \&[
 Matches a nonempty sequence of characters from the specified set
 of accepted characters;
@@ -363,29 +408,10 @@
 .Dv LC_NUMERIC ) .
 .Pp
 For backwards compatibility,
-other conversion characters (except
-.Ql \e0 )
-are taken as if they were
-.Ql %d
-or, if uppercase,
-.Ql %ld ,
-and a `conversion' of
+a `conversion' of
 .Ql %\e0
 causes an immediate return of
 .Dv EOF .
-The
-.Cm F
-and
-.Cm X
-conversions will be changed in the future
-to conform to the
-.Tn ANSI
-C standard,
-after which they will act like
-.Cm f
-and
-.Cm x
-respectively.
 .Sh RETURN VALUES
 These
 functions
@@ -419,7 +445,7 @@
 and
 .Fn sscanf
 conform to
-.St -isoC .
+.St -isoC-99 .
 .Sh HISTORY
 The functions
 .Fn vscanf ,
@@ -428,13 +454,21 @@
 .Fn vfscanf
 are new to this release.
 .Sh BUGS
-The current situation with
-.Cm %F
+Earlier implementations of
+.Nm
+treated
+.Cm \&%D , \&%E , \&%F , \&%O
 and
-.Cm %X
-conversions is unfortunate.
-.Pp
-All of the backwards compatibility formats will be removed in the future.
+.Cm \&%X
+as their lowercase equivalents with an
+.Cm l
+modifier.  In addition,
+.Nm
+treated an unknown conversion character as
+.Cm \&%d
+or
+.Cm \&%D ,
+depending on its case.  This functionality has been removed.
 .Pp
 Numerical strings are truncated to 512 characters; for example,
 .Cm %f
@@ -444,3 +478,25 @@
 .Cm %512f
 and
 .Cm %512d .
+.Pp
+The
+.Cm %n$
+modifiers for positional arguments are not implemented.
+.Pp
+The
+.Cm l
+modifier for
+.Cm %c
+and
+.Cm %s
+(and
+.Cm %C
+and
+.Cm %S )
+to specify wide characters and strings is not implemented.
+.Pp
+The
+.Cm \&%a
+and
+.Cm \&%A
+floating-point formats are not implemented.
Index: vfscanf.c
===================================================================
RCS file: /home/ncvs/src/lib/libc/stdio/vfscanf.c,v
retrieving revision 1.19
diff -u -r1.19 vfscanf.c
--- vfscanf.c	29 Nov 2001 03:03:55 -0000	1.19
+++ vfscanf.c	30 Jan 2002 05:16:58 -0000
@@ -45,6 +45,7 @@
 #include "namespace.h"
 #include <stdio.h>
 #include <stdlib.h>
+#include <stddef.h>
 #include <ctype.h>
 #if __STDC__
 #include <stdarg.h>
@@ -52,6 +53,7 @@
 #include <varargs.h>
 #endif
 #include <string.h>
+#include <inttypes.h>
 #include "un-namespace.h"
 
 #include "collate.h"
@@ -76,7 +78,11 @@
 #define	SUPPRESS	0x08	/* suppress assignment */
 #define	POINTER		0x10	/* weird %p pointer (`fake hex') */
 #define	NOSKIP		0x20	/* do not skip blanks */
-#define	QUAD		0x400
+#define	LONGLONG	0x400	/* ll: long long (+ deprecated q: quad) */
+#define	INTMAXT		0x800	/* j: intmax_t */
+#define	PTRDIFFT	0x1000	/* t: ptrdiff_t */
+#define	SIZET		0x2000	/* z: size_t */
+#define	SHORTSHORT	0x4000	/* hh: char */
 
 /*
  * The following are used in numeric conversions only:
@@ -98,13 +104,10 @@
 #define	CT_CHAR		0	/* %c conversion */
 #define	CT_CCL		1	/* %[...] conversion */
 #define	CT_STRING	2	/* %s conversion */
-#define	CT_INT		3	/* integer, i.e., strtoq or strtouq */
+#define	CT_INT		3	/* integer, i.e., strtoimax or strtoumax */
 #define	CT_FLOAT	4	/* floating, i.e., strtod */
 
-#define u_char unsigned char
-#define u_long unsigned long
-
-static u_char *__sccl(char *, u_char *);
+static const u_char *__sccl(char *, const u_char *);
 
 /*
  * __vfscanf - MT-safe version
@@ -124,9 +127,9 @@
  * __svfscanf - non-MT-safe version of __vfscanf
  */
 int
-__svfscanf(FILE *fp, char const *fmt0, va_list ap)
+__svfscanf(FILE *fp, const char *fmt0, va_list ap)
 {
-	u_char *fmt = (u_char *)fmt0;
+	const u_char *fmt = (const u_char *)fmt0;
 	int c;			/* character from format, or conversion */
 	size_t width;		/* field width, or 0 */
 	char *p;		/* points into all kinds of strings */
@@ -136,8 +139,8 @@
 	int nassigned;		/* number of fields assigned */
 	int nconversions;	/* number of conversions */
 	int nread;		/* number of characters consumed from fp */
-	int base;		/* base argument to strtoq/strtouq */
-	u_quad_t(*ccfn)();	/* conversion function (strtoq/strtouq) */
+	int base;		/* base argument to strtoimax/strtoumax */
+	uintmax_t(*ccfn)();	/* conversion function (strtoimax/strtoumax) */
 	char ccltab[256];	/* character class table for %[...] */
 	char buf[BUF];		/* buffer for numeric conversions */
 
@@ -185,17 +188,34 @@
 		case '*':
 			flags |= SUPPRESS;
 			goto again;
+		case 'j':
+			flags |= INTMAXT;
+			goto again;
 		case 'l':
-			flags |= LONG;
+			if (flags & LONG) {
+				flags &= ~LONG;
+				flags |= LONGLONG;
+			} else
+				flags |= LONG;
 			goto again;
 		case 'q':
-			flags |= QUAD;
+			flags |= LONGLONG;	/* not quite */
+			goto again;
+		case 't':
+			flags |= PTRDIFFT;
+			goto again;
+		case 'z':
+			flags |= SIZET;
 			goto again;
 		case 'L':
 			flags |= LONGDBL;
 			goto again;
 		case 'h':
-			flags |= SHORT;
+			if (flags & SHORT) {
+				flags &= ~SHORT;
+				flags |= SHORTSHORT;
+			} else
+				flags |= SHORT;
 			goto again;
 
 		case '0': case '1': case '2': case '3': case '4':
@@ -205,61 +225,49 @@
 
 		/*
 		 * Conversions.
-		 * Those marked `compat' are for 4.[123]BSD compatibility.
-		 *
-		 * (According to ANSI, E and X formats are supposed
-		 * to the same as e and x.  Sorry about that.)
 		 */
-		case 'D':	/* compat */
-			flags |= LONG;
-			/* FALLTHROUGH */
 		case 'd':
 			c = CT_INT;
-			ccfn = (u_quad_t (*)())strtoq;
+			ccfn = (uintmax_t (*)())strtoimax;
 			base = 10;
 			break;
 
 		case 'i':
 			c = CT_INT;
-			ccfn = (u_quad_t (*)())strtoq;
+			ccfn = (uintmax_t (*)())strtoimax;
 			base = 0;
 			break;
 
-		case 'O':	/* compat */
-			flags |= LONG;
-			/* FALLTHROUGH */
 		case 'o':
 			c = CT_INT;
-			ccfn = strtouq;
+			ccfn = strtoumax;
 			base = 8;
 			break;
 
 		case 'u':
 			c = CT_INT;
-			ccfn = strtouq;
+			ccfn = strtoumax;
 			base = 10;
 			break;
 
-		case 'X':	/* compat   XXX */
-			flags |= LONG;
-			/* FALLTHROUGH */
+		case 'X':
 		case 'x':
 			flags |= PFXOK;	/* enable 0x prefixing */
 			c = CT_INT;
-			ccfn = strtouq;
+			ccfn = strtoumax;
 			base = 16;
 			break;
 
 #ifdef FLOATING_POINT
-		case 'E':	/* compat   XXX */
-		case 'F':	/* compat */
-			flags |= LONG;
-			/* FALLTHROUGH */
+		case 'E': case 'F': case 'G':
 		case 'e': case 'f': case 'g':
 			c = CT_FLOAT;
 			break;
 #endif
 
+		case 'S':
+			flags |= LONG;
+			/* FALLTHROUGH */
 		case 's':
 			c = CT_STRING;
 			break;
@@ -270,6 +278,9 @@
 			c = CT_CCL;
 			break;
 
+		case 'C':
+			flags |= LONG;
+			/* FALLTHROUGH */
 		case 'c':
 			flags |= NOSKIP;
 			c = CT_CHAR;
@@ -278,7 +289,7 @@
 		case 'p':	/* pointer format is like hex */
 			flags |= POINTER | PFXOK;
 			c = CT_INT;
-			ccfn = strtouq;
+			ccfn = strtoumax;
 			base = 16;
 			break;
 
@@ -286,29 +297,32 @@
 			nconversions++;
 			if (flags & SUPPRESS)	/* ??? */
 				continue;
-			if (flags & SHORT)
+			if (flags & SHORTSHORT)
+				*va_arg(ap, char *) = nread;
+			else if (flags & SHORT)
 				*va_arg(ap, short *) = nread;
 			else if (flags & LONG)
 				*va_arg(ap, long *) = nread;
-			else if (flags & QUAD)
-				*va_arg(ap, quad_t *) = nread;
+			else if (flags & LONGLONG)
+				*va_arg(ap, long long *) = nread;
+			else if (flags & INTMAXT)
+				*va_arg(ap, intmax_t *) = nread;
+			else if (flags & SIZET)
+				*va_arg(ap, size_t *) = nread;
+			else if (flags & PTRDIFFT)
+				*va_arg(ap, ptrdiff_t *) = nread;
 			else
 				*va_arg(ap, int *) = nread;
 			continue;
 
+		default:
+			goto match_failure;
+
 		/*
-		 * Disgusting backwards compatibility hacks.	XXX
+		 * Disgusting backwards compatibility hack.	XXX
 		 */
 		case '\0':	/* compat */
 			return (EOF);
-
-		default:	/* compat */
-			if (isupper(c))
-				flags |= LONG;
-			c = CT_INT;
-			ccfn = (u_quad_t (*)())strtoq;
-			base = 10;
-			break;
 		}
 
 		/*
@@ -451,7 +465,7 @@
 			continue;
 
 		case CT_INT:
-			/* scan an integer as if by strtoq/strtouq */
+			/* scan an integer as if by strtoimax/strtoumax */
 #ifdef hardway
 			if (width == 0 || width > sizeof(buf) - 1)
 				width = sizeof(buf) - 1;
@@ -569,19 +583,27 @@
 				(void) __ungetc(c, fp);
 			}
 			if ((flags & SUPPRESS) == 0) {
-				u_quad_t res;
+				uintmax_t res;
 
 				*p = 0;
 				res = (*ccfn)(buf, (char **)NULL, base);
 				if (flags & POINTER)
 					*va_arg(ap, void **) =
-						(void *)(u_long)res;
+							(void *)(uintptr_t)res;
+				else if (flags & SHORTSHORT)
+					*va_arg(ap, char *) = res;
 				else if (flags & SHORT)
 					*va_arg(ap, short *) = res;
 				else if (flags & LONG)
 					*va_arg(ap, long *) = res;
-				else if (flags & QUAD)
-					*va_arg(ap, quad_t *) = res;
+				else if (flags & LONGLONG)
+					*va_arg(ap, long long *) = res;
+				else if (flags & INTMAXT)
+					*va_arg(ap, intmax_t *) = res;
+				else if (flags & PTRDIFFT)
+					*va_arg(ap, ptrdiff_t *) = res;
+				else if (flags & SIZET)
+					*va_arg(ap, size_t *) = res;
 				else
 					*va_arg(ap, int *) = res;
 				nassigned++;
@@ -700,10 +722,10 @@
  * closing `]'.  The table has a 1 wherever characters should be
  * considered part of the scanset.
  */
-static u_char *
+static const u_char *
 __sccl(tab, fmt)
 	register char *tab;
-	register u_char *fmt;
+	register const u_char *fmt;
 {
 	register int c, n, v, i;
 

--19701020--

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-standards" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200201300531.g0U5VEh48095>