Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 23 May 2002 05:00:15 +0000
From:      Dima Dorfman <dima@trit.org>
To:        audit@freebsd.org
Subject:   %j for printf(9)
Message-ID:  <20020523050017.E3A2A3E22@turbine.trit.org>

next in thread | raw e-mail | index | archive | help
Attached is a patch that implements the %j length modifier in
printf(9).  I would appreciate it if someone could test it on one (or
more) of the 64-bit architectures (applying and checking if ddb ps
output looks sensible should be sufficient); I have no reason to think
it will break there, but breaking printf for !i386 would be a Bad
Thing(tm).

I would also appreciate it if someone could review this, particularly
looking for anywhere I assume signed or unsigned where I shouldn't
(I'm concerned with mis-{sign,zero}-extension).

See message-id <20020405080000.754FB3E31@bazooka.trit.org> for the
discussion on -standards.  The patch has not changed since then.

I would also like to solicit comments on what to do with %z; C99 says
this is the length modifier for size_t, but the kernel treats it as
signed hex.  This patch implements the C99 length modifier as %Z, but
that probably shouldn't stay.  IIRC, I couldn't find any place in the
kernel that we actually use %z; can it perhaps be removed?

I'd like to get this into the tree pretty soon, since someone already
used %j in the kernel.

Thanks.

Index: subr_prf.c
===================================================================
RCS file: /ref/cvsf/src/sys/kern/subr_prf.c,v
retrieving revision 1.80
diff -u -r1.80 subr_prf.c
--- subr_prf.c	1 Apr 2002 21:30:49 -0000	1.80
+++ subr_prf.c	4 Apr 2002 02:33:56 -0000
@@ -40,6 +40,7 @@
  */
 
 #include <sys/param.h>
+#include <sys/stdint.h>
 #include <sys/systm.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
@@ -78,6 +79,12 @@
 	size_t	remain;
 };
 
+/* Length modifier. */
+enum lenmod {
+	LM_POINTER, LM_QUAD, LM_UQUAD, LM_LONG, LM_ULONG,
+	LM_INT, LM_UINT, LM_INTMAX, LM_UINTMAX, LM_SIZET
+};
+
 extern	int log_open;
 
 struct	tty *constty;			/* pointer to console "window" tty */
@@ -86,8 +93,9 @@
 static void  msglogchar(int c, int pri);
 static void  msgaddchar(int c, void *dummy);
 static void  putchar(int ch, void *arg);
-static char *ksprintn(char *nbuf, u_long num, int base, int *len);
-static char *ksprintqn(char *nbuf, u_quad_t num, int base, int *len);
+static char *ksprintn(char *nbuf, uintmax_t num, int base, int *len);
+static uintmax_t lm_arg(va_list *app, enum lenmod lm);
+static enum lenmod lm_unsigned(enum lenmod in);
 static void  snprintf_func(int ch, void *arg);
 
 static int consintr = 1;		/* Ok to handle console interrupts? */
@@ -419,9 +427,9 @@
  * The buffer pointed to by `nbuf' must have length >= MAXNBUF.
  */
 static char *
-ksprintn(nbuf, ul, base, lenp)
+ksprintn(nbuf, um, base, lenp)
 	char *nbuf;
-	u_long ul;
+	uintmax_t um;
 	int base, *lenp;
 {
 	char *p;
@@ -429,29 +437,92 @@
 	p = nbuf;
 	*p = '\0';
 	do {
-		*++p = hex2ascii(ul % base);
-	} while (ul /= base);
+		*++p = hex2ascii(um % base);
+	} while (um /= base);
 	if (lenp)
 		*lenp = p - nbuf;
 	return (p);
 }
-/* ksprintn, but for a quad_t. */
-static char *
-ksprintqn(nbuf, uq, base, lenp)
-	char *nbuf;
-	u_quad_t uq;
-	int base, *lenp;
+
+/*
+ * Retrieve the next argument in `app' according to `lm'.
+ */
+static uintmax_t
+lm_arg(va_list *app, enum lenmod lm)
 {
-	char *p;
+	uintmax_t um;
 
-	p = nbuf;
-	*p = '\0';
-	do {
-		*++p = hex2ascii(uq % base);
-	} while (uq /= base);
-	if (lenp)
-		*lenp = p - nbuf;
-	return (p);
+	switch (lm) {
+	case LM_POINTER:
+		um = (uintptr_t)va_arg(*app, void *);
+		break;
+	case LM_QUAD:
+		um = (quad_t)va_arg(*app, quad_t);
+		break;
+	case LM_UQUAD:
+		um = (u_quad_t)va_arg(*app, u_quad_t);
+		break;
+	case LM_LONG:
+		um = (long)va_arg(*app, long);
+		break;
+	case LM_ULONG:
+		um = (u_long)va_arg(*app, u_long);
+		break;
+	case LM_INT:
+		um = (int)va_arg(*app, int);
+		break;
+	case LM_UINT:
+		um = (u_int)va_arg(*app, u_int);
+		break;
+	case LM_INTMAX:
+		um = (intmax_t)va_arg(*app, intmax_t);
+		break;
+	case LM_UINTMAX:
+		um = (uintmax_t)va_arg(*app, uintmax_t);
+		break;
+	case LM_SIZET:
+		um = (size_t)va_arg(*app, size_t);
+		break;
+	default:
+		panic("lm_arg: unknown lm=%d", lm);
+	}
+	return (um);
+}
+
+/*
+ * Return the unsigned counerpart of the length modifier passed in.
+ */
+static enum lenmod
+lm_unsigned(enum lenmod in)
+{
+	enum lenmod out;
+
+	switch (in) {
+	case LM_QUAD:
+		out = LM_UQUAD;
+		break;
+	case LM_LONG:
+		out = LM_ULONG;
+		break;
+	case LM_INT:
+		out = LM_UINT;
+		break;
+	case LM_INTMAX:
+		out = LM_UINTMAX;
+		break;
+	case LM_UQUAD:
+	case LM_ULONG:
+	case LM_UINT:
+	case LM_UINTMAX:
+	case LM_POINTER:
+	case LM_SIZET:		/* Should we do something special with this? */
+		/* These are already unsigned, so just return the input. */
+		out = in;
+		break;
+	default:
+		panic("lm_unsigned: unknown lm=%d", in);
+	}
+	return (out);
 }
 
 /*
@@ -488,15 +559,14 @@
 	char *p, *q, *d;
 	u_char *up;
 	int ch, n;
-	u_long ul;
-	u_quad_t uq;
-	int base, lflag, qflag, tmp, width, ladjust, sharpflag, neg, sign, dot;
+	uintmax_t um;
+	enum lenmod lm;
+	int base, tmp, width, ladjust, sharpflag, neg, sign, dot;
 	int dwidth;
 	char padc;
 	int retval = 0;
 
-	ul = 0;
-	uq = 0;
+	um = 0;
 	if (!func)
 		d = (char *) arg;
 	else
@@ -516,7 +586,8 @@
 				return (retval);
 			PCHAR(ch);
 		}
-		qflag = 0; lflag = 0; ladjust = 0; sharpflag = 0; neg = 0;
+		lm = LM_INT;
+		ladjust = 0; sharpflag = 0; neg = 0;
 		sign = 0; dot = 0; dwidth = 0;
 reswitch:	switch (ch = (u_char)*fmt++) {
 		case '.':
@@ -564,17 +635,17 @@
 				width = n;
 			goto reswitch;
 		case 'b':
-			ul = va_arg(ap, int);
+			um = va_arg(ap, int);
 			p = va_arg(ap, char *);
-			for (q = ksprintn(nbuf, ul, *p++, NULL); *q;)
+			for (q = ksprintn(nbuf, um, *p++, NULL); *q;)
 				PCHAR(*q--);
 
-			if (!ul)
+			if (!um)
 				break;
 
 			for (tmp = 0; *p;) {
 				n = *p++;
-				if (ul & (1 << (n - 1))) {
+				if (um & (1 << (n - 1))) {
 					PCHAR(tmp ? ',' : '<');
 					for (; (n = *p) > ' '; ++p)
 						PCHAR(n);
@@ -604,48 +675,34 @@
 			}
 			break;
 		case 'd':
-			if (qflag)
-				uq = va_arg(ap, quad_t);
-			else if (lflag)
-				ul = va_arg(ap, long);
-			else
-				ul = va_arg(ap, int);
+			um = lm_arg(&ap, lm);
 			sign = 1;
 			base = 10;
 			goto number;
+		case 'j':
+			lm = LM_INTMAX;
+			goto reswitch;
 		case 'l':
-			if (lflag) {
-				lflag = 0;
-				qflag = 1;
-			} else
-				lflag = 1;
+			if (lm == LM_LONG)
+				lm = LM_QUAD;
+			else
+				lm = LM_LONG;
 			goto reswitch;
 		case 'o':
-			if (qflag)
-				uq = va_arg(ap, u_quad_t);
-			else if (lflag)
-				ul = va_arg(ap, u_long);
-			else
-				ul = va_arg(ap, u_int);
+			um = lm_arg(&ap, lm_unsigned(lm));
 			base = 8;
 			goto nosign;
 		case 'p':
-			ul = (uintptr_t)va_arg(ap, void *);
+			um = lm_arg(&ap, LM_POINTER);
 			base = 16;
 			sharpflag = (width == 0);
 			goto nosign;
 		case 'q':
-			qflag = 1;
+			lm = LM_QUAD;
 			goto reswitch;
 		case 'n':
 		case 'r':
-			if (qflag)
-				uq = va_arg(ap, u_quad_t);
-			else if (lflag)
-				ul = va_arg(ap, u_long);
-			else
-				ul = sign ?
-				    (u_long)va_arg(ap, int) : va_arg(ap, u_int);
+			um = lm_arg(&ap, sign ? lm : lm_unsigned(lm));
 			base = radix;
 			goto number;
 		case 's':
@@ -670,50 +727,29 @@
 					PCHAR(padc);
 			break;
 		case 'u':
-			if (qflag)
-				uq = va_arg(ap, u_quad_t);
-			else if (lflag)
-				ul = va_arg(ap, u_long);
-			else
-				ul = va_arg(ap, u_int);
+			um = lm_arg(&ap, lm_unsigned(lm));
 			base = 10;
 			goto nosign;
 		case 'x':
 		case 'X':
-			if (qflag)
-				uq = va_arg(ap, u_quad_t);
-			else if (lflag)
-				ul = va_arg(ap, u_long);
-			else
-				ul = va_arg(ap, u_int);
+			um = lm_arg(&ap, lm_unsigned(lm));
 			base = 16;
 			goto nosign;
+		case 'Z':	/* XXX: This should be 'z'. */
+			lm = LM_SIZET;
+			goto reswitch;
 		case 'z':
-			if (qflag)
-				uq = va_arg(ap, u_quad_t);
-			else if (lflag)
-				ul = va_arg(ap, u_long);
-			else
-				ul = sign ?
-				    (u_long)va_arg(ap, int) : va_arg(ap, u_int);
+			um = lm_arg(&ap, sign ? lm : lm_unsigned(lm));
 			base = 16;
 			goto number;
 nosign:			sign = 0;
 number:			
-			if (qflag) {
-				if (sign && (quad_t)uq < 0) {
-					neg = 1;
-					uq = -(quad_t)uq;
-				}
-				p = ksprintqn(nbuf, uq, base, &tmp);
-			} else {
-				if (sign && (long)ul < 0) {
-					neg = 1;
-					ul = -(long)ul;
-				}
-				p = ksprintn(nbuf, ul, base, &tmp);
+			if (sign && (intmax_t)um < 0) {
+				neg = 1;
+				um = -(intmax_t)um;
 			}
-			if (sharpflag && (qflag ? uq != 0 : ul != 0)) {
+			p = ksprintn(nbuf, um, base, &tmp);
+			if (sharpflag && um != 0) {
 				if (base == 8)
 					tmp++;
 				else if (base == 16)
@@ -727,7 +763,7 @@
 					PCHAR(padc);
 			if (neg)
 				PCHAR('-');
-			if (sharpflag && (qflag ? uq != 0 : ul != 0)) {
+			if (sharpflag && um != 0) {
 				if (base == 8) {
 					PCHAR('0');
 				} else if (base == 16) {
@@ -746,7 +782,17 @@
 			break;
 		default:
 			PCHAR('%');
-			if (lflag)
+			/*
+			 * XXX: This is bogus.  We can have more flags
+			 * than just `l', and we can even have `l'
+			 * more than once.  The old test was
+			 *
+			 *	if (lflag)
+			 *
+			 * and we're just imitating that for now.
+			 */
+			if (lm == LM_LONG || lm == LM_ULONG ||
+			    lm == LM_QUAD || lm == LM_UQUAD)
 				PCHAR('l');
 			PCHAR(ch);
 			break;
@@ -776,7 +822,7 @@
 			dangling = 0;
 		}
 		msgaddchar('<', NULL);
-		for (p = ksprintn(nbuf, (u_long)pri, 10, NULL); *p;)
+		for (p = ksprintn(nbuf, (uintmax_t)pri, 10, NULL); *p;)
 			msgaddchar(*p--, NULL);
 		msgaddchar('>', NULL);
 		lastpri = pri;

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-audit" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20020523050017.E3A2A3E22>