From owner-svn-src-all@FreeBSD.ORG Thu Jan 22 08:14:29 2009 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 13A3C106568C; Thu, 22 Jan 2009 08:14:29 +0000 (UTC) (envelope-from das@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 013608FC18; Thu, 22 Jan 2009 08:14:29 +0000 (UTC) (envelope-from das@FreeBSD.org) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id n0M8ESMe004933; Thu, 22 Jan 2009 08:14:28 GMT (envelope-from das@svn.freebsd.org) Received: (from das@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id n0M8ESfc004930; Thu, 22 Jan 2009 08:14:28 GMT (envelope-from das@svn.freebsd.org) Message-Id: <200901220814.n0M8ESfc004930@svn.freebsd.org> From: David Schultz Date: Thu, 22 Jan 2009 08:14:28 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r187582 - head/lib/libc/stdio X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 22 Jan 2009 08:14:29 -0000 Author: das Date: Thu Jan 22 08:14:28 2009 New Revision: 187582 URL: http://svn.freebsd.org/changeset/base/187582 Log: Add support for multibyte thousands_sep encodings, e.g., U+066C. The integer thousands' separator code is rewritten in order to avoid having to preallocate a buffer for the largest possible digit string with the most possible instances of the longest possible multibyte thousands' separator. The new version inserts thousands' separators for integers using the same code as floating point. Modified: head/lib/libc/stdio/printfcommon.h head/lib/libc/stdio/vfprintf.c head/lib/libc/stdio/vfwprintf.c Modified: head/lib/libc/stdio/printfcommon.h ============================================================================== --- head/lib/libc/stdio/printfcommon.h Thu Jan 22 06:39:31 2009 (r187581) +++ head/lib/libc/stdio/printfcommon.h Thu Jan 22 08:14:28 2009 (r187582) @@ -54,10 +54,8 @@ static int exponent(CHAR *, int, CHAR); #endif /* !NO_FLOATING_POINT */ -static CHAR *__ujtoa(uintmax_t, CHAR *, int, int, const char *, int, char, - const char *); -static CHAR *__ultoa(u_long, CHAR *, int, int, const char *, int, char, - const char *); +static CHAR *__ujtoa(uintmax_t, CHAR *, int, int, const char *); +static CHAR *__ultoa(u_long, CHAR *, int, int, const char *); #define NIOV 8 struct io_state { @@ -158,12 +156,10 @@ io_flush(struct io_state *iop) * use the given digits. */ static CHAR * -__ultoa(u_long val, CHAR *endp, int base, int octzero, const char *xdigs, - int needgrp, char thousep, const char *grp) +__ultoa(u_long val, CHAR *endp, int base, int octzero, const char *xdigs) { CHAR *cp = endp; long sval; - int ndig; /* * Handle the three cases separately, in the hope of getting @@ -175,7 +171,6 @@ __ultoa(u_long val, CHAR *endp, int base *--cp = to_char(val); return (cp); } - ndig = 0; /* * On many machines, unsigned arithmetic is harder than * signed arithmetic, so we do at most one unsigned mod and @@ -184,29 +179,11 @@ __ultoa(u_long val, CHAR *endp, int base */ if (val > LONG_MAX) { *--cp = to_char(val % 10); - ndig++; sval = val / 10; } else sval = val; do { *--cp = to_char(sval % 10); - ndig++; - /* - * If (*grp == CHAR_MAX) then no more grouping - * should be performed. - */ - if (needgrp && ndig == *grp && *grp != CHAR_MAX - && sval > 9) { - *--cp = thousep; - ndig = 0; - /* - * If (*(grp+1) == '\0') then we have to - * use *grp character (last grouping rule) - * for all next cases - */ - if (*(grp+1) != '\0') - grp++; - } sval /= 10; } while (sval != 0); break; @@ -235,50 +212,28 @@ __ultoa(u_long val, CHAR *endp, int base /* Identical to __ultoa, but for intmax_t. */ static CHAR * -__ujtoa(uintmax_t val, CHAR *endp, int base, int octzero, const char *xdigs, - int needgrp, char thousep, const char *grp) +__ujtoa(uintmax_t val, CHAR *endp, int base, int octzero, const char *xdigs) { CHAR *cp = endp; intmax_t sval; - int ndig; /* quick test for small values; __ultoa is typically much faster */ /* (perhaps instead we should run until small, then call __ultoa?) */ if (val <= ULONG_MAX) - return (__ultoa((u_long)val, endp, base, octzero, xdigs, - needgrp, thousep, grp)); + return (__ultoa((u_long)val, endp, base, octzero, xdigs)); switch (base) { case 10: if (val < 10) { *--cp = to_char(val % 10); return (cp); } - ndig = 0; if (val > INTMAX_MAX) { *--cp = to_char(val % 10); - ndig++; sval = val / 10; } else sval = val; do { *--cp = to_char(sval % 10); - ndig++; - /* - * If (*grp == CHAR_MAX) then no more grouping - * should be performed. - */ - if (needgrp && *grp != CHAR_MAX && ndig == *grp - && sval > 9) { - *--cp = thousep; - ndig = 0; - /* - * If (*(grp+1) == '\0') then we have to - * use *grp character (last grouping rule) - * for all next cases - */ - if (*(grp+1) != '\0') - grp++; - } sval /= 10; } while (sval != 0); break; Modified: head/lib/libc/stdio/vfprintf.c ============================================================================== --- head/lib/libc/stdio/vfprintf.c Thu Jan 22 06:39:31 2009 (r187581) +++ head/lib/libc/stdio/vfprintf.c Thu Jan 22 08:14:28 2009 (r187582) @@ -72,6 +72,75 @@ static char *__wcsconv(wchar_t *, int); #define CHAR char #include "printfcommon.h" +struct grouping_state { + char *thousands_sep; /* locale-specific thousands separator */ + int thousep_len; /* length of thousands_sep */ + const char *grouping; /* locale-specific numeric grouping rules */ + int lead; /* sig figs before decimal or group sep */ + int nseps; /* number of group separators with ' */ + int nrepeats; /* number of repeats of the last group */ +}; + +/* + * Initialize the thousands' grouping state in preparation to print a + * number with ndigits digits. This routine returns the total number + * of bytes that will be needed. + */ +static int +grouping_init(struct grouping_state *gs, int ndigits) +{ + struct lconv *locale; + + locale = localeconv(); + gs->grouping = locale->grouping; + gs->thousands_sep = locale->thousands_sep; + gs->thousep_len = strlen(gs->thousands_sep); + + gs->nseps = gs->nrepeats = 0; + gs->lead = ndigits; + while (*gs->grouping != CHAR_MAX) { + if (gs->lead <= *gs->grouping) + break; + gs->lead -= *gs->grouping; + if (*(gs->grouping+1)) { + gs->nseps++; + gs->grouping++; + } else + gs->nrepeats++; + } + return ((gs->nseps + gs->nrepeats) * gs->thousep_len); +} + +/* + * Print a number with thousands' separators. + */ +static int +grouping_print(struct grouping_state *gs, struct io_state *iop, + const CHAR *cp, const CHAR *ep) +{ + const CHAR *cp0 = cp; + + if (io_printandpad(iop, cp, ep, gs->lead, zeroes)) + return (-1); + cp += gs->lead; + while (gs->nseps > 0 || gs->nrepeats > 0) { + if (gs->nrepeats > 0) + gs->nrepeats--; + else { + gs->grouping--; + gs->nseps--; + } + if (io_print(iop, gs->thousands_sep, gs->thousep_len)) + return (-1); + if (io_printandpad(iop, cp, ep, *gs->grouping, zeroes)) + return (-1); + cp += *gs->grouping; + } + if (cp > ep) + cp = ep; + return (cp - cp0); +} + /* * Flush out all the vectors defined by the given uio, * then reset it so that it can be reused. @@ -210,12 +279,14 @@ vfprintf(FILE * __restrict fp, const cha /* * The size of the buffer we use as scratch space for integer - * conversions, among other things. Technically, we would need the - * most space for base 10 conversions with thousands' grouping - * characters between each pair of digits. 100 bytes is a - * conservative overestimate even for a 128-bit uintmax_t. + * conversions, among other things. We need enough space to + * write a uintmax_t in octal (plus one byte). */ -#define BUF 100 +#if UINTMAX_MAX <= UINT64_MAX +#define BUF 32 +#else +#error "BUF must be large enough to format a uintmax_t" +#endif /* * Non-MT-safe version @@ -232,8 +303,7 @@ __vfprintf(FILE *fp, const char *fmt0, v int width; /* width from format (%8d), or 0 */ int prec; /* precision from format; <0 for N/A */ char sign; /* sign prefix (' ', '+', '-', or \0) */ - char thousands_sep; /* locale specific thousands separator */ - const char *grouping; /* locale specific numeric grouping rules */ + struct grouping_state gs; /* thousands' grouping info */ #ifndef NO_FLOATING_POINT /* @@ -261,12 +331,9 @@ __vfprintf(FILE *fp, const char *fmt0, v char expchar; /* exponent character: [eEpP\0] */ char *dtoaend; /* pointer to end of converted digits */ int expsize; /* character count for expstr */ - int lead; /* sig figs before decimal or group sep */ int ndig; /* actual number of digits returned by dtoa */ char expstr[MAXEXPDIG+2]; /* buffer for exponent string: e+ZZZ */ char *dtoaresult; /* buffer allocated by dtoa */ - int nseps; /* number of group separators with ' */ - int nrepeats; /* number of repeats of the last group */ #endif u_long ulval; /* integer arguments %[diouxX] */ uintmax_t ujval; /* %j, %ll, %q, %t, %z integers */ @@ -378,8 +445,6 @@ __vfprintf(FILE *fp, const char *fmt0, v if (prepwrite(fp) != 0) return (EOF); - thousands_sep = '\0'; - grouping = NULL; convbuf = NULL; fmt = (char *)fmt0; argtable = NULL; @@ -416,6 +481,7 @@ __vfprintf(FILE *fp, const char *fmt0, v dprec = 0; width = 0; prec = -1; + gs.grouping = NULL; sign = '\0'; ox[1] = '\0'; @@ -453,8 +519,6 @@ reswitch: switch (ch) { goto rflag; case '\'': flags |= GROUPING; - thousands_sep = *(localeconv()->thousands_sep); - grouping = localeconv()->grouping; goto rflag; case '.': if ((ch = *fmt++) == '*') { @@ -685,23 +749,8 @@ fp_common: /* space for decimal pt and following digits */ if (prec || flags & ALT) size += prec + decpt_len; - if (grouping && expt > 0) { - /* space for thousands' grouping */ - nseps = nrepeats = 0; - lead = expt; - while (*grouping != CHAR_MAX) { - if (lead <= *grouping) - break; - lead -= *grouping; - if (*(grouping+1)) { - nseps++; - grouping++; - } else - nrepeats++; - } - size += nseps + nrepeats; - } else - lead = expt; + if ((flags & GROUPING) && expt > 0) + size += grouping_init(&gs, expt); } break; #endif /* !NO_FLOATING_POINT */ @@ -842,20 +891,18 @@ number: if ((dprec = prec) >= 0) if (ujval != 0 || prec != 0 || (flags & ALT && base == 8)) cp = __ujtoa(ujval, cp, base, - flags & ALT, xdigs, - flags & GROUPING, thousands_sep, - grouping); + flags & ALT, xdigs); } else { if (ulval != 0 || prec != 0 || (flags & ALT && base == 8)) cp = __ultoa(ulval, cp, base, - flags & ALT, xdigs, - flags & GROUPING, thousands_sep, - grouping); + flags & ALT, xdigs); } size = buf + BUF - cp; if (size > BUF) /* should never happen */ abort(); + if ((flags & GROUPING) && size != 0) + size += grouping_init(&gs, size); break; default: /* "%?" prints ?, unless ? is NUL */ if (ch == '\0') @@ -911,13 +958,19 @@ number: if ((dprec = prec) >= 0) if ((flags & (LADJUST|ZEROPAD)) == ZEROPAD) PAD(width - realsz, zeroes); - /* leading zeroes from decimal precision */ - PAD(dprec - size, zeroes); - /* the string or number proper */ #ifndef NO_FLOATING_POINT if ((flags & FPT) == 0) { - PRINT(cp, size); +#endif + /* leading zeroes from decimal precision */ + PAD(dprec - size, zeroes); + if (gs.grouping) { + if (grouping_print(&gs, &io, cp, buf+BUF) < 0) + goto error; + } else { + PRINT(cp, size); + } +#ifndef NO_FLOATING_POINT } else { /* glue together f_p fragments */ if (!expchar) { /* %[fF] or sufficiently short %[gG] */ if (expt <= 0) { @@ -928,24 +981,16 @@ number: if ((dprec = prec) >= 0) /* already handled initial 0's */ prec += expt; } else { - PRINTANDPAD(cp, dtoaend, lead, zeroes); - cp += lead; - if (grouping) { - while (nseps>0 || nrepeats>0) { - if (nrepeats > 0) - nrepeats--; - else { - grouping--; - nseps--; - } - PRINT(&thousands_sep, - 1); - PRINTANDPAD(cp,dtoaend, - *grouping, zeroes); - cp += *grouping; - } - if (cp > dtoaend) - cp = dtoaend; + if (gs.grouping) { + n = grouping_print(&gs, &io, + cp, dtoaend); + if (n < 0) + goto error; + cp += n; + } else { + PRINTANDPAD(cp, dtoaend, + expt, zeroes); + cp += expt; } if (prec || flags & ALT) PRINT(decimal_point,decpt_len); @@ -962,8 +1007,6 @@ number: if ((dprec = prec) >= 0) PRINT(expstr, expsize); } } -#else - PRINT(cp, size); #endif /* left-adjusting padding (always blank) */ if (flags & LADJUST) Modified: head/lib/libc/stdio/vfwprintf.c ============================================================================== --- head/lib/libc/stdio/vfwprintf.c Thu Jan 22 06:39:31 2009 (r187581) +++ head/lib/libc/stdio/vfwprintf.c Thu Jan 22 08:14:28 2009 (r187582) @@ -74,6 +74,14 @@ static wchar_t *__mbsconv(char *, int); #define CHAR wchar_t #include "printfcommon.h" +struct grouping_state { + wchar_t thousands_sep; /* locale-specific thousands separator */ + const char *grouping; /* locale-specific numeric grouping rules */ + int lead; /* sig figs before decimal or group sep */ + int nseps; /* number of group separators with ' */ + int nrepeats; /* number of repeats of the last group */ +}; + static const mbstate_t initial_mbs; static inline wchar_t @@ -90,6 +98,79 @@ get_decpt(void) return (decpt); } +static inline wchar_t +get_thousep(void) +{ + mbstate_t mbs; + wchar_t thousep; + int nconv; + + mbs = initial_mbs; + nconv = mbrtowc(&thousep, localeconv()->thousands_sep, + MB_CUR_MAX, &mbs); + if (nconv == (size_t)-1 || nconv == (size_t)-2) + thousep = '\0'; /* failsafe */ + return (thousep); +} + +/* + * Initialize the thousands' grouping state in preparation to print a + * number with ndigits digits. This routine returns the total number + * of wide characters that will be printed. + */ +static int +grouping_init(struct grouping_state *gs, int ndigits) +{ + + gs->grouping = localeconv()->grouping; + gs->thousands_sep = get_thousep(); + + gs->nseps = gs->nrepeats = 0; + gs->lead = ndigits; + while (*gs->grouping != CHAR_MAX) { + if (gs->lead <= *gs->grouping) + break; + gs->lead -= *gs->grouping; + if (*(gs->grouping+1)) { + gs->nseps++; + gs->grouping++; + } else + gs->nrepeats++; + } + return (gs->nseps + gs->nrepeats); +} + +/* + * Print a number with thousands' separators. + */ +static int +grouping_print(struct grouping_state *gs, struct io_state *iop, + const CHAR *cp, const CHAR *ep) +{ + const CHAR *cp0 = cp; + + if (io_printandpad(iop, cp, ep, gs->lead, zeroes)) + return (-1); + cp += gs->lead; + while (gs->nseps > 0 || gs->nrepeats > 0) { + if (gs->nrepeats > 0) + gs->nrepeats--; + else { + gs->grouping--; + gs->nseps--; + } + if (io_print(iop, &gs->thousands_sep, 1)) + return (-1); + if (io_printandpad(iop, cp, ep, *gs->grouping, zeroes)) + return (-1); + cp += *gs->grouping; + } + if (cp > ep) + cp = ep; + return (cp - cp0); +} + + /* * Flush out all the vectors defined by the given uio, * then reset it so that it can be reused. @@ -280,12 +361,14 @@ vfwprintf(FILE * __restrict fp, const wc /* * The size of the buffer we use as scratch space for integer - * conversions, among other things. Technically, we would need the - * most space for base 10 conversions with thousands' grouping - * characters between each pair of digits. 100 bytes is a - * conservative overestimate even for a 128-bit uintmax_t. + * conversions, among other things. We need enough space to + * write a uintmax_t in octal (plus one byte). */ -#define BUF 100 +#if UINTMAX_MAX <= UINT64_MAX +#define BUF 32 +#else +#error "BUF must be large enough to format a uintmax_t" +#endif /* * Non-MT-safe version @@ -302,8 +385,7 @@ __vfwprintf(FILE *fp, const wchar_t *fmt int width; /* width from format (%8d), or 0 */ int prec; /* precision from format; <0 for N/A */ wchar_t sign; /* sign prefix (' ', '+', '-', or \0) */ - wchar_t thousands_sep; /* locale specific thousands separator */ - const char *grouping; /* locale specific numeric grouping rules */ + struct grouping_state gs; /* thousands' grouping info */ #ifndef NO_FLOATING_POINT /* * We can decompose the printed representation of floating @@ -329,12 +411,9 @@ __vfwprintf(FILE *fp, const wchar_t *fmt char expchar; /* exponent character: [eEpP\0] */ char *dtoaend; /* pointer to end of converted digits */ int expsize; /* character count for expstr */ - int lead; /* sig figs before decimal or group sep */ int ndig; /* actual number of digits returned by dtoa */ wchar_t expstr[MAXEXPDIG+2]; /* buffer for exponent string: e+ZZZ */ char *dtoaresult; /* buffer allocated by dtoa */ - int nseps; /* number of group separators with ' */ - int nrepeats; /* number of repeats of the last group */ #endif u_long ulval; /* integer arguments %[diouxX] */ uintmax_t ujval; /* %j, %ll, %q, %t, %z integers */ @@ -442,8 +521,6 @@ __vfwprintf(FILE *fp, const wchar_t *fmt if (prepwrite(fp) != 0) return (EOF); - thousands_sep = '\0'; - grouping = NULL; convbuf = NULL; fmt = (wchar_t *)fmt0; argtable = NULL; @@ -477,6 +554,7 @@ __vfwprintf(FILE *fp, const wchar_t *fmt dprec = 0; width = 0; prec = -1; + gs.grouping = NULL; sign = '\0'; ox[1] = '\0'; @@ -514,8 +592,6 @@ reswitch: switch (ch) { goto rflag; case '\'': flags |= GROUPING; - thousands_sep = *(localeconv()->thousands_sep); - grouping = localeconv()->grouping; goto rflag; case '.': if ((ch = *fmt++) == '*') { @@ -739,23 +815,8 @@ fp_common: /* space for decimal pt and following digits */ if (prec || flags & ALT) size += prec + 1; - if (grouping && expt > 0) { - /* space for thousands' grouping */ - nseps = nrepeats = 0; - lead = expt; - while (*grouping != CHAR_MAX) { - if (lead <= *grouping) - break; - lead -= *grouping; - if (*(grouping+1)) { - nseps++; - grouping++; - } else - nrepeats++; - } - size += nseps + nrepeats; - } else - lead = expt; + if ((flags & GROUPING) && expt > 0) + size += grouping_init(&gs, expt); } break; #endif /* !NO_FLOATING_POINT */ @@ -899,20 +960,18 @@ number: if ((dprec = prec) >= 0) if (ujval != 0 || prec != 0 || (flags & ALT && base == 8)) cp = __ujtoa(ujval, cp, base, - flags & ALT, xdigs, - flags & GROUPING, thousands_sep, - grouping); + flags & ALT, xdigs); } else { if (ulval != 0 || prec != 0 || (flags & ALT && base == 8)) cp = __ultoa(ulval, cp, base, - flags & ALT, xdigs, - flags & GROUPING, thousands_sep, - grouping); + flags & ALT, xdigs); } size = buf + BUF - cp; if (size > BUF) /* should never happen */ abort(); + if ((flags & GROUPING) && size != 0) + size += grouping_init(&gs, size); break; default: /* "%?" prints ?, unless ? is NUL */ if (ch == '\0') @@ -968,13 +1027,19 @@ number: if ((dprec = prec) >= 0) if ((flags & (LADJUST|ZEROPAD)) == ZEROPAD) PAD(width - realsz, zeroes); - /* leading zeroes from decimal precision */ - PAD(dprec - size, zeroes); - /* the string or number proper */ #ifndef NO_FLOATING_POINT if ((flags & FPT) == 0) { - PRINT(cp, size); +#endif + /* leading zeroes from decimal precision */ + PAD(dprec - size, zeroes); + if (gs.grouping) { + if (grouping_print(&gs, &io, cp, buf+BUF) < 0) + goto error; + } else { + PRINT(cp, size); + } +#ifndef NO_FLOATING_POINT } else { /* glue together f_p fragments */ if (!expchar) { /* %[fF] or sufficiently short %[gG] */ if (expt <= 0) { @@ -985,25 +1050,16 @@ number: if ((dprec = prec) >= 0) /* already handled initial 0's */ prec += expt; } else { - PRINTANDPAD(cp, convbuf + ndig, lead, zeroes); - cp += lead; - if (grouping) { - while (nseps>0 || nrepeats>0) { - if (nrepeats > 0) - nrepeats--; - else { - grouping--; - nseps--; - } - PRINT(&thousands_sep, - 1); - PRINTANDPAD(cp, - convbuf + ndig, - *grouping, zeroes); - cp += *grouping; - } - if (cp > convbuf + ndig) - cp = convbuf + ndig; + if (gs.grouping) { + n = grouping_print(&gs, &io, + cp, convbuf + ndig); + if (n < 0) + goto error; + cp += n; + } else { + PRINTANDPAD(cp, convbuf + ndig, + expt, zeroes); + cp += expt; } if (prec || flags & ALT) PRINT(&decimal_point, 1); @@ -1021,8 +1077,6 @@ number: if ((dprec = prec) >= 0) PRINT(expstr, expsize); } } -#else - PRINT(cp, size); #endif /* left-adjusting padding (always blank) */ if (flags & LADJUST)