Skip site navigation (1) | Skip section navigation (2)
Date:      Sat, 8 Aug 2015 18:22:15 +0000 (UTC)
From:      Baptiste Daroussin <bapt@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r286459 - projects/collation/lib/libc/locale
Message-ID:  <201508081822.t78IMFfS085095@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: bapt
Date: Sat Aug  8 18:22:14 2015
New Revision: 286459
URL: https://svnweb.freebsd.org/changeset/base/286459

Log:
  Revamp CTYPE support (from Illumos & Dragonfly)
  
  Obtained from:	Dragonfly

Deleted:
  projects/collation/lib/libc/locale/ascii.c
Modified:
  projects/collation/lib/libc/locale/Makefile.inc
  projects/collation/lib/libc/locale/big5.c
  projects/collation/lib/libc/locale/collate.c
  projects/collation/lib/libc/locale/collate.h
  projects/collation/lib/libc/locale/euc.c
  projects/collation/lib/libc/locale/gb18030.c
  projects/collation/lib/libc/locale/gb2312.c
  projects/collation/lib/libc/locale/gbk.c
  projects/collation/lib/libc/locale/mblocal.h
  projects/collation/lib/libc/locale/mbsnrtowcs.c
  projects/collation/lib/libc/locale/mskanji.c
  projects/collation/lib/libc/locale/none.c
  projects/collation/lib/libc/locale/rune.c
  projects/collation/lib/libc/locale/setrunelocale.c
  projects/collation/lib/libc/locale/utf8.c
  projects/collation/lib/libc/locale/wcsnrtombs.c

Modified: projects/collation/lib/libc/locale/Makefile.inc
==============================================================================
--- projects/collation/lib/libc/locale/Makefile.inc	Sat Aug  8 18:14:59 2015	(r286458)
+++ projects/collation/lib/libc/locale/Makefile.inc	Sat Aug  8 18:22:14 2015	(r286459)
@@ -4,7 +4,7 @@
 # locale sources
 .PATH: ${LIBC_SRCTOP}/${LIBC_ARCH}/locale ${LIBC_SRCTOP}/locale
 
-SRCS+=	ascii.c big5.c btowc.c collate.c collcmp.c euc.c fix_grouping.c \
+SRCS+=	big5.c btowc.c collate.c collcmp.c euc.c fix_grouping.c \
 	gb18030.c gb2312.c gbk.c ctype.c isctype.c iswctype.c \
 	ldpart.c lmessages.c lmonetary.c lnumeric.c localeconv.c mblen.c \
 	mbrlen.c \

Modified: projects/collation/lib/libc/locale/big5.c
==============================================================================
--- projects/collation/lib/libc/locale/big5.c	Sat Aug  8 18:14:59 2015	(r286458)
+++ projects/collation/lib/libc/locale/big5.c	Sat Aug  8 18:22:14 2015	(r286459)
@@ -1,4 +1,6 @@
 /*-
+ * Copyright 2013 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved.
  * Copyright (c) 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -19,11 +21,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
@@ -61,6 +59,12 @@ static size_t	_BIG5_mbrtowc(wchar_t * __
 static int	_BIG5_mbsinit(const mbstate_t *);
 static size_t	_BIG5_wcrtomb(char * __restrict, wchar_t,
 		    mbstate_t * __restrict);
+static size_t	_BIG5_mbsnrtowcs(wchar_t * __restrict,
+		    const char ** __restrict, size_t, size_t,
+		    mbstate_t * __restrict);
+static size_t	_BIG5_wcsnrtombs(char * __restrict,
+		    const wchar_t ** __restrict, size_t, size_t,
+		    mbstate_t * __restrict);
 
 typedef struct {
 	wchar_t	ch;
@@ -72,6 +76,8 @@ _BIG5_init(struct xlocale_ctype *l, _Run
 
 	l->__mbrtowc = _BIG5_mbrtowc;
 	l->__wcrtomb = _BIG5_wcrtomb;
+	l->__mbsnrtowcs = _BIG5_mbsnrtowcs;
+	l->__wcsnrtombs = _BIG5_wcsnrtombs;
 	l->__mbsinit = _BIG5_mbsinit;
 	l->runes = rl;
 	l->__mb_cur_max = 2;
@@ -147,7 +153,7 @@ _BIG5_mbrtowc(wchar_t * __restrict pwc, 
 		wc = (wc << 8) | (*s++ & 0xff);
 		if (pwc != NULL)
 			*pwc = wc;
-                return (2);
+		return (2);
 	} else {
 		if (pwc != NULL)
 			*pwc = wc;
@@ -178,3 +184,17 @@ _BIG5_wcrtomb(char * __restrict s, wchar
 	*s = wc & 0xff;
 	return (1);
 }
+
+static size_t
+_BIG5_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src,
+    size_t nms, size_t len, mbstate_t * __restrict ps)
+{
+	return (__mbsnrtowcs_std(dst, src, nms, len, ps, _BIG5_mbrtowc));
+}
+
+static size_t
+_BIG5_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
+    size_t nwc, size_t len, mbstate_t * __restrict ps)
+{
+	return (__wcsnrtombs_std(dst, src, nwc, len, ps, _BIG5_wcrtomb));
+}

Modified: projects/collation/lib/libc/locale/collate.c
==============================================================================
--- projects/collation/lib/libc/locale/collate.c	Sat Aug  8 18:14:59 2015	(r286458)
+++ projects/collation/lib/libc/locale/collate.c	Sat Aug  8 18:22:14 2015	(r286459)
@@ -1,4 +1,5 @@
 /*-
+ * Copyright 2014 Garrett D'Amore <garrett@damore.org>
  * Copright 2010 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
  *		at Electronni Visti IA, Kiev, Ukraine.

Modified: projects/collation/lib/libc/locale/collate.h
==============================================================================
--- projects/collation/lib/libc/locale/collate.h	Sat Aug  8 18:14:59 2015	(r286458)
+++ projects/collation/lib/libc/locale/collate.h	Sat Aug  8 18:22:14 2015	(r286459)
@@ -1,4 +1,5 @@
 /*-
+ * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
  *		at Electronni Visti IA, Kiev, Ukraine.
  *			All rights reserved.

Modified: projects/collation/lib/libc/locale/euc.c
==============================================================================
--- projects/collation/lib/libc/locale/euc.c	Sat Aug  8 18:14:59 2015	(r286458)
+++ projects/collation/lib/libc/locale/euc.c	Sat Aug  8 18:22:14 2015	(r286459)
@@ -1,4 +1,6 @@
 /*-
+ * Copyright 2013 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved.
  * Copyright (c) 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -19,11 +21,7 @@
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the University of
- *	California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
@@ -56,17 +54,56 @@ __FBSDID("$FreeBSD$");
 
 extern int __mb_sb_limit;
 
-static size_t	_EUC_mbrtowc(wchar_t * __restrict, const char * __restrict,
+static size_t	_EUC_mbrtowc_impl(wchar_t * __restrict, const char * __restrict,
+    size_t, mbstate_t * __restrict, uint8_t, uint8_t, uint8_t, uint8_t);
+static size_t	_EUC_wcrtomb_impl(char * __restrict, wchar_t,
+    mbstate_t * __restrict, uint8_t, uint8_t, uint8_t, uint8_t);
+
+static size_t	_EUC_CN_mbrtowc(wchar_t * __restrict, const char * __restrict,
 		    size_t, mbstate_t * __restrict);
-static int	_EUC_mbsinit(const mbstate_t *);
-static size_t	_EUC_wcrtomb(char * __restrict, wchar_t,
+static size_t	_EUC_JP_mbrtowc(wchar_t * __restrict, const char * __restrict,
+		    size_t, mbstate_t * __restrict);
+static size_t	_EUC_KR_mbrtowc(wchar_t * __restrict, const char * __restrict,
+		    size_t, mbstate_t * __restrict);
+static size_t	_EUC_TW_mbrtowc(wchar_t * __restrict, const char * __restrict,
+		    size_t, mbstate_t * __restrict);
+
+static size_t	_EUC_CN_wcrtomb(char * __restrict, wchar_t,
+		    mbstate_t * __restrict);
+static size_t	_EUC_JP_wcrtomb(char * __restrict, wchar_t,
+		    mbstate_t * __restrict);
+static size_t	_EUC_KR_wcrtomb(char * __restrict, wchar_t,
+		    mbstate_t * __restrict);
+static size_t	_EUC_TW_wcrtomb(char * __restrict, wchar_t,
 		    mbstate_t * __restrict);
 
-typedef struct {
-	int	count[4];
-	wchar_t	bits[4];
-	wchar_t	mask;
-} _EucInfo;
+static size_t	_EUC_CN_mbsnrtowcs(wchar_t * __restrict,
+		    const char ** __restrict, size_t, size_t,
+		    mbstate_t * __restrict);
+static size_t	_EUC_JP_mbsnrtowcs(wchar_t * __restrict,
+		    const char ** __restrict, size_t, size_t,
+		    mbstate_t * __restrict);
+static size_t	_EUC_KR_mbsnrtowcs(wchar_t * __restrict,
+		    const char ** __restrict, size_t, size_t,
+		    mbstate_t * __restrict);
+static size_t	_EUC_TW_mbsnrtowcs(wchar_t * __restrict,
+		    const char ** __restrict, size_t, size_t,
+		    mbstate_t * __restrict);
+
+static size_t	_EUC_CN_wcsnrtombs(char * __restrict,
+		    const wchar_t ** __restrict, size_t, size_t,
+		    mbstate_t * __restrict);
+static size_t	_EUC_JP_wcsnrtombs(char * __restrict,
+		    const wchar_t ** __restrict, size_t, size_t,
+		    mbstate_t * __restrict);
+static size_t	_EUC_KR_wcsnrtombs(char * __restrict,
+		    const wchar_t ** __restrict, size_t, size_t,
+		    mbstate_t * __restrict);
+static size_t	_EUC_TW_wcsnrtombs(char * __restrict,
+		    const wchar_t ** __restrict, size_t, size_t,
+		    mbstate_t * __restrict);
+
+static int	_EUC_mbsinit(const mbstate_t *);
 
 typedef struct {
 	wchar_t	ch;
@@ -74,94 +111,218 @@ typedef struct {
 	int	want;
 } _EucState;
 
+static int
+_EUC_mbsinit(const mbstate_t *ps)
+{
+
+	return (ps == NULL || ((const _EucState *)ps)->want == 0);
+}
+
+/*
+ * EUC-CN uses CS0, CS1 and CS2 (4 bytes).
+ */
 int
-_EUC_init(struct xlocale_ctype *l, _RuneLocale *rl)
+_EUC_CN_init(struct xlocale_ctype *l, _RuneLocale *rl)
 {
-	_EucInfo *ei;
-	int x, new__mb_cur_max;
-	char *v, *e;
+	l->__mbrtowc = _EUC_CN_mbrtowc;
+	l->__wcrtomb = _EUC_CN_wcrtomb;
+	l->__mbsnrtowcs = _EUC_CN_mbsnrtowcs;
+	l->__wcsnrtombs = _EUC_CN_wcsnrtombs;
+	l->__mbsinit = _EUC_mbsinit;
 
-	if (rl->__variable == NULL)
-		return (EFTYPE);
+	l->runes = rl;
+	l->__mb_cur_max = 4;
+	l->__mb_sb_limit = 256;
+	return (0);
+}
 
-	v = (char *)rl->__variable;
+static size_t
+_EUC_CN_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
+    size_t n, mbstate_t * __restrict ps)
+{
+	return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0));
+}
 
-	while (*v == ' ' || *v == '\t')
-		++v;
+static size_t
+_EUC_CN_mbsnrtowcs(wchar_t * __restrict dst,
+    const char ** __restrict src,
+    size_t nms, size_t len, mbstate_t * __restrict ps)
+{
+	return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_CN_mbrtowc));
+}
 
-	if ((ei = malloc(sizeof(_EucInfo))) == NULL)
-		return (errno == 0 ? ENOMEM : errno);
+static size_t
+_EUC_CN_wcrtomb(char * __restrict s, wchar_t wc,
+    mbstate_t * __restrict ps)
+{
+	return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0));
+}
 
-	new__mb_cur_max = 0;
-	for (x = 0; x < 4; ++x) {
-		ei->count[x] = (int)strtol(v, &e, 0);
-		if (v == e || !(v = e)) {
-			free(ei);
-			return (EFTYPE);
-		}
-		if (new__mb_cur_max < ei->count[x])
-			new__mb_cur_max = ei->count[x];
-		while (*v == ' ' || *v == '\t')
-			++v;
-		ei->bits[x] = (int)strtol(v, &e, 0);
-		if (v == e || !(v = e)) {
-			free(ei);
-			return (EFTYPE);
-		}
-		while (*v == ' ' || *v == '\t')
-			++v;
-	}
-	ei->mask = (int)strtol(v, &e, 0);
-	if (v == e || !(v = e)) {
-		free(ei);
-		return (EFTYPE);
-	}
-	rl->__variable = ei;
-	rl->__variable_len = sizeof(_EucInfo);
-	l->runes = rl;
-	l->__mb_cur_max = new__mb_cur_max;
-	l->__mbrtowc = _EUC_mbrtowc;
-	l->__wcrtomb = _EUC_wcrtomb;
+static size_t
+_EUC_CN_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
+	size_t nwc, size_t len, mbstate_t * __restrict ps)
+{
+	return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_CN_wcrtomb));
+}
+
+/*
+ * EUC-KR uses only CS0 and CS1.
+ */
+int
+_EUC_KR_init(struct xlocale_ctype *l, _RuneLocale *rl)
+{
+	l->__mbrtowc = _EUC_KR_mbrtowc;
+	l->__wcrtomb = _EUC_KR_wcrtomb;
+	l->__mbsnrtowcs = _EUC_KR_mbsnrtowcs;
+	l->__wcsnrtombs = _EUC_KR_wcsnrtombs;
 	l->__mbsinit = _EUC_mbsinit;
-	l->__mb_sb_limit = 256;
+
+	l->runes = rl;
+	l->__mb_cur_max = 2;
+	l->__mb_sb_limit = 128;
 	return (0);
 }
 
-static int
-_EUC_mbsinit(const mbstate_t *ps)
+static size_t
+_EUC_KR_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
+    size_t n, mbstate_t * __restrict ps)
 {
+	return (_EUC_mbrtowc_impl(pwc, s, n, ps, 0, 0, 0, 0));
+}
 
-	return (ps == NULL || ((const _EucState *)ps)->want == 0);
+static size_t
+_EUC_KR_mbsnrtowcs(wchar_t * __restrict dst,
+    const char ** __restrict src,
+    size_t nms, size_t len, mbstate_t * __restrict ps)
+{
+	return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_KR_mbrtowc));
 }
 
-#define	CEI	((_EucInfo *)(_CurrentRuneLocale->__variable))
+static size_t
+_EUC_KR_wcrtomb(char * __restrict s, wchar_t wc,
+	mbstate_t * __restrict ps)
+{
+	return (_EUC_wcrtomb_impl(s, wc, ps, 0, 0, 0, 0));
+}
 
-#define	_SS2	0x008e
-#define	_SS3	0x008f
+static size_t
+_EUC_KR_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
+	size_t nwc, size_t len, mbstate_t * __restrict ps)
+{
+	return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_KR_wcrtomb));
+}
 
-#define	GR_BITS	0x80808080 /* XXX: to be fixed */
+/*
+ * EUC-JP uses CS0, CS1, CS2, and CS3.
+ */
+int
+_EUC_JP_init(struct xlocale_ctype *l, _RuneLocale *rl)
+{
+	l->__mbrtowc = _EUC_JP_mbrtowc;
+	l->__wcrtomb = _EUC_JP_wcrtomb;
+	l->__mbsnrtowcs = _EUC_JP_mbsnrtowcs;
+	l->__wcsnrtombs = _EUC_JP_wcsnrtombs;
+	l->__mbsinit = _EUC_mbsinit;
 
-static __inline int
-_euc_set(u_int c)
+	l->runes = rl;
+	l->__mb_cur_max = 3;
+	l->__mb_sb_limit = 196;
+	return (0);
+}
+
+static size_t
+_EUC_JP_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
+    size_t n, mbstate_t * __restrict ps)
 {
+	return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 2, SS3, 3));
+}
 
-	c &= 0xff;
-	return ((c & 0x80) ? c == _SS3 ? 3 : c == _SS2 ? 2 : 1 : 0);
+static size_t
+_EUC_JP_mbsnrtowcs(wchar_t * __restrict dst,
+    const char ** __restrict src,
+    size_t nms, size_t len, mbstate_t * __restrict ps)
+{
+	return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_JP_mbrtowc));
 }
 
 static size_t
-_EUC_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
+_EUC_JP_wcrtomb(char * __restrict s, wchar_t wc,
     mbstate_t * __restrict ps)
 {
+	return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 2, SS3, 3));
+}
+
+static size_t
+_EUC_JP_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
+	size_t nwc, size_t len, mbstate_t * __restrict ps)
+{
+	return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_JP_wcrtomb));
+}
+
+/*
+ * EUC-TW uses CS0, CS1, and CS2.
+ */
+int
+_EUC_TW_init(struct xlocale_ctype *l, _RuneLocale *rl)
+{
+	l->__mbrtowc = _EUC_TW_mbrtowc;
+	l->__wcrtomb = _EUC_TW_wcrtomb;
+	l->__mbsnrtowcs = _EUC_TW_mbsnrtowcs;
+	l->__wcsnrtombs = _EUC_TW_wcsnrtombs;
+	l->__mbsinit = _EUC_mbsinit;
+
+	l->runes = rl;
+	l->__mb_cur_max = 4;
+	l->__mb_sb_limit = 256;
+	return (0);
+}
+
+static size_t
+_EUC_TW_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
+	size_t n, mbstate_t * __restrict ps)
+{
+	return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0));
+}
+
+static size_t
+_EUC_TW_mbsnrtowcs(wchar_t * __restrict dst,
+	const char ** __restrict src,
+	size_t nms, size_t len, mbstate_t * __restrict ps)
+{
+	return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_TW_mbrtowc));
+}
+
+static size_t
+_EUC_TW_wcrtomb(char * __restrict s, wchar_t wc,
+	mbstate_t * __restrict ps)
+{
+	return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0));
+}
+
+static size_t
+_EUC_TW_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
+	size_t nwc, size_t len, mbstate_t * __restrict ps)
+{
+	return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_TW_wcrtomb));
+}
+
+/*
+ * Common EUC code.
+ */
+
+static size_t
+_EUC_mbrtowc_impl(wchar_t * __restrict pwc, const char * __restrict s,
+	size_t n, mbstate_t * __restrict ps,
+	uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width)
+{
 	_EucState *es;
-	int i, set, want;
+	int i, want;
 	wchar_t wc;
-	const char *os;
+	unsigned char ch;
 
 	es = (_EucState *)ps;
 
-	if (es->want < 0 || es->want > MB_CUR_MAX || es->set < 0 ||
-	    es->set > 3) {
+	if (es->want < 0 || es->want > MB_CUR_MAX) {
 		errno = EINVAL;
 		return ((size_t)-1);
 	}
@@ -176,58 +337,59 @@ _EUC_mbrtowc(wchar_t * __restrict pwc, c
 		/* Incomplete multibyte sequence */
 		return ((size_t)-2);
 
-	os = s;
-
 	if (es->want == 0) {
-		want = CEI->count[set = _euc_set(*s)];
-		if (set == 2 || set == 3) {
-			--want;
-			if (--n == 0) {
-				/* Incomplete multibyte sequence */
-				es->set = set;
-				es->want = want;
-				es->ch = 0;
-				return ((size_t)-2);
-			}
-			++s;
-			if (*s == '\0') {
-				errno = EILSEQ;
-				return ((size_t)-1);
-			}
+		/* Fast path for plain ASCII (CS0) */
+		if (((ch = (unsigned char)*s) & 0x80) == 0) {
+			if (pwc != NULL)
+				*pwc = ch;
+			return (ch != '\0' ? 1 : 0);
 		}
-		wc = (unsigned char)*s++;
+
+		if (ch >= 0xa1) {
+			/* CS1 */
+			want = 2;
+		} else if (ch == cs2) {
+			want = cs2width;
+		} else if (ch == cs3) {
+			want = cs3width;
+		} else {
+			errno = EILSEQ;
+			return ((size_t)-1);
+		}
+
+
+		es->want = want;
+		es->ch = 0;
 	} else {
-		set = es->set;
 		want = es->want;
 		wc = es->ch;
 	}
-	for (i = (es->want == 0) ? 1 : 0; i < MIN(want, n); i++) {
-		if (*s == '\0') {
-			errno = EILSEQ;
-			return ((size_t)-1);
-		}
-		wc = (wc << 8) | (unsigned char)*s++;
+
+	for (i = 0; i < MIN(want, n); i++) {
+		wc <<= 8;
+		wc |= *s;
+		s++;
 	}
 	if (i < want) {
 		/* Incomplete multibyte sequence */
-		es->set = set;
 		es->want = want - i;
 		es->ch = wc;
 		return ((size_t)-2);
 	}
-	wc = (wc & ~CEI->mask) | CEI->bits[set];
 	if (pwc != NULL)
 		*pwc = wc;
 	es->want = 0;
-	return (wc == L'\0' ? 0 : s - os);
+	return (wc == L'\0' ? 0 : want);
 }
 
 static size_t
-_EUC_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
+_EUC_wcrtomb_impl(char * __restrict s, wchar_t wc,
+    mbstate_t * __restrict ps,
+    uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width)
 {
 	_EucState *es;
-	wchar_t m, nm;
 	int i, len;
+	wchar_t nm;
 
 	es = (_EucState *)ps;
 
@@ -240,34 +402,52 @@ _EUC_wcrtomb(char * __restrict s, wchar_
 		/* Reset to initial shift state (no-op) */
 		return (1);
 
-	m = wc & CEI->mask;
-	nm = wc & ~m;
+	if ((wc & ~0x7f) == 0) {
+		/* Fast path for plain ASCII (CS0) */
+		*s = (char)wc;
+		return (1);
+	}
 
-	if (m == CEI->bits[1]) {
-CodeSet1:
-		/* Codeset 1: The first byte must have 0x80 in it. */
-		i = len = CEI->count[1];
-		while (i-- > 0)
-			*s++ = (nm >> (i << 3)) | 0x80;
+	/* Determine the "length" */
+	if ((unsigned)wc > 0xffffff) {
+		len = 4;
+	} else if ((unsigned)wc > 0xffff) {
+		len = 3;
+	} else if ((unsigned)wc > 0xff) {
+		len = 2;
 	} else {
-		if (m == CEI->bits[0])
-			i = len = CEI->count[0];
-		else if (m == CEI->bits[2]) {
-			i = len = CEI->count[2];
-			*s++ = _SS2;
-			--i;
-			/* SS2 designates G2 into GR */
-			nm |= GR_BITS;
-		} else if (m == CEI->bits[3]) {
-			i = len = CEI->count[3];
-			*s++ = _SS3;
-			--i;
-			/* SS3 designates G3 into GR */
-			nm |= GR_BITS;
-		} else
-			goto CodeSet1;	/* Bletch */
-		while (i-- > 0)
-			*s++ = (nm >> (i << 3)) & 0xff;
+		len = 1;
+	}
+
+	if (len > MB_CUR_MAX) {
+		errno = EILSEQ;
+		return ((size_t)-1);
+	}
+
+	/* This first check excludes CS1, which is implicitly valid. */
+	if ((wc < 0xa100) || (wc > 0xffff)) {
+		/* Check for valid CS2 or CS3 */
+		nm = (wc >> ((len - 1) * 8));
+		if (nm == cs2) {
+			if (len != cs2width) {
+				errno = EILSEQ;
+				return ((size_t)-1);
+			}
+		} else if (nm == cs3) {
+			if (len != cs3width) {
+				errno = EILSEQ;
+				return ((size_t)-1);
+			}
+		} else {
+			errno = EILSEQ;
+			return ((size_t)-1);
+		}
+	}
+
+	/* Stash the bytes, least significant last */
+	for (i = len - 1; i >= 0; i--) {
+		s[i] = (wc & 0xff);
+		wc >>= 8;
 	}
 	return (len);
 }

Modified: projects/collation/lib/libc/locale/gb18030.c
==============================================================================
--- projects/collation/lib/libc/locale/gb18030.c	Sat Aug  8 18:14:59 2015	(r286458)
+++ projects/collation/lib/libc/locale/gb18030.c	Sat Aug  8 18:22:14 2015	(r286459)
@@ -1,4 +1,6 @@
 /*-
+ * Copyright 2013 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2002-2004 Tim J. Robbins
  * All rights reserved.
  *
@@ -28,6 +30,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
+
 /*
  * PRC National Standard GB 18030-2000 encoding of Chinese text.
  *
@@ -49,6 +52,13 @@ static size_t	_GB18030_mbrtowc(wchar_t *
 static int	_GB18030_mbsinit(const mbstate_t *);
 static size_t	_GB18030_wcrtomb(char * __restrict, wchar_t,
 		    mbstate_t * __restrict);
+static size_t	_GB18030_mbsnrtowcs(wchar_t * __restrict,
+		    const char ** __restrict, size_t, size_t,
+		    mbstate_t * __restrict);
+static size_t	_GB18030_wcsnrtombs(char * __restrict,
+		    const wchar_t ** __restrict, size_t, size_t,
+		    mbstate_t * __restrict);
+
 
 typedef struct {
 	int	count;
@@ -62,6 +72,8 @@ _GB18030_init(struct xlocale_ctype *l, _
 	l->__mbrtowc = _GB18030_mbrtowc;
 	l->__wcrtomb = _GB18030_wcrtomb;
 	l->__mbsinit = _GB18030_mbsinit;
+	l->__mbsnrtowcs = _GB18030_mbsnrtowcs;
+	l->__wcsnrtombs = _GB18030_wcsnrtombs;
 	l->runes = rl;
 	l->__mb_cur_max = 4;
 	l->__mb_sb_limit = 128;
@@ -222,3 +234,19 @@ ilseq:
 	errno = EILSEQ;
 	return ((size_t)-1);
 }
+
+static size_t
+_GB18030_mbsnrtowcs(wchar_t * __restrict dst,
+    const char ** __restrict src, size_t nms, size_t len,
+    mbstate_t * __restrict ps)
+{
+	return (__mbsnrtowcs_std(dst, src, nms, len, ps, _GB18030_mbrtowc));
+}
+
+static size_t
+_GB18030_wcsnrtombs(char * __restrict dst,
+    const wchar_t ** __restrict src, size_t nwc, size_t len,
+    mbstate_t * __restrict ps)
+{
+	return (__wcsnrtombs_std(dst, src, nwc, len, ps, _GB18030_wcrtomb));
+}

Modified: projects/collation/lib/libc/locale/gb2312.c
==============================================================================
--- projects/collation/lib/libc/locale/gb2312.c	Sat Aug  8 18:14:59 2015	(r286458)
+++ projects/collation/lib/libc/locale/gb2312.c	Sat Aug  8 18:22:14 2015	(r286459)
@@ -1,4 +1,6 @@
 /*-
+ * Copyright 2013 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2004 Tim J. Robbins. All rights reserved.
  * Copyright (c) 2003 David Xu <davidxu@freebsd.org>
  * All rights reserved.
@@ -45,6 +47,13 @@ static size_t	_GB2312_mbrtowc(wchar_t * 
 static int	_GB2312_mbsinit(const mbstate_t *);
 static size_t	_GB2312_wcrtomb(char * __restrict, wchar_t,
 		    mbstate_t * __restrict);
+static size_t	_GB2312_mbsnrtowcs(wchar_t * __restrict,
+		    const char ** __restrict, size_t, size_t,
+		    mbstate_t * __restrict);
+static size_t	_GB2312_wcsnrtombs(char * __restrict,
+		    const wchar_t ** __restrict, size_t, size_t,
+		    mbstate_t * __restrict);
+
 
 typedef struct {
 	int	count;
@@ -59,6 +68,8 @@ _GB2312_init(struct xlocale_ctype *l, _R
 	l->__mbrtowc = _GB2312_mbrtowc;
 	l->__wcrtomb = _GB2312_wcrtomb;
 	l->__mbsinit = _GB2312_mbsinit;
+	l->__mbsnrtowcs = _GB2312_mbsnrtowcs;
+	l->__wcsnrtombs = _GB2312_wcsnrtombs;
 	l->__mb_cur_max = 2;
 	l->__mb_sb_limit = 128;
 	return (0);
@@ -71,7 +82,7 @@ _GB2312_mbsinit(const mbstate_t *ps)
 	return (ps == NULL || ((const _GB2312State *)ps)->count == 0);
 }
 
-static __inline int
+static int
 _GB2312_check(const char *str, size_t n)
 {
 	const u_char *s = (const u_char *)str;
@@ -90,7 +101,7 @@ _GB2312_check(const char *str, size_t n)
 	} else if (s[0] & 0x80) {
 		/* Invalid multibyte sequence */
 		return (-1);
-	} 
+	}
 	return (1);
 }
 
@@ -158,3 +169,19 @@ _GB2312_wcrtomb(char * __restrict s, wch
 	*s = wc & 0xff;
 	return (1);
 }
+
+static size_t
+_GB2312_mbsnrtowcs(wchar_t * __restrict dst,
+    const char ** __restrict src, size_t nms, size_t len,
+    mbstate_t * __restrict ps)
+{
+	return (__mbsnrtowcs_std(dst, src, nms, len, ps, _GB2312_mbrtowc));
+}
+
+static size_t
+_GB2312_wcsnrtombs(char * __restrict dst,
+    const wchar_t ** __restrict src, size_t nwc, size_t len,
+    mbstate_t * __restrict ps)
+{
+	return (__wcsnrtombs_std(dst, src, nwc, len, ps, _GB2312_wcrtomb));
+}

Modified: projects/collation/lib/libc/locale/gbk.c
==============================================================================
--- projects/collation/lib/libc/locale/gbk.c	Sat Aug  8 18:14:59 2015	(r286458)
+++ projects/collation/lib/libc/locale/gbk.c	Sat Aug  8 18:22:14 2015	(r286459)
@@ -1,4 +1,6 @@
 /*-
+ * Copyright 2013 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved.
  * Copyright (c) 1993
  *	The Regents of the University of California.  All rights reserved.
@@ -54,6 +56,12 @@ static size_t	_GBK_mbrtowc(wchar_t * __r
 static int	_GBK_mbsinit(const mbstate_t *);
 static size_t	_GBK_wcrtomb(char * __restrict, wchar_t,
 		    mbstate_t * __restrict);
+static size_t	_GBK_mbsnrtowcs(wchar_t * __restrict,
+		    const char ** __restrict, size_t, size_t,
+		    mbstate_t * __restrict);
+static size_t	_GBK_wcsnrtombs(char * __restrict,
+		    const wchar_t ** __restrict, size_t, size_t,
+		    mbstate_t * __restrict);
 
 typedef struct {
 	wchar_t	ch;
@@ -66,6 +74,8 @@ _GBK_init(struct xlocale_ctype *l, _Rune
 	l->__mbrtowc = _GBK_mbrtowc;
 	l->__wcrtomb = _GBK_wcrtomb;
 	l->__mbsinit = _GBK_mbsinit;
+	l->__mbsnrtowcs = _GBK_mbsnrtowcs;
+	l->__wcsnrtombs = _GBK_wcsnrtombs;
 	l->runes = rl;
 	l->__mb_cur_max = 2;
 	l->__mb_sb_limit = 128;
@@ -79,7 +89,7 @@ _GBK_mbsinit(const mbstate_t *ps)
 	return (ps == NULL || ((const _GBKState *)ps)->ch == 0);
 }
 
-static __inline int
+static int
 _gbk_check(u_int c)
 {
 
@@ -140,7 +150,7 @@ _GBK_mbrtowc(wchar_t * __restrict pwc, c
 		wc = (wc << 8) | (*s++ & 0xff);
 		if (pwc != NULL)
 			*pwc = wc;
-                return (2);
+		return (2);
 	} else {
 		if (pwc != NULL)
 			*pwc = wc;
@@ -171,3 +181,17 @@ _GBK_wcrtomb(char * __restrict s, wchar_
 	*s = wc & 0xff;
 	return (1);
 }
+
+static size_t
+_GBK_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src,
+    size_t nms, size_t len, mbstate_t * __restrict ps)
+{
+	return (__mbsnrtowcs_std(dst, src, nms, len, ps, _GBK_mbrtowc));
+}
+
+static size_t
+_GBK_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
+    size_t nwc, size_t len, mbstate_t * __restrict ps)
+{
+	return (__wcsnrtombs_std(dst, src, nwc, len, ps, _GBK_wcrtomb));
+}

Modified: projects/collation/lib/libc/locale/mblocal.h
==============================================================================
--- projects/collation/lib/libc/locale/mblocal.h	Sat Aug  8 18:14:59 2015	(r286458)
+++ projects/collation/lib/libc/locale/mblocal.h	Sat Aug  8 18:22:14 2015	(r286459)
@@ -1,4 +1,6 @@
 /*-
+ * Copyright 2013 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2004 Tim J. Robbins.
  * All rights reserved.
  *
@@ -37,6 +39,8 @@
 #include <runetype.h>
 #include "xlocale_private.h"
 
+#define	SS2	0x008e
+#define SS3	0x008f
 
 /*
  * Conversion function pointers for current encoding.
@@ -62,18 +66,24 @@ extern struct xlocale_ctype __xlocale_gl
  * Rune initialization function prototypes.
  */
 int	_none_init(struct xlocale_ctype *, _RuneLocale *);
-int	_ascii_init(struct xlocale_ctype *, _RuneLocale *);
 int	_UTF8_init(struct xlocale_ctype *, _RuneLocale *);
-int	_EUC_init(struct xlocale_ctype *, _RuneLocale *);
+int	_EUC_CN_init(struct xlocale_ctype *, _RuneLocale *);
+int	_EUC_JP_init(struct xlocale_ctype *, _RuneLocale *);
+int	_EUC_KR_init(struct xlocale_ctype *, _RuneLocale *);
+int	_EUC_TW_init(struct xlocale_ctype *, _RuneLocale *);
 int	_GB18030_init(struct xlocale_ctype *, _RuneLocale *);
 int	_GB2312_init(struct xlocale_ctype *, _RuneLocale *);
 int	_GBK_init(struct xlocale_ctype *, _RuneLocale *);
 int	_BIG5_init(struct xlocale_ctype *, _RuneLocale *);
 int	_MSKanji_init(struct xlocale_ctype *, _RuneLocale *);
 
-extern size_t __mbsnrtowcs_std(wchar_t * __restrict, const char ** __restrict,
-	size_t, size_t, mbstate_t * __restrict);
-extern size_t __wcsnrtombs_std(char * __restrict, const wchar_t ** __restrict,
-	size_t, size_t, mbstate_t * __restrict);
+typedef size_t (*mbrtowc_pfn_t)(wchar_t * __restrict,
+    const char * __restrict, size_t, mbstate_t * __restrict);
+typedef size_t (*wcrtomb_pfn_t)(char * __restrict, wchar_t,
+    mbstate_t * __restrict);
+size_t __mbsnrtowcs_std(wchar_t * __restrict, const char ** __restrict,
+    size_t, size_t, mbstate_t * __restrict, mbrtowc_pfn_t);
+size_t __wcsnrtombs_std(char * __restrict, const wchar_t ** __restrict,
+    size_t, size_t, mbstate_t * __restrict, wcrtomb_pfn_t);
 
 #endif	/* _MBLOCAL_H_ */

Modified: projects/collation/lib/libc/locale/mbsnrtowcs.c
==============================================================================
--- projects/collation/lib/libc/locale/mbsnrtowcs.c	Sat Aug  8 18:14:59 2015	(r286458)
+++ projects/collation/lib/libc/locale/mbsnrtowcs.c	Sat Aug  8 18:22:14 2015	(r286459)
@@ -1,4 +1,6 @@
 /*-
+ * Copyright 2013 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2002-2004 Tim J. Robbins.
  *
  * Copyright (c) 2011 The FreeBSD Foundation
@@ -56,20 +58,20 @@ mbsnrtowcs(wchar_t * __restrict dst, con
 
 size_t
 __mbsnrtowcs_std(wchar_t * __restrict dst, const char ** __restrict src,
-    size_t nms, size_t len, mbstate_t * __restrict ps)
+    size_t nms, size_t len, mbstate_t * __restrict ps,
+    mbrtowc_pfn_t pmbrtowc)
 {
 	const char *s;
 	size_t nchr;
 	wchar_t wc;
 	size_t nb;
-	struct xlocale_ctype *ct = XLOCALE_CTYPE(__get_locale());
 
 	s = *src;
 	nchr = 0;
 
 	if (dst == NULL) {
 		for (;;) {
-			if ((nb = ct->__mbrtowc(&wc, s, nms, ps)) == (size_t)-1)
+			if ((nb = pmbrtowc(&wc, s, nms, ps)) == (size_t)-1)
 				/* Invalid sequence - mbrtowc() sets errno. */
 				return ((size_t)-1);
 			else if (nb == 0 || nb == (size_t)-2)
@@ -82,7 +84,7 @@ __mbsnrtowcs_std(wchar_t * __restrict ds
 	}
 
 	while (len-- > 0) {
-		if ((nb = ct->__mbrtowc(dst, s, nms, ps)) == (size_t)-1) {
+		if ((nb = pmbrtowc(dst, s, nms, ps)) == (size_t)-1) {
 			*src = s;
 			return ((size_t)-1);
 		} else if (nb == (size_t)-2) {

Modified: projects/collation/lib/libc/locale/mskanji.c
==============================================================================
--- projects/collation/lib/libc/locale/mskanji.c	Sat Aug  8 18:14:59 2015	(r286458)
+++ projects/collation/lib/libc/locale/mskanji.c	Sat Aug  8 18:22:14 2015	(r286459)
@@ -1,4 +1,6 @@
 /*
+ * Copyright 2013 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved.
  *
  *    ja_JP.SJIS locale table for BSD4.4/rune
@@ -28,14 +30,14 @@
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE  
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL  
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS  
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)  
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT  
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY  
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF  
- * SUCH DAMAGE.  
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
  */  
 
 #if defined(LIBC_SCCS) && !defined(lint)
@@ -59,6 +61,12 @@ static size_t	_MSKanji_mbrtowc(wchar_t *
 static int	_MSKanji_mbsinit(const mbstate_t *);
 static size_t	_MSKanji_wcrtomb(char * __restrict, wchar_t,
 		    mbstate_t * __restrict);
+static size_t	_MSKanji_mbsnrtowcs(wchar_t * __restrict,
+		    const char ** __restrict, size_t, size_t,
+		    mbstate_t * __restrict);
+static size_t	_MSKanji_wcsnrtombs(char * __restrict,
+		    const wchar_t ** __restrict, size_t, size_t,
+		    mbstate_t * __restrict);
 
 typedef struct {
 	wchar_t	ch;
@@ -70,6 +78,8 @@ _MSKanji_init(struct xlocale_ctype *l, _
 
 	l->__mbrtowc = _MSKanji_mbrtowc;
 	l->__wcrtomb = _MSKanji_wcrtomb;
+	l->__mbsnrtowcs = _MSKanji_mbsnrtowcs;
+	l->__wcsnrtombs = _MSKanji_wcsnrtombs;
 	l->__mbsinit = _MSKanji_mbsinit;
 	l->runes = rl;
 	l->__mb_cur_max = 2;
@@ -163,3 +173,19 @@ _MSKanji_wcrtomb(char * __restrict s, wc
 		*s++ = wc >> (i << 3);
 	return (len);
 }
+
+static size_t
+_MSKanji_mbsnrtowcs(wchar_t * __restrict dst,
+    const char ** __restrict src, size_t nms,
+    size_t len, mbstate_t * __restrict ps)
+{
+	return (__mbsnrtowcs_std(dst, src, nms, len, ps, _MSKanji_mbrtowc));
+}
+
+static size_t
+_MSKanji_wcsnrtombs(char * __restrict dst,
+    const wchar_t ** __restrict src, size_t nwc,
+    size_t len, mbstate_t * __restrict ps)
+{
+	return (__wcsnrtombs_std(dst, src, nwc, len, ps, _MSKanji_wcrtomb));
+}

Modified: projects/collation/lib/libc/locale/none.c
==============================================================================
--- projects/collation/lib/libc/locale/none.c	Sat Aug  8 18:14:59 2015	(r286458)
+++ projects/collation/lib/libc/locale/none.c	Sat Aug  8 18:22:14 2015	(r286459)
@@ -1,4 +1,6 @@
 /*-
+ * Copyright 2013 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved.

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201508081822.t78IMFfS085095>