Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 21 Sep 2007 06:41:07 +0400
From:      Andrey Chernov <ache@nagual.pp.ru>
To:        Taku YAMAMOTO <taku@tackymt.homeip.net>, Petr Hroudný <petr.hroudny@gmail.com>, current@FreeBSD.ORG, perky@FreeBSD.ORG, i18n@FreeBSD.ORG
Subject:   Re: Ctype patch for review
Message-ID:  <20070921024107.GA21223@nagual.pp.ru>
In-Reply-To: <20070919121024.GA81606@nagual.pp.ru>
References:  <20070916192924.GA12678@nagual.pp.ru> <ab8fc7f50709170129p6f436069iffaf697e83a34e3c@mail.gmail.com> <20070917092130.GA24424@nagual.pp.ru> <20070918020100.d43beb0b.taku@tackymt.homeip.net> <20070917171633.GA31179@nagual.pp.ru> <20070919111207.f37653fc.taku@tackymt.homeip.net> <20070919022555.GA70617@nagual.pp.ru> <20070919023625.GA70891@nagual.pp.ru> <20070919051830.GA72429@nagual.pp.ru> <20070919121024.GA81606@nagual.pp.ru>

next in thread | previous in thread | raw e-mail | index | archive | help

--rwEMma7ioTxnRzrJ
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

On Wed, Sep 19, 2007 at 04:10:24PM +0400, Andrey Chernov wrote:
> Improved version. Introduce a general __mb_sch_limit parameter for all
> locales instead, specifying the upper limit of the single-char range. It
> also allows fixing the bug when ctype(3) functions are called with
> arg > 0xFF for wide character locales, and simplifies all checks. A new
> patch is attached. Here is the updated rationale again:

Next improved version, now optimized for speed. I decided to remove the extra
_CTYPE_WID flag and duplicate the needed functions instead.

-- 
http://ache.pp.ru/

--rwEMma7ioTxnRzrJ
Content-Type: text/x-diff; charset=us-ascii
Content-Disposition: attachment; filename="ctype.patch"

--- Symbol.map.old	2007-09-19 22:37:21.000000000 +0400
+++ Symbol.map	2007-09-21 06:31:56.000000000 +0400
@@ -60,12 +60,17 @@
 	nextwctype;
 	nl_langinfo;
 	__maskrune;
+	__sbmaskrune;
 	__istype;
+	__sbistype;
 	__isctype;
 	__toupper;
+	__sbtoupper;
 	__tolower;
+	__sbtolower;
 	__wcwidth;
 	__mb_cur_max;
+	__mb_sch_limit;
 	rpmatch;
 	___runetype;
 	setlocale;
--- _ctype.h.old	2007-09-16 21:13:59.000000000 +0400
+++ _ctype.h	2007-09-21 06:21:59.000000000 +0400
@@ -87,6 +87,8 @@
 #define	__inline
 #endif
 
+extern int __mb_sch_limit;
+
 /*
  * Use inline functions if we are allowed to and the compiler supports them.
  */
@@ -103,15 +105,28 @@
 }
 
 static __inline int
+__sbmaskrune(__ct_rune_t _c, unsigned long _f)
+{
+	return (_c < 0 || _c >= __mb_sch_limit) ? 0 :
+	       _CurrentRuneLocale->__runetype[_c] & _f;
+}
+
+static __inline int
 __istype(__ct_rune_t _c, unsigned long _f)
 {
 	return (!!__maskrune(_c, _f));
 }
 
 static __inline int
+__sbistype(__ct_rune_t _c, unsigned long _f)
+{
+	return (!!__sbmaskrune(_c, _f));
+}
+
+static __inline int
 __isctype(__ct_rune_t _c, unsigned long _f)
 {
-	return (_c < 0 || _c >= _CACHED_RUNES) ? 0 :
+	return (_c < 0 || _c >= __mb_sch_limit) ? 0 :
 	       !!(_DefaultRuneLocale.__runetype[_c] & _f);
 }
 
@@ -123,12 +138,26 @@
 }
 
 static __inline __ct_rune_t
+__sbtoupper(__ct_rune_t _c)
+{
+	return (_c < 0 || _c >= __mb_sch_limit) ? _c :
+	       _CurrentRuneLocale->__mapupper[_c];
+}
+
+static __inline __ct_rune_t
 __tolower(__ct_rune_t _c)
 {
 	return (_c < 0 || _c >= _CACHED_RUNES) ? ___tolower(_c) :
 	       _CurrentRuneLocale->__maplower[_c];
 }
 
+static __inline __ct_rune_t
+__sbtolower(__ct_rune_t _c)
+{
+	return (_c < 0 || _c >= __mb_sch_limit) ? _c :
+	       _CurrentRuneLocale->__maplower[_c];
+}
+
 static __inline int
 __wcwidth(__ct_rune_t _c)
 {
@@ -146,10 +175,14 @@
 
 __BEGIN_DECLS
 int		__maskrune(__ct_rune_t, unsigned long);
+int		__sbmaskrune(__ct_rune_t, unsigned long);
 int		__istype(__ct_rune_t, unsigned long);
+int		__sbistype(__ct_rune_t, unsigned long);
 int		__isctype(__ct_rune_t, unsigned long);
 __ct_rune_t	__toupper(__ct_rune_t);
+__ct_rune_t	__sbtoupper(__ct_rune_t);
 __ct_rune_t	__tolower(__ct_rune_t);
+__ct_rune_t	__sbtolower(__ct_rune_t);
 int		__wcwidth(__ct_rune_t);
 __END_DECLS
 #endif /* using inlines */
--- big5.c.old	2007-09-19 08:48:55.000000000 +0400
+++ big5.c	2007-09-19 15:41:26.000000000 +0400
@@ -49,6 +49,8 @@
 #include <wchar.h>
 #include "mblocal.h"
 
+extern int __mb_sch_limit;
+
 static size_t	_BIG5_mbrtowc(wchar_t * __restrict, const char * __restrict,
 		    size_t, mbstate_t * __restrict);
 static int	_BIG5_mbsinit(const mbstate_t *);
@@ -68,6 +70,7 @@
 	__mbsinit = _BIG5_mbsinit;
 	_CurrentRuneLocale = rl;
 	__mb_cur_max = 2;
+	__mb_sch_limit = 256;
 	return (0);
 }
 
--- ctype.h.old	2007-09-16 22:03:55.000000000 +0400
+++ ctype.h	2007-09-21 06:26:26.000000000 +0400
@@ -86,19 +86,19 @@
 #endif
 __END_DECLS
 
-#define	isalnum(c)	__istype((c), _CTYPE_A|_CTYPE_D)
-#define	isalpha(c)	__istype((c), _CTYPE_A)
-#define	iscntrl(c)	__istype((c), _CTYPE_C)
+#define	isalnum(c)	__sbistype((c), _CTYPE_A|_CTYPE_D)
+#define	isalpha(c)	__sbistype((c), _CTYPE_A)
+#define	iscntrl(c)	__sbistype((c), _CTYPE_C)
 #define	isdigit(c)	__isctype((c), _CTYPE_D) /* ANSI -- locale independent */
-#define	isgraph(c)	__istype((c), _CTYPE_G)
-#define	islower(c)	__istype((c), _CTYPE_L)
-#define	isprint(c)	__istype((c), _CTYPE_R)
-#define	ispunct(c)	__istype((c), _CTYPE_P)
-#define	isspace(c)	__istype((c), _CTYPE_S)
-#define	isupper(c)	__istype((c), _CTYPE_U)
+#define	isgraph(c)	__sbistype((c), _CTYPE_G)
+#define	islower(c)	__sbistype((c), _CTYPE_L)
+#define	isprint(c)	__sbistype((c), _CTYPE_R)
+#define	ispunct(c)	__sbistype((c), _CTYPE_P)
+#define	isspace(c)	__sbistype((c), _CTYPE_S)
+#define	isupper(c)	__sbistype((c), _CTYPE_U)
 #define	isxdigit(c)	__isctype((c), _CTYPE_X) /* ANSI -- locale independent */
-#define	tolower(c)	__tolower(c)
-#define	toupper(c)	__toupper(c)
+#define	tolower(c)	__sbtolower(c)
+#define	toupper(c)	__sbtoupper(c)
 
 #if __XSI_VISIBLE
 /*
@@ -112,24 +112,24 @@
  *
  * XXX isascii() and toascii() should similarly be undocumented.
  */
-#define	_tolower(c)	__tolower(c)
-#define	_toupper(c)	__toupper(c)
+#define	_tolower(c)	__sbtolower(c)
+#define	_toupper(c)	__sbtoupper(c)
 #define	isascii(c)	(((c) & ~0x7F) == 0)
 #define	toascii(c)	((c) & 0x7F)
 #endif
 
 #if __ISO_C_VISIBLE >= 1999
-#define	isblank(c)	__istype((c), _CTYPE_B)
+#define	isblank(c)	__sbistype((c), _CTYPE_B)
 #endif
 
 #if __BSD_VISIBLE
-#define	digittoint(c)	__maskrune((c), 0xFF)
-#define	ishexnumber(c)	__istype((c), _CTYPE_X)
-#define	isideogram(c)	__istype((c), _CTYPE_I)
-#define	isnumber(c)	__istype((c), _CTYPE_D)
-#define	isphonogram(c)	__istype((c), _CTYPE_Q)
-#define	isrune(c)	__istype((c), 0xFFFFFF00L)
-#define	isspecial(c)	__istype((c), _CTYPE_T)
+#define	digittoint(c)	__sbmaskrune((c), 0xFF)
+#define	ishexnumber(c)	__sbistype((c), _CTYPE_X)
+#define	isideogram(c)	__sbistype((c), _CTYPE_I)
+#define	isnumber(c)	__sbistype((c), _CTYPE_D)
+#define	isphonogram(c)	__sbistype((c), _CTYPE_Q)
+#define	isrune(c)	__sbistype((c), 0xFFFFFF00L)
+#define	isspecial(c)	__sbistype((c), _CTYPE_T)
 #endif
 
 #endif /* !_CTYPE_H_ */
--- euc.c.old	2007-09-19 08:50:57.000000000 +0400
+++ euc.c	2007-09-19 15:41:26.000000000 +0400
@@ -49,6 +49,8 @@
 #include <wchar.h>
 #include "mblocal.h"
 
+extern int __mb_sch_limit;
+
 static size_t	_EUC_mbrtowc(wchar_t * __restrict, const char * __restrict,
 		    size_t, mbstate_t * __restrict);
 static int	_EUC_mbsinit(const mbstate_t *);
@@ -116,6 +118,7 @@
 	__mbrtowc = _EUC_mbrtowc;
 	__wcrtomb = _EUC_wcrtomb;
 	__mbsinit = _EUC_mbsinit;
+	__mb_sch_limit = 256;
 	return (0);
 }
 
--- gb18030.c.old	2007-09-19 08:59:01.000000000 +0400
+++ gb18030.c	2007-09-19 15:41:26.000000000 +0400
@@ -39,6 +39,8 @@
 #include <wchar.h>
 #include "mblocal.h"
 
+extern int __mb_sch_limit;
+
 static size_t	_GB18030_mbrtowc(wchar_t * __restrict, const char * __restrict,
 		    size_t, mbstate_t * __restrict);
 static int	_GB18030_mbsinit(const mbstate_t *);
@@ -59,6 +61,7 @@
 	__mbsinit = _GB18030_mbsinit;
 	_CurrentRuneLocale = rl;
 	__mb_cur_max = 4;
+	__mb_sch_limit = 256;
 
 	return (0);
 }
--- gb2312.c.old	2007-09-19 09:00:35.000000000 +0400
+++ gb2312.c	2007-09-19 15:41:26.000000000 +0400
@@ -35,6 +35,8 @@
 #include <wchar.h>
 #include "mblocal.h"
 
+extern int __mb_sch_limit;
+
 static size_t	_GB2312_mbrtowc(wchar_t * __restrict, const char * __restrict,
 		    size_t, mbstate_t * __restrict);
 static int	_GB2312_mbsinit(const mbstate_t *);
@@ -55,6 +57,7 @@
 	__wcrtomb = _GB2312_wcrtomb;
 	__mbsinit = _GB2312_mbsinit;
 	__mb_cur_max = 2;
+	__mb_sch_limit = 256;
 	return (0);
 }
 
--- gbk.c.old	2007-09-19 09:01:33.000000000 +0400
+++ gbk.c	2007-09-19 15:41:26.000000000 +0400
@@ -42,6 +42,8 @@
 #include <wchar.h>
 #include "mblocal.h"
 
+extern int __mb_sch_limit;
+
 static size_t	_GBK_mbrtowc(wchar_t * __restrict, const char * __restrict,
 		    size_t, mbstate_t * __restrict);
 static int	_GBK_mbsinit(const mbstate_t *);
@@ -61,6 +63,7 @@
 	__mbsinit = _GBK_mbsinit;
 	_CurrentRuneLocale = rl;
 	__mb_cur_max = 2;
+	__mb_sch_limit = 256;
 	return (0);
 }
 
--- isctype.c.old	2007-09-16 22:31:26.000000000 +0400
+++ isctype.c	2007-09-21 06:28:30.000000000 +0400
@@ -48,7 +48,7 @@
 digittoint(c)
 	int c;
 {
-	return (__maskrune(c, 0xFF));
+	return (__sbmaskrune(c, 0xFF));
 }
 
 #undef isalnum
@@ -56,7 +56,7 @@
 isalnum(c)
 	int c;
 {
-	return (__istype(c, _CTYPE_A|_CTYPE_D));
+	return (__sbistype(c, _CTYPE_A|_CTYPE_D));
 }
 
 #undef isalpha
@@ -64,7 +64,7 @@
 isalpha(c)
 	int c;
 {
-	return (__istype(c, _CTYPE_A));
+	return (__sbistype(c, _CTYPE_A));
 }
 
 #undef isascii
@@ -80,7 +80,7 @@
 isblank(c)
 	int c;
 {
-	return (__istype(c, _CTYPE_B));
+	return (__sbistype(c, _CTYPE_B));
 }
 
 #undef iscntrl
@@ -88,7 +88,7 @@
 iscntrl(c)
 	int c;
 {
-	return (__istype(c, _CTYPE_C));
+	return (__sbistype(c, _CTYPE_C));
 }
 
 #undef isdigit
@@ -104,7 +104,7 @@
 isgraph(c)
 	int c;
 {
-	return (__istype(c, _CTYPE_G));
+	return (__sbistype(c, _CTYPE_G));
 }
 
 #undef ishexnumber 
@@ -112,7 +112,7 @@
 ishexnumber(c)
 	int c;
 {
-	return (__istype(c, _CTYPE_X));
+	return (__sbistype(c, _CTYPE_X));
 }
 
 #undef isideogram
@@ -120,7 +120,7 @@
 isideogram(c)
 	int c;
 {
-	return (__istype(c, _CTYPE_I));
+	return (__sbistype(c, _CTYPE_I));
 }
 
 #undef islower
@@ -128,7 +128,7 @@
 islower(c)
 	int c;
 {
-	return (__istype(c, _CTYPE_L));
+	return (__sbistype(c, _CTYPE_L));
 }
 
 #undef isnumber
@@ -136,7 +136,7 @@
 isnumber(c)
 	int c;
 {
-	return (__istype(c, _CTYPE_D));
+	return (__sbistype(c, _CTYPE_D));
 }
 
 #undef isphonogram	
@@ -144,7 +144,7 @@
 isphonogram(c)
 	int c;
 {
-	return (__istype(c, _CTYPE_Q));
+	return (__sbistype(c, _CTYPE_Q));
 }
 
 #undef isprint
@@ -152,7 +152,7 @@
 isprint(c)
 	int c;
 {
-	return (__istype(c, _CTYPE_R));
+	return (__sbistype(c, _CTYPE_R));
 }
 
 #undef ispunct
@@ -160,7 +160,7 @@
 ispunct(c)
 	int c;
 {
-	return (__istype(c, _CTYPE_P));
+	return (__sbistype(c, _CTYPE_P));
 }
 
 #undef isrune
@@ -168,7 +168,7 @@
 isrune(c)
 	int c;
 {
-	return (__istype(c, 0xFFFFFF00L));
+	return (__sbistype(c, 0xFFFFFF00L));
 }
 
 #undef isspace
@@ -176,7 +176,7 @@
 isspace(c)
 	int c;
 {
-	return (__istype(c, _CTYPE_S));
+	return (__sbistype(c, _CTYPE_S));
 }
 
 #undef isspecial
@@ -184,7 +184,7 @@
 isspecial(c)
 	int c;
 {
-	return (__istype(c, _CTYPE_T));
+	return (__sbistype(c, _CTYPE_T));
 }
 
 #undef isupper
@@ -192,7 +192,7 @@
 isupper(c)
 	int c;
 {
-	return (__istype(c, _CTYPE_U));
+	return (__sbistype(c, _CTYPE_U));
 }
 
 #undef isxdigit
@@ -216,7 +216,7 @@
 tolower(c)
 	int c;
 {
-        return (__tolower(c));
+	return (__sbtolower(c));
 }
 
 #undef toupper
@@ -224,6 +224,6 @@
 toupper(c)
 	int c;
 {
-        return (__toupper(c));
+	return (__sbtoupper(c));
 }
 
--- iswctype.c.old	2007-09-16 22:31:30.000000000 +0400
+++ iswctype.c	2007-09-21 06:29:59.000000000 +0400
@@ -61,7 +61,7 @@
 iswascii(wc)
 	wint_t wc;
 {
-	return ((wc & ~0x7F) == 0);
+	return (wc < 0x80);
 }
 
 #undef iswblank
--- mskanji.c.old	2007-09-19 09:02:56.000000000 +0400
+++ mskanji.c	2007-09-19 15:41:26.000000000 +0400
@@ -47,6 +47,8 @@
 #include <wchar.h>
 #include "mblocal.h"
 
+extern int __mb_sch_limit;
+
 static size_t	_MSKanji_mbrtowc(wchar_t * __restrict, const char * __restrict,
 		    size_t, mbstate_t * __restrict);
 static int	_MSKanji_mbsinit(const mbstate_t *);
@@ -66,6 +68,7 @@
 	__mbsinit = _MSKanji_mbsinit;
 	_CurrentRuneLocale = rl;
 	__mb_cur_max = 2;
+	__mb_sch_limit = 256;
 	return (0);
 }
 
--- none.c.old	2007-09-19 08:56:40.000000000 +0400
+++ none.c	2007-09-19 21:16:11.000000000 +0400
@@ -58,6 +58,11 @@
 static size_t	_none_wcsnrtombs(char * __restrict, const wchar_t ** __restrict,
 		    size_t, size_t, mbstate_t * __restrict);
 
+/* setup defaults */
+
+int __mb_cur_max = 1;
+int __mb_sch_limit = 256; /* Expected to be <= _CACHED_RUNES */
+
 int
 _none_init(_RuneLocale *rl)
 {
@@ -69,6 +74,7 @@
 	__wcsnrtombs = _none_wcsnrtombs;
 	_CurrentRuneLocale = rl;
 	__mb_cur_max = 1;
+	__mb_sch_limit = 256;
 	return(0);
 }
 
@@ -176,7 +182,6 @@
 
 /* setup defaults */
 
-int __mb_cur_max = 1;
 size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict, size_t,
     mbstate_t * __restrict) = _none_mbrtowc;
 int (*__mbsinit)(const mbstate_t *) = _none_mbsinit;
--- setrunelocale.c.old	2007-09-19 09:03:59.000000000 +0400
+++ setrunelocale.c	2007-09-19 15:41:26.000000000 +0400
@@ -45,6 +45,8 @@
 #include "mblocal.h"
 #include "setlocale.h"
 
+extern int __mb_sch_limit;
+
 extern _RuneLocale	*_Read_RuneMagi(FILE *);
 
 static int		__setrunelocale(const char *);
@@ -59,6 +61,7 @@
 	static char ctype_encoding[ENCODING_LEN + 1];
 	static _RuneLocale *CachedRuneLocale;
 	static int Cached__mb_cur_max;
+	static int Cached__mb_sch_limit;
 	static size_t (*Cached__mbrtowc)(wchar_t * __restrict,
 	    const char * __restrict, size_t, mbstate_t * __restrict);
 	static size_t (*Cached__wcrtomb)(char * __restrict, wchar_t,
@@ -85,6 +88,7 @@
 	    strcmp(encoding, ctype_encoding) == 0) {
 		_CurrentRuneLocale = CachedRuneLocale;
 		__mb_cur_max = Cached__mb_cur_max;
+		__mb_sch_limit = Cached__mb_sch_limit;
 		__mbrtowc = Cached__mbrtowc;
 		__mbsinit = Cached__mbsinit;
 		__mbsnrtowcs = Cached__mbsnrtowcs;
@@ -147,6 +151,7 @@
 		}
 		CachedRuneLocale = _CurrentRuneLocale;
 		Cached__mb_cur_max = __mb_cur_max;
+		Cached__mb_sch_limit = __mb_sch_limit;
 		Cached__mbrtowc = __mbrtowc;
 		Cached__mbsinit = __mbsinit;
 		Cached__mbsnrtowcs = __mbsnrtowcs;
--- utf8.c.old	2007-09-19 08:18:40.000000000 +0400
+++ utf8.c	2007-09-19 15:55:35.000000000 +0400
@@ -35,6 +35,8 @@
 #include <wchar.h>
 #include "mblocal.h"
 
+extern int __mb_sch_limit;
+
 static size_t	_UTF8_mbrtowc(wchar_t * __restrict, const char * __restrict,
 		    size_t, mbstate_t * __restrict);
 static int	_UTF8_mbsinit(const mbstate_t *);
@@ -63,6 +65,7 @@
 	__wcsnrtombs = _UTF8_wcsnrtombs;
 	_CurrentRuneLocale = rl;
 	__mb_cur_max = 6;
+	__mb_sch_limit = 128;
 
 	return (0);
 }
--- wctype.h.old	2007-09-16 21:59:37.000000000 +0400
+++ wctype.h	2007-09-21 06:08:40.000000000 +0400
@@ -106,7 +106,7 @@
 #define	towupper(wc)		__toupper(wc)
 
 #if __BSD_VISIBLE
-#define	iswascii(wc)		(((wc) & ~0x7F) == 0)
+#define	iswascii(wc)		((wc) < 0x80)
 #define	iswhexnumber(wc)	__istype((wc), _CTYPE_X)
 #define	iswideogram(wc)		__istype((wc), _CTYPE_I)
 #define	iswnumber(wc)		__istype((wc), _CTYPE_D)

--rwEMma7ioTxnRzrJ--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20070921024107.GA21223>