Date: Sun, 3 Aug 2008 20:57:45 GMT
From: Konrad Jankowski <konrad@FreeBSD.org>
To: Perforce Change Reviews <perforce@FreeBSD.org>
Subject: PERFORCE change 146557 for review
Message-ID: <200808032057.m73KvjmO094526@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=146557 Change 146557 by konrad@vspredator on 2008/08/03 20:57:32 Temporarily reverted changes, so that correct deltas can be seen. Affected files ... .. //depot/projects/soc2008/konrad_collation/libc/locale/collate.c#6 edit .. //depot/projects/soc2008/konrad_collation/libc/locale/collate.h#5 edit .. //depot/projects/soc2008/konrad_collation/libc/locale/collcmp.c#5 edit .. //depot/projects/soc2008/konrad_collation/libc/locale/setlocale.c#5 edit .. //depot/projects/soc2008/konrad_collation/libc/string/strcoll.c#5 edit .. //depot/projects/soc2008/konrad_collation/libc/string/strxfrm.c#5 edit .. //depot/projects/soc2008/konrad_collation/libc/string/wcscoll.c#5 edit .. //depot/projects/soc2008/konrad_collation/libc/string/wcsxfrm.c#5 edit Differences ... ==== //depot/projects/soc2008/konrad_collation/libc/locale/collate.c#6 (text+ko) ==== @@ -26,26 +26,16 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/lib/libc/locale/collate.c,v 1.33 2004/09/22 16:56:48 stefanf Exp $"); - -#define __collate_chain_equiv_table (__collate_data->__chain_equiv_table) -#define __collate_chain_pri_table (__collate_data->__chain_pri_table) -#define __collate_char_pri_table (__collate_data->__char_pri_table) -#define __collate_info (&__collate_data->__info) -#define __collate_large_char_pri_table (__collate_data->__large_char_pri_table) -#define __collate_substitute_table (__collate_data->__substitute_table) +__FBSDID("$FreeBSD: src/lib/libc/locale/collate.c,v 1.35 2005/02/27 20:31:13 ru Exp $"); #include "namespace.h" #include <arpa/inet.h> #include <stdio.h> #include <stdlib.h> -#include <stddef.h> #include <string.h> -#include <wchar.h> #include <errno.h> #include <unistd.h> #include <sysexits.h> -#include <ctype.h> #include "un-namespace.h" #include "collate.h" @@ -54,25 +44,24 @@ #include "libc_private.h" -#if _BYTE_ORDER == _LITTLE_ENDIAN -static void wntohl(wchar_t *, int); -#endif /* _BYTE_ORDER == _LITTLE_ENDIAN */ -void __collate_err(int 
ex, const char *f) __dead2; - -#undef __collate_load_error int __collate_load_error = 1; int __collate_substitute_nontrivial; -struct __locale_st_collate *__collate_data = NULL; + +u_char __collate_substitute_table[UCHAR_MAX + 1][STR_LEN]; +struct __collate_st_char_pri __collate_char_pri_table[UCHAR_MAX + 1]; +struct __collate_st_chain_pri *__collate_chain_pri_table; + +void __collate_err(int ex, const char *f) __dead2; int __collate_load_tables(const char *encoding) { FILE *fp; - int i, saverr, chains, z; + int i, saverr, chains; + uint32_t u32; char strbuf[STR_LEN], buf[PATH_MAX]; - struct __locale_st_collate *TMP; - struct __collate_st_info info; - void *vp; + void *TMP_substitute_table, *TMP_char_pri_table, *TMP_chain_pri_table; + static char collate_encoding[ENCODING_LEN + 1]; /* 'encoding' must be already checked. */ if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) { @@ -83,7 +72,7 @@ /* * If the locale name is the same as our cache, use the cache. */ - if (__collate_data && strcmp(encoding, __collate_data->__encoding) == 0) { + if (strcmp(encoding, collate_encoding) == 0) { __collate_load_error = 0; return (_LDP_CACHE); } @@ -98,9 +87,6 @@ (void)strcat(buf, "/"); (void)strcat(buf, encoding); (void)strcat(buf, "/LC_COLLATE"); -#ifdef LOCALE_DEBUG - fprintf(stderr, "__collate_load_tables: opening %s\n", buf); -#endif if ((fp = fopen(buf, "r")) == NULL) return (_LDP_ERROR); @@ -111,30 +97,23 @@ return (_LDP_ERROR); } chains = -1; - if (strcmp(strbuf, COLLATE_VERSION1_3) == 0) + if (strcmp(strbuf, COLLATE_VERSION) == 0) + chains = 0; + else if (strcmp(strbuf, COLLATE_VERSION1_2) == 0) chains = 1; if (chains < 0) { - fprintf(stderr, "__collate_load_tables: wrong signature: %s\n", strbuf); (void)fclose(fp); errno = EFTYPE; return (_LDP_ERROR); } if (chains) { - if (fread(&info, sizeof(info), 1, fp) != 1) { + if (fread(&u32, sizeof(u32), 1, fp) != 1) { saverr = errno; (void)fclose(fp); errno = saverr; return (_LDP_ERROR); } -#if _BYTE_ORDER == 
_LITTLE_ENDIAN - for(z = 0; z < info.directive_count; z++) { - info.undef_pri[z] = ntohl(info.undef_pri[z]); - info.subst_count[z] = ntohl(info.subst_count[z]); - } - info.chain_count = ntohl(info.chain_count); - info.large_pri_count = ntohl(info.large_pri_count); -#endif /* _BYTE_ORDER == _LITTLE_ENDIAN */ - if ((chains = info.chain_count) < 0) { + if ((chains = (int)ntohl(u32)) < 1) { (void)fclose(fp); errno = EFTYPE; return (_LDP_ERROR); @@ -142,13 +121,26 @@ } else chains = TABLE_SIZE; - i = sizeof(struct __locale_st_collate) - + sizeof(struct __collate_st_chain_pri) * chains - + sizeof(struct __collate_st_large_char_pri) * info.large_pri_count; - for(z = 0; z < info.directive_count; z++) - i += sizeof(struct __collate_st_subst) * info.subst_count[z]; - if ((TMP = (struct __locale_st_collate *)malloc(i)) == NULL) { + if ((TMP_substitute_table = + malloc(sizeof(__collate_substitute_table))) == NULL) { + saverr = errno; + (void)fclose(fp); + errno = saverr; + return (_LDP_ERROR); + } + if ((TMP_char_pri_table = + malloc(sizeof(__collate_char_pri_table))) == NULL) { + saverr = errno; + free(TMP_substitute_table); + (void)fclose(fp); + errno = saverr; + return (_LDP_ERROR); + } + if ((TMP_chain_pri_table = + malloc(sizeof(*__collate_chain_pri_table) * chains)) == NULL) { saverr = errno; + free(TMP_substitute_table); + free(TMP_char_pri_table); (void)fclose(fp); errno = saverr; return (_LDP_ERROR); @@ -158,415 +150,114 @@ { \ if (fread(a, b, c, d) != c) { \ saverr = errno; \ - free(TMP); \ + free(TMP_substitute_table); \ + free(TMP_char_pri_table); \ + free(TMP_chain_pri_table); \ (void)fclose(d); \ errno = saverr; \ return (_LDP_ERROR); \ } \ } - /* adjust size to read the remaining in one chunk */ - i -= offsetof(struct __locale_st_collate, __char_pri_table); - FREAD(TMP->__char_pri_table, i, 1, fp); + FREAD(TMP_substitute_table, sizeof(__collate_substitute_table), 1, fp); + FREAD(TMP_char_pri_table, sizeof(__collate_char_pri_table), 1, fp); + 
FREAD(TMP_chain_pri_table, + sizeof(*__collate_chain_pri_table), chains, fp); (void)fclose(fp); - vp = (void *)(TMP + 1); - - /* the COLLATE_SUBST_DUP optimization relies on COLL_WEIGHTS_MAX == 2 */ - if (info.subst_count[0] > 0) { - TMP->__substitute_table[0] = (struct __collate_st_subst *)vp; - vp += info.subst_count[0] * sizeof(struct __collate_st_subst); - } else - TMP->__substitute_table[0] = NULL; - if (info.flags & COLLATE_SUBST_DUP) - TMP->__substitute_table[1] = TMP->__substitute_table[0]; - else if (info.subst_count[1] > 0) { - TMP->__substitute_table[1] = (struct __collate_st_subst *)vp; - vp += info.subst_count[1] * sizeof(struct __collate_st_subst); - } else - TMP->__substitute_table[1] = NULL; - - if (chains > 0) { - TMP->__chain_pri_table = (struct __collate_st_chain_pri *)vp; - vp += chains * sizeof(struct __collate_st_chain_pri); - } else - TMP->__chain_pri_table = NULL; - if (info.large_pri_count > 0) - TMP->__large_char_pri_table = - (struct __collate_st_large_char_pri *)vp; - else - TMP->__large_char_pri_table = NULL; - -#if _BYTE_ORDER == _LITTLE_ENDIAN - { - struct __collate_st_char_pri *p = TMP->__char_pri_table; - for(i = UCHAR_MAX + 1; i-- > 0; p++) { - for(z = 0; z < info.directive_count; z++) - p->pri[z] = ntohl(p->pri[z]); - } + (void)strcpy(collate_encoding, encoding); + if (__collate_substitute_table_ptr != NULL) + free(__collate_substitute_table_ptr); + __collate_substitute_table_ptr = TMP_substitute_table; + if (__collate_char_pri_table_ptr != NULL) + free(__collate_char_pri_table_ptr); + __collate_char_pri_table_ptr = TMP_char_pri_table; + for (i = 0; i < UCHAR_MAX + 1; i++) { + __collate_char_pri_table[i].prim = + ntohl(__collate_char_pri_table[i].prim); + __collate_char_pri_table[i].sec = + ntohl(__collate_char_pri_table[i].sec); } - for(z = 0; z < info.directive_count; z++) - if (info.subst_count[z] > 0) { - struct __collate_st_subst *p = - TMP->__substitute_table[z]; - for(i = info.subst_count[z]; i-- > 0; p++) { - p->val = 
ntohl(p->val); - wntohl(p->str, STR_LEN); - } - } - { - struct __collate_st_chain_pri *p = TMP->__chain_pri_table; - for(i = chains; i-- > 0; p++) { - wntohl(p->str, STR_LEN); - for(z = 0; z < info.directive_count; z++) - p->pri[z] = ntohl(p->pri[z]); - } + if (__collate_chain_pri_table != NULL) + free(__collate_chain_pri_table); + __collate_chain_pri_table = TMP_chain_pri_table; + for (i = 0; i < chains; i++) { + __collate_chain_pri_table[i].prim = + ntohl(__collate_chain_pri_table[i].prim); + __collate_chain_pri_table[i].sec = + ntohl(__collate_chain_pri_table[i].sec); } - if (info.large_pri_count > 0) { - struct __collate_st_large_char_pri *p = - TMP->__large_char_pri_table; - for(i = info.large_pri_count; i-- > 0; p++) { - p->val = ntohl(p->val); - for(z = 0; z < info.directive_count; z++) - p->pri.pri[z] = ntohl(p->pri.pri[z]); + __collate_substitute_nontrivial = 0; + for (i = 0; i < UCHAR_MAX + 1; i++) { + if (__collate_substitute_table[i][0] != i || + __collate_substitute_table[i][1] != 0) { + __collate_substitute_nontrivial = 1; + break; } } -#endif /* _BYTE_ORDER == _LITTLE_ENDIAN */ - (void)strcpy(TMP->__encoding, encoding); - (void)memcpy(&TMP->__info, &info, sizeof(info)); - __collate_data = TMP; - - __collate_load_error = (info.subst_count[0] > 0 || - info.subst_count[1] > 0); __collate_load_error = 0; -#ifdef LOCALE_DEBUG - fprintf(stderr, "__collate_load_tables: loaded successfully\n"); -#endif return (_LDP_LOADED); } -static int -__collate_wcsnlen(const wchar_t *s, int len) +u_char * +__collate_substitute(const u_char *s) { - int n = 0; - while (*s && n < len) { - s++; - n++; - } - return n; -} - -static struct __collate_st_subst * -substsearch(const wchar_t key, struct __collate_st_subst *tab, int n) -{ - int low = 0; - int high = n - 1; - int next, compar; - struct __collate_st_subst *p; - - while (low <= high) { - next = (low + high) / 2; - p = tab + next; - compar = key - p->val; - if (compar == 0) - return p; - if (compar > 0) - low = next + 1; 
- else - high = next - 1; - } - return NULL; -} - -wchar_t * -__collate_substitute(const wchar_t *s, int which) -{ int dest_len, len, nlen; - int n, delta, nsubst; - wchar_t *dest_str = NULL; - const wchar_t *fp; - struct __collate_st_subst *subst, *match; + int delta = strlen(s); + u_char *dest_str = NULL; if (s == NULL || *s == '\0') - return (__collate_wcsdup(L"")); - dest_len = wcslen(s); - nsubst = __collate_info->subst_count[which]; - if (nsubst <= 0) - return __collate_wcsdup(s); - subst = __collate_substitute_table[which]; - delta = dest_len / 4; - if (delta < 2) - delta = 2; - dest_str = (wchar_t *)malloc((dest_len += delta) * sizeof(wchar_t)); + return (__collate_strdup("")); + delta += delta / 8; + dest_str = malloc(dest_len = delta); if (dest_str == NULL) __collate_err(EX_OSERR, __func__); len = 0; while (*s) { - if ((match = substsearch(*s, subst, nsubst)) != NULL) { - fp = match->str; - n = __collate_wcsnlen(fp, STR_LEN); - } else { - fp = s; - n = 1; - } - nlen = len + n; + nlen = len + strlen(__collate_substitute_table[*s]); if (dest_len <= nlen) { - dest_str = reallocf(dest_str, (dest_len = nlen + delta) - * sizeof(wchar_t)); + dest_str = reallocf(dest_str, dest_len = nlen + delta); if (dest_str == NULL) __collate_err(EX_OSERR, __func__); } - wcsncpy(dest_str + len, fp, n); - len += n; - s++; + (void)strcpy(dest_str + len, __collate_substitute_table[*s++]); + len = nlen; } - dest_str[len] = 0; return (dest_str); } -static struct __collate_st_chain_pri * -chainsearch(const wchar_t *key, int *len) -{ - int low = 0; - int high = __collate_info->chain_count - 1; - int next, compar, l; - struct __collate_st_chain_pri *p; - struct __collate_st_chain_pri *tab = __collate_chain_pri_table; - - while (low <= high) { - next = (low + high) / 2; - p = tab + next; - compar = *key - *p->str; - if (compar == 0) { - l = __collate_wcsnlen(p->str, STR_LEN); - compar = wcsncmp(key, p->str, l); - if (compar == 0) { - *len = l; - return p; - } - } - if (compar > 0) - 
low = next + 1; - else - high = next - 1; - } - return NULL; -} - -static struct __collate_st_large_char_pri * -largesearch(const wchar_t key) -{ - int low = 0; - int high = __collate_info->large_pri_count - 1; - int next, compar; - struct __collate_st_large_char_pri *p; - struct __collate_st_large_char_pri *tab = - __collate_large_char_pri_table; - - while (low <= high) { - next = (low + high) / 2; - p = tab + next; - compar = key - p->val; - if (compar == 0) - return p; - if (compar > 0) - low = next + 1; - else - high = next - 1; - } - return NULL; -} - void -__collate_lookup(const wchar_t *t, int *len, int *prim, int *sec) +__collate_lookup(const u_char *t, int *len, int *prim, int *sec) { struct __collate_st_chain_pri *p2; - int l; *len = 1; *prim = *sec = 0; - p2 = chainsearch(t, &l); - /* use the chain if prim >= 0 */ - if (p2 && p2->pri[0] >= 0) { - *len = l; - *prim = p2->pri[0]; - *sec = p2->pri[1]; - return; - } - if (*t <= UCHAR_MAX) { - *prim = __collate_char_pri_table[*t].pri[0]; - *sec = __collate_char_pri_table[*t].pri[1]; - return; - } - if (__collate_info->large_pri_count > 0) { - struct __collate_st_large_char_pri *match; - match = largesearch(*t); - if (match) { - *prim = match->pri.pri[0]; - *sec = match->pri.pri[1]; + for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++) { + if (*t == p2->str[0] && + strncmp(t, p2->str, strlen(p2->str)) == 0) { + *len = strlen(p2->str); + *prim = p2->prim; + *sec = p2->sec; return; } } - *prim = (l = __collate_info->undef_pri[0]) >= 0 ? l : *t - l; - *sec = (l = __collate_info->undef_pri[1]) >= 0 ? 
l : *t - l; + *prim = __collate_char_pri_table[*t].prim; + *sec = __collate_char_pri_table[*t].sec; } -void -__collate_lookup_which(const wchar_t *t, int *len, int *pri, int which) +u_char * +__collate_strdup(u_char *s) { - struct __collate_st_chain_pri *p2; - int p, l; + u_char *t = strdup(s); - *len = 1; - *pri = 0; - p2 = chainsearch(t, &l); - if (p2) { - p = p2->pri[which]; - /* use the chain if pri >= 0 */ - if (p >= 0) { - *len = l; - *pri = p; - return; - } - } - if (*t <= UCHAR_MAX) { - *pri = __collate_char_pri_table[*t].pri[which]; - return; - } - if (__collate_info->large_pri_count > 0) { - struct __collate_st_large_char_pri *match; - match = largesearch(*t); - if (match) { - *pri = match->pri.pri[which]; - return; - } - } - *pri = (l = __collate_info->undef_pri[which]) >= 0 ? l : *t - l; -} - -wchar_t * -__collate_mbstowcs(const char *s) -{ - static const mbstate_t initial; - mbstate_t st; - size_t len; - const char *ss; - wchar_t *wcs; - - ss = s; - st = initial; - if ((len = mbsrtowcs(NULL, &ss, 0, &st)) == (size_t)-1) - return NULL; - if ((wcs = (wchar_t *)malloc((len + 1) * sizeof(wchar_t))) == NULL) + if (t == NULL) __collate_err(EX_OSERR, __func__); - st = initial; - mbsrtowcs(wcs, &s, len, &st); - wcs[len] = 0; - - return (wcs); + return (t); } -wchar_t * -__collate_wcsdup(const wchar_t *s) -{ - size_t len = wcslen(s) + 1; - wchar_t *wcs; - - if ((wcs = (wchar_t *)malloc(len * sizeof(wchar_t))) == NULL) - __collate_err(EX_OSERR, __func__); - wcscpy(wcs, s); - return (wcs); -} - void -__collate_xfrm(const wchar_t *src, wchar_t **xf) -{ - int pri, len; - size_t slen; - const wchar_t *t; - wchar_t *tt = NULL, *tr = NULL; - int direc, pass; - wchar_t *xfp; - struct __collate_st_info *info = __collate_info; - int sverrno; - - for(pass = 0; pass < COLL_WEIGHTS_MAX; pass++) - xf[pass] = NULL; - for(pass = 0; pass < info->directive_count; pass++) { - direc = info->directive[pass]; - if (pass == 0 || !(info->flags & COLLATE_SUBST_DUP)) { - sverrno = 
errno; - free(tt); - errno = sverrno; - tt = __collate_substitute(src, pass); - } - if (direc & DIRECTIVE_BACKWARD) { - wchar_t *bp, *fp, c; - sverrno = errno; - free(tr); - errno = sverrno; - tr = __collate_wcsdup(tt ? tt : src); - bp = tr; - fp = tr + wcslen(tr) - 1; - while(bp < fp) { - c = *bp; - *bp++ = *fp; - *fp-- = c; - } - t = (const wchar_t *)tr; - } else if (tt) - t = (const wchar_t *)tt; - else - t = (const wchar_t *)src; - sverrno = errno; - if ((xf[pass] = (wchar_t *)malloc(sizeof(wchar_t) * - (wcslen(t) + 1))) == NULL) { - errno = sverrno; - slen = 0; - goto end; - } - errno = sverrno; - xfp = xf[pass]; - if (direc & DIRECTIVE_POSITION) { - while(*t) { - __collate_lookup_which(t, &len, &pri, pass); - t += len; - if (pri <= 0) { - if (pri < 0) { - errno = EINVAL; - slen = 0; - goto end; - } - pri = COLLATE_MAX_PRIORITY; - } - *xfp++ = pri; - } - } else { - while(*t) { - __collate_lookup_which(t, &len, &pri, pass); - t += len; - if (pri <= 0) { - if (pri < 0) { - errno = EINVAL; - slen = 0; - goto end; - } - continue; - } - *xfp++ = pri; - } - } - *xfp = 0; - } - end: - sverrno = errno; - free(tt); - free(tr); - errno = sverrno; -} - -void __collate_err(int ex, const char *f) { const char *s; @@ -584,350 +275,24 @@ exit(ex); } -/* - * __collate_collating_symbol takes the multibyte string specified by - * src and slen, and using ps, converts that to a wide character. Then - * it is checked to verify it is a collating symbol, and then copies - * it to the wide character string specified by dst and dlen (the - * results are not null terminated). The length of the wide characters - * copied to dst is returned if successful. Zero is returned if no such - * collating symbol exists. (size_t)-1 is returned if there are wide-character - * conversion errors, if the length of the converted string is greater that - * STR_LEN or if dlen is too small. It is up to the calling routine to - * preserve the mbstate_t structure as needed. 
- */ -size_t -__collate_collating_symbol(wchar_t *dst, size_t dlen, const char *src, - size_t slen, mbstate_t *ps) -{ - wchar_t wname[STR_LEN]; - wchar_t w, *wp; - size_t len, l; - - /* POSIX locale */ - if (__collate_load_error) { - if (dlen < 1) - return (size_t)-1; - if (slen != 1 || !isascii(*src)) - return 0; - *dst = *src; - return 1; - } - for(wp = wname, len = 0; slen > 0; len++) { - l = mbrtowc(&w, src, slen, ps); - if (l == (size_t)-1 || l == (size_t)-2) - return (size_t)-1; - if (l == 0) - break; - if (len >= STR_LEN) - return -1; - *wp++ = w; - src += l; - slen = (long)slen - (long)l; - } - if (len == 0 || len > dlen) - return (size_t)-1; - if (len == 1) { - if (*wname <= UCHAR_MAX) { - if (__collate_char_pri_table[*wname].pri[0] >= 0) { - if (dlen > 0) - *dst = *wname; - return 1; - } - return 0; - } else if (__collate_info->large_pri_count > 0) { - struct __collate_st_large_char_pri *match; - match = largesearch(*wname); - if (match && match->pri.pri[0] >= 0) { - if (dlen > 0) - *dst = *wname; - return 1; - } - } - return 0; - } - *wp = 0; - if (__collate_info->chain_count > 0) { - struct __collate_st_chain_pri *match; - int ll; - match = chainsearch(wname, &ll); - if (match) { - if (ll < dlen) - dlen = ll; - wcsncpy(dst, wname, dlen); - return ll; - } - } - return 0; -} - -/* - * __collate_equiv_class returns the equivalence class number for the symbol - * specified by src and slen, using ps to convert from multi-byte to wide - * character. Zero is returned if the symbol is not in an equivalence - * class. -1 is returned if there are wide character conversion error, - * if there are any greater-than-8-bit characters or if a multi-byte symbol - * is greater or equal to STR_LEN in length. It is up to the calling - * routine to preserve the mbstate_t structure as needed. 
- */ -int -__collate_equiv_class(const char *src, size_t slen, mbstate_t *ps) -{ - wchar_t wname[STR_LEN]; - wchar_t w, *wp; - size_t len, l; - int e; - - /* POSIX locale */ - if (__collate_load_error) - return 0; - for(wp = wname, len = 0; slen > 0; len++) { - l = mbrtowc(&w, src, slen, ps); - if (l == (size_t)-1 || l == (size_t)-2) - return -1; - if (l == 0) - break; - if (len >= STR_LEN) - return -1; - *wp++ = w; - src += l; - slen = (long)slen - (long)l; - } - if (len == 0) - return -1; - if (len == 1) { - e = -1; - if (*wname <= UCHAR_MAX) - e = __collate_char_pri_table[*wname].pri[0]; - else if (__collate_info->large_pri_count > 0) { - struct __collate_st_large_char_pri *match; - match = largesearch(*wname); - if (match) - e = match->pri.pri[0]; - } - if (e == 0) - return IGNORE_EQUIV_CLASS; - return e > 0 ? e : 0; - } - *wp = 0; - if (__collate_info->chain_count > 0) { - struct __collate_st_chain_pri *match; - int ll; - match = chainsearch(wname, &ll); - if (match) { - e = match->pri[0]; - if (e == 0) - return IGNORE_EQUIV_CLASS; - return e < 0 ? -e : e; - } - } - return 0; -} - -/* - * __collate_equiv_match tries to match any single or multi-character symbol - * in equivalence class equiv_class in the multi-byte string specified by src - * and slen. If start is non-zero, it is taken to be the first (pre-converted) - * wide character. Subsequence wide characters, if needed, will use ps in - * the conversion. On a successful match, the length of the matched string - * is returned (including the start character). If dst is non-NULL, the - * matched wide-character string is copied to dst, a wide character array of - * length dlen (the results are not zero-terminated). If rlen is non-NULL, - * the number of character in src actually used is returned. Zero is - * returned by __collate_equiv_match if there is no match. (size_t)-1 is - * returned on error: if there were conversion errors or if dlen is too small - * to accept the results. 
On no match or error, ps is restored to its incoming - * state. - */ -size_t -__collate_equiv_match(int equiv_class, wchar_t *dst, size_t dlen, wchar_t start, const char *src, size_t slen, mbstate_t *ps, size_t *rlen) -{ - wchar_t w; - size_t len, l, clen; - int i; - wchar_t buf[STR_LEN], *wp; - mbstate_t save; - const char *s = src; - size_t sl = slen; - struct __collate_st_chain_pri *ch = NULL; - - /* POSIX locale */ - if (__collate_load_error) - return (size_t)-1; - if (equiv_class == IGNORE_EQUIV_CLASS) - equiv_class = 0; - if (ps) - save = *ps; - wp = buf; - len = clen = 0; - if (start) { - *wp++ = start; - len = 1; - } - /* convert up to the max chain length */ - while(sl > 0 && len < __collate_info->chain_max_len) { - l = mbrtowc(&w, s, sl, ps); - if (l == (size_t)-1 || l == (size_t)-2 || l == 0) - break; - *wp++ = w; - s += l; - clen += l; - sl -= l; - len++; - } - *wp = 0; - if (len > 1 && (ch = chainsearch(buf, &i)) != NULL) { - int e = ch->pri[0]; - if (e < 0) - e = -e; - if (e == equiv_class) - goto found; - } - /* try single character */ - i = 1; - if (*buf <= UCHAR_MAX) { - if (equiv_class == __collate_char_pri_table[*buf].pri[0]) - goto found; - } else if (__collate_info->large_pri_count > 0) { - struct __collate_st_large_char_pri *match; - match = largesearch(*buf); - if (match && equiv_class == match->pri.pri[0]) - goto found; - } - /* no match */ - if (ps) - *ps = save; - return 0; -found: - /* if we converted more than we used, restore to initial and reconvert - * up to what did match */ - if (i < len) { - len = i; - if (ps) - *ps = save; - if (start) - i--; - clen = 0; - while(i-- > 0) { - l = mbrtowc(&w, src, slen, ps); - src += l; - clen += l; - slen -= l; - } - } - if (dst) { - if (dlen < len) { - if (ps) - *ps = save; - return (size_t)-1; - } - for(wp = buf; len > 0; len--) - *dst++ = *wp++; - } - if (rlen) - *rlen = clen; - return len; -} - -#if _BYTE_ORDER == _LITTLE_ENDIAN -static void -wntohl(wchar_t *str, int len) -{ - for(; *str && len 
> 0; str++, len--) - *str = ntohl(*str); -} -#endif /* _BYTE_ORDER == _LITTLE_ENDIAN */ - #ifdef COLLATE_DEBUG -static char * -show(int c) -{ - static char buf[5]; - - if (c >=32 && c <= 126) - sprintf(buf, "'%c' ", c); - else - sprintf(buf, "\\x{%02x}", c); - return buf; -} - -static char * -showwcs(const wchar_t *t, int len) -{ - static char buf[64]; - char *cp = buf; - - for(; *t && len > 0; len--, t++) { - if (*t >=32 && *t <= 126) - *cp++ = *t; - else { - sprintf(cp, "\\x{%02x}", *t); - cp += strlen(cp); - } - } - *cp = 0; - return buf; -} - void __collate_print_tables() { - int i, z; - locale_t loc = __current_locale(); + int i; + struct __collate_st_chain_pri *p2; - printf("Info: p=%d s=%d f=0x%02x m=%d dc=%d up=%d us=%d pc=%d " - "sc=%d cc=%d lc=%d\n", - __collate_info->directive[0], __collate_info->directive[1], - __collate_info->flags, __collate_info->chain_max_len, - __collate_info->directive_count, - __collate_info->undef_pri[0], __collate_info->undef_pri[1], - __collate_info->subst_count[0], __collate_info->subst_count[1], - __collate_info->chain_count, __collate_info->large_pri_count); - for(z = 0; z < __collate_info->directive_count; z++) { - if (__collate_info->subst_count[z] > 0) { - struct __collate_st_subst *p2 = - __collate_substitute_table[z]; - if (z == 0 && (__collate_info->flags & - COLLATE_SUBST_DUP)) - printf("Both substitute tables:\n"); - else - printf("Substitute table %d:\n", z); - for (i = __collate_info->subst_count[z]; i-- > 0; p2++) - printf("\t%s --> \"%s\"\n", - show(p2->val), - showwcs(p2->str, STR_LEN)); - } - } - if (__collate_info->chain_count > 0) { - printf("Chain priority table:\n"); - struct __collate_st_chain_pri *p2 = __collate_chain_pri_table; - for (i = __collate_info->chain_count; i-- > 0; p2++) { - printf("\t\"%s\" :", showwcs(p2->str, STR_LEN)); - for(z = 0; z < __collate_info->directive_count; z++) - printf(" %d", p2->pri[z]); - putchar('\n'); - } - } + printf("Substitute table:\n"); + for (i = 0; i < UCHAR_MAX + 
1; i++) + if (i != *__collate_substitute_table[i]) + printf("\t'%c' --> \"%s\"\n", i, + __collate_substitute_table[i]); + printf("Chain priority table:\n"); + for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++) + printf("\t\"%s\" : %d %d\n", p2->str, p2->prim, p2->sec); printf("Char priority table:\n"); - { - struct __collate_st_char_pri *p2 = __collate_char_pri_table; - for (i = 0; i < UCHAR_MAX + 1; i++, p2++) { - printf("\t%s :", show(i)); - for(z = 0; z < __collate_info->directive_count; z++) - printf(" %d", p2->pri[z]); - putchar('\n'); - } - } - if (__collate_info->large_pri_count > 0) { - struct __collate_st_large_char_pri *p2 = - __collate_large_char_pri_table; - printf("Large priority table:\n"); - for (i = __collate_info->large_pri_count; i-- > 0; p2++) { - printf("\t%s :", show(p2->val)); - for(z = 0; z < __collate_info->directive_count; z++) >>> TRUNCATED FOR MAIL (1000 lines) <<<
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200808032057.m73KvjmO094526>