From owner-p4-projects@FreeBSD.ORG Mon Sep 1 11:22:48 2008 Return-Path: Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id E05771065680; Mon, 1 Sep 2008 11:22:47 +0000 (UTC) Delivered-To: perforce@FreeBSD.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id A2DE4106567C for ; Mon, 1 Sep 2008 11:22:47 +0000 (UTC) (envelope-from konrad@FreeBSD.org) Received: from repoman.freebsd.org (repoman.freebsd.org [IPv6:2001:4f8:fff6::29]) by mx1.freebsd.org (Postfix) with ESMTP id 930E48FC1E for ; Mon, 1 Sep 2008 11:22:47 +0000 (UTC) (envelope-from konrad@FreeBSD.org) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.14.2/8.14.2) with ESMTP id m81BMlho030918 for ; Mon, 1 Sep 2008 11:22:47 GMT (envelope-from konrad@FreeBSD.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.14.2/8.14.1/Submit) id m81BMlRF030916 for perforce@freebsd.org; Mon, 1 Sep 2008 11:22:47 GMT (envelope-from konrad@FreeBSD.org) Date: Mon, 1 Sep 2008 11:22:47 GMT Message-Id: <200809011122.m81BMlRF030916@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to konrad@FreeBSD.org using -f From: Konrad Jankowski To: Perforce Change Reviews Cc: Subject: PERFORCE change 148978 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 01 Sep 2008 11:22:48 -0000 http://perforce.freebsd.org/chv.cgi?CH=148978 Change 148978 by konrad@vspredator on 2008/09/01 11:22:34 Colldef with proper expansion support added. This is not a production version. It will go through a process of space optimisation. Affected files ... .. //depot/projects/soc2008/konrad_collation/colldef.apple/collate.h#7 edit .. //depot/projects/soc2008/konrad_collation/colldef.apple/parse.y#9 edit Differences ... ==== //depot/projects/soc2008/konrad_collation/colldef.apple/collate.h#7 (text+ko) ==== @@ -33,6 +33,8 @@ #include #ifndef __LIBC__ #include +#else +#include /* for ENCODING_LEN */ #endif /* !__LIBC__ */ #include @@ -46,6 +48,7 @@ #define COLLATE_VERSION1_1A "1.1A\n" #define COLLATE_VERSION1_2 "1.2\n" #define COLLATE_VERSION1_3 "1.3\n" +#define COLLATE_VERSION1_4 "1.4\n" /* see discussion in string/FreeBSD/strxfrm for this value */ #define COLLATE_MAX_PRIORITY ((1 << 24) - 1) @@ -63,7 +66,7 @@ struct __collate_st_info { __uint8_t directive[COLL_WEIGHTS_MAX]; __uint8_t flags; -#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN +#if _BYTE_ORDER == _LITTLE_ENDIAN unsigned int directive_count:4; unsigned int chain_max_len:4; #else @@ -76,10 +79,18 @@ __int32_t large_pri_count; }; +struct weight_map_struct +{ + unsigned int v:4; +}; + +#define COLL_WEIGHTS_REAL (COLL_WEIGHTS_MAX * 4) struct __collate_st_char_pri { - __int32_t pri[COLL_WEIGHTS_MAX]; + struct weight_map_struct map[COLL_WEIGHTS_MAX]; + __int32_t pri[COLL_WEIGHTS_REAL]; }; struct __collate_st_chain_pri { + struct weight_map_struct map[COLL_WEIGHTS_MAX]; wchar_t str[STR_LEN]; __int32_t pri[COLL_WEIGHTS_MAX]; }; @@ -92,29 +103,34 @@ wchar_t str[STR_LEN]; }; -#ifndef __LIBC__ +#ifdef __LIBC__ +struct __locale_st_collate { + char __encoding[ENCODING_LEN + 1]; + struct __collate_st_info __info; + struct __collate_st_subst *__substitute_table[COLL_WEIGHTS_MAX]; + struct __collate_st_chain_pri *__chain_pri_table; + struct __collate_st_large_char_pri *__large_char_pri_table; + struct __collate_st_char_pri __char_pri_table[UCHAR_MAX + 1]; +}; +#endif + extern int __collate_load_error; extern int __collate_substitute_nontrivial; -#define __collate_char_pri_table (*__collate_char_pri_table_ptr) -extern struct __collate_st_char_pri __collate_char_pri_table[UCHAR_MAX + 1]; -extern struct __collate_st_chain_pri *__collate_chain_pri_table; -extern __int32_t *__collate_chain_equiv_table; -extern struct __collate_st_info __collate_info; -#endif /* !__LIBC__ */ +extern struct __locale_st_collate *__collate_data; __BEGIN_DECLS #ifdef __LIBC__ -wchar_t *__collate_mbstowcs(const char *, locale_t); +wchar_t *__collate_mbstowcs(const char *); wchar_t *__collate_wcsdup(const wchar_t *); -wchar_t *__collate_substitute(const wchar_t *, int, locale_t); -int __collate_load_tables(const char *, locale_t); -void __collate_lookup_l(const wchar_t *, int *, int *, int *, locale_t); -void __collate_lookup_which(const wchar_t *, int *, int *, int, locale_t); -void __collate_xfrm(const wchar_t *, wchar_t **, locale_t); -int __collate_range_cmp(wchar_t, wchar_t, locale_t); -size_t __collate_collating_symbol(wchar_t *, size_t, const char *, size_t, mbstate_t *, locale_t); -int __collate_equiv_class(const char *, size_t, mbstate_t *, locale_t); -size_t __collate_equiv_match(int, wchar_t *, size_t, wchar_t, const char *, size_t, mbstate_t *, size_t *, locale_t); +wchar_t *__collate_substitute(const wchar_t *, int); +int __collate_load_tables(const char *); +void __collate_lookup_l(const wchar_t *, int *, int *, int *); +void __collate_lookup_which(const wchar_t *, int *, int *, int); +void __collate_xfrm(const wchar_t *, wchar_t **); +int __collate_range_cmp(wchar_t, wchar_t); +size_t __collate_collating_symbol(wchar_t *, size_t, const char *, size_t, mbstate_t *); +int __collate_equiv_class(const char *, size_t, mbstate_t *); +size_t __collate_equiv_match(int, wchar_t *, size_t, wchar_t, const char *, size_t, mbstate_t *, size_t *); #else /* !__LIBC__ */ void __collate_lookup(const unsigned char *, int *, int *, int *); #endif /* __LIBC__ */ ==== //depot/projects/soc2008/konrad_collation/colldef.apple/parse.y#9 (text+ko) ==== @@ -29,6 +29,7 @@ #include __FBSDID("$FreeBSD: src/usr.bin/colldef/parse.y,v 1.31 2002/10/16 12:56:22 charnier Exp $"); +#include #include #include #include @@ -89,12 +90,19 @@ static DB *chaindb; static int nchain = 0; static DB *stringdb; -static struct symbol prev_weight_table[COLL_WEIGHTS_MAX]; -static struct symbol prev2_weight_table[COLL_WEIGHTS_MAX]; -static struct symbol weight_table[COLL_WEIGHTS_MAX]; + +static struct symbol prev_weight_table[COLL_WEIGHTS_REAL]; +static struct symbol prev2_weight_table[COLL_WEIGHTS_REAL]; +static struct symbol weight_table[COLL_WEIGHTS_REAL]; + +struct weight_map_struct weight_map[COLL_WEIGHTS_MAX]; +struct weight_map_struct prev_weight_map[COLL_WEIGHTS_MAX]; +struct weight_map_struct prev2_weight_map[COLL_WEIGHTS_MAX]; + static int prev_line = LINE_NONE; static struct symbol *prev_elem; static int weight_index = 0; +static int map_idx = 0; static int allow_ellipsis = 0; static struct symbol sym_ellipsis = {SYMBOL_ELLIPSIS, PRI_UNDEFINED, L"", {0}}; static struct symbol sym_ignore = {SYMBOL_IGNORE, PRI_IGNORE, L"", {0}}; @@ -113,7 +121,6 @@ #endif struct __collate_st_info info = {{DIRECTIVE_FORWARD, DIRECTIVE_FORWARD}, 0, 0, 0, {PRI_UNDEFINED, PRI_UNDEFINED}, {PRI_UNDEFINED}, 0, 0}; -/* Some of the code expects COLL_WEIGHTS_MAX == 2 */ int directive_count = COLL_WEIGHTS_MAX; const char *out_file = "LC_COLLATE"; @@ -209,8 +216,10 @@ /* we don't set the byte order of t->val, since we * need it for sorting */ t->val = cval; - for(z = 0; z < directive_count; z++) + for(z = 0; z < COLL_WEIGHTS_REAL; z++) t->pri.pri[z] = htonl(p->pri[z]); + for (z = 0; z < directive_count; z++) + t->pri.map[z].v = p->map[z].v; t++; flags = R_NEXT; } @@ -232,17 +241,20 @@ int flags = R_FIRST; DBT key, val; struct symbol *v; + while((ret = charmapdb->seq(charmapdb, &key, &val, flags)) == 0) { memcpy(&v, val.data, sizeof(struct symbol *)); switch(v->type) { case SYMBOL_CHAR: { struct __collate_st_char_pri *p = haspri(v->u.wc); + if (!p || p->pri[0] == PRI_UNDEFINED) warnx("<%s> was not defined", showwcs((const wchar_t *)key.data, key.size / sizeof(wchar_t))); break; } case SYMBOL_CHAIN: { struct __collate_st_chain_pri *p = getchain(v->u.str, EXISTS); + if (p->pri[0] == PRI_UNDEFINED) warnx("<%s> was not defined", showwcs((const wchar_t *)key.data, key.size / sizeof(wchar_t))); break; @@ -307,34 +319,34 @@ err(EX_UNAVAILABLE, "can't open destination file %s", out_file); - strcpy(__collate_version, COLLATE_VERSION1_3); + strcpy(__collate_version, COLLATE_VERSION1_4); if (fwrite(__collate_version, sizeof(__collate_version), 1, fp) != 1) err(EX_IOERR, "IO error writting collate version to destination file %s", out_file); -#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN +#if _BYTE_ORDER == _LITTLE_ENDIAN for(z = 0; z < directive_count; z++) { info.undef_pri[z] = htonl(info.undef_pri[z]); info.subst_count[z] = htonl(info.subst_count[z]); } info.chain_count = htonl(info.chain_count); info.large_pri_count = htonl(info.large_pri_count); -#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */ +#endif /* _BYTE_ORDER == _LITTLE_ENDIAN */ if (fwrite(&info, sizeof(info), 1, fp) != 1) err(EX_IOERR, "IO error writting collate info to destination file %s", out_file); -#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN +#if _BYTE_ORDER == _LITTLE_ENDIAN { int i, z; struct __collate_st_char_pri *p = __collate_char_pri_table; for(i = UCHAR_MAX + 1; i-- > 0; p++) { - for(z = 0; z < directive_count; z++) + for(z = 0; z < COLL_WEIGHTS_REAL; z++) p->pri[z] = htonl(p->pri[z]); } } -#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */ +#endif /* _BYTE_ORDER == _LITTLE_ENDIAN */ if (fwrite(__collate_char_pri_table, sizeof(__collate_char_pri_table), 1, fp) != 1) err(EX_IOERR, @@ -342,14 +354,15 @@ out_file); for(z = 0; z < directive_count; z++) { if (nsubst[z] > 0) { -#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN +#if _BYTE_ORDER == _LITTLE_ENDIAN struct __collate_st_subst *t = __collate_substitute_table[z]; int i; + for(i = nsubst[z]; i > 0; i--) { t->val = htonl(t->val); t++; } -#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */ +#endif /* _BYTE_ORDER == _LITTLE_ENDIAN */ if ((int)fwrite(__collate_substitute_table[z], sizeof(struct __collate_st_subst), nsubst[z], fp) != nsubst[z]) err(EX_IOERR, "IO error writting large substprim table %d to destination file %s", @@ -357,7 +370,7 @@ } } if (nchain > 0) { -#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN +#if _BYTE_ORDER == _LITTLE_ENDIAN int i, j, z; struct __collate_st_chain_pri *p = __collate_chain_pri_table; wchar_t *w; @@ -368,7 +381,7 @@ for(z = 0; z < directive_count; z++) p->pri[z] = htonl(p->pri[z]); } -#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */ +#endif /* _BYTE_ORDER == _LITTLE_ENDIAN */ if (fwrite(__collate_chain_pri_table, sizeof(*__collate_chain_pri_table), nchain, fp) != (size_t)nchain) @@ -378,14 +391,14 @@ } if (nlargemap > 0) { -#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN +#if _BYTE_ORDER == _LITTLE_ENDIAN struct __collate_st_large_char_pri *t = __collate_large_char_pri_table; int i; for(i = 0; i < nlargemap; i++) { t->val = htonl(t->val); t++; } -#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */ +#endif /* _BYTE_ORDER == _LITTLE_ENDIAN */ if ((int)fwrite(__collate_large_char_pri_table, sizeof(struct __collate_st_large_char_pri), nlargemap, fp) != nlargemap) err(EX_IOERR, "IO error writting large pri tables to destination file %s", @@ -396,7 +409,6 @@ err(EX_IOERR, "IO error closing destination file %s", out_file); #ifdef COLLATE_DEBUG - /* Do it first, before conversion to network byte order. */ if (debug) collate_print_tables(); #endif @@ -511,7 +523,7 @@ | ELEM { struct symbol *s = getsymbol($1, EXISTS); -#ifdef VSDEBUG +#ifdef VSDEBUG2 printf("\n%s(%d) ", showwcs(s->name, CHARMAP_SYMBOL_LEN), s->u.wc); #endif if (s->val != PRI_UNDEFINED) @@ -540,6 +552,7 @@ s->val = prim_pri; prim_pri = s->val + 1; weight_index = 0; + map_idx = 0; } weights2 { int i; struct symbol *s = getsymbol($1, EXISTS); @@ -547,16 +560,21 @@ if (weight_index != 0) yyerror("Can't specify weights for collating symbol <%s>", showwcs($1, CHARMAP_SYMBOL_LEN)); } else if (weight_index == 0) { - for(i = 0; i < directive_count; i++) + for(i = 0; i < directive_count; i++) { weight_table[i] = *s; - } else if (weight_index != directive_count) + /* Store the end, inclusive. */ + weight_map[i].v = i; + } + } else if (map_idx != directive_count) yyerror("Not enough weights specified"); memcpy(prev_weight_table, weight_table, sizeof(weight_table)); + memcpy(prev_weight_map, weight_map, sizeof(weight_map)); prev_line = LINE_NORMAL; prev_elem = s; } - | ELLIPSIS { weight_index = 0; allow_ellipsis = 1; } weights { + | ELLIPSIS { weight_index = 0; allow_ellipsis = 1; map_idx = 0; } weights { int i; + if (prev_line == LINE_ELLIPSIS) yyerror("Illegal sequential ellipsis lines"); if (prev_line == LINE_UNDEFINED) @@ -564,11 +582,13 @@ if (prev_line == LINE_NONE) yyerror("Ellipsis line must follow a collating identifier lines"); if (weight_index == 0) { - for(i = 0; i < directive_count; i++) + for(i = 0; i < directive_count; i++) { weight_table[i] = sym_ellipsis; - } else if (weight_index != directive_count) + weight_map[i].v = i; + } + } else if (map_idx != directive_count) yyerror("Not enough weights specified"); - for(i = 0; i < directive_count; i++) { + for(i = 0; i < weight_index; i++) { if (weight_table[i].type != SYMBOL_ELLIPSIS) continue; switch (prev_weight_table[i].type) { @@ -584,6 +604,8 @@ } memcpy(prev2_weight_table, prev_weight_table, sizeof(prev_weight_table)); memcpy(prev_weight_table, weight_table, sizeof(weight_table)); + memcpy(prev2_weight_map, prev_weight_map, sizeof(weight_map)); + memcpy(prev_weight_map, weight_map, sizeof(weight_map)); prev_line = LINE_ELLIPSIS; allow_ellipsis = 0; } @@ -592,16 +614,21 @@ yyerror("Multiple UNDEFINED lines not allowed"); sym_undefined.val = prim_pri++; weight_index = 0; + map_idx = 0; allow_ellipsis = 1; } weights { int i; if (weight_index == 0) { weight_table[0] = sym_undefined; - for(i = 1; i < directive_count; i++) + weight_map[0].v = 0; + for(i = 1; i < directive_count; i++) { + weight_map[i].v = i; weight_table[i] = sym_ellipsis; - } else if (weight_index != directive_count) + } + } else if (map_idx != directive_count) yyerror("Not enough weights specified"); memcpy(prev_weight_table, weight_table, sizeof(weight_table)); + memcpy(prev_weight_map, weight_map, sizeof(weight_map)); prev_line = LINE_UNDEFINED; } ; @@ -609,7 +636,7 @@ | order_lines2 order_line2 '\n' ; order_line2 : - | ELEM { weight_index = 0; } weights2 { + | ELEM { weight_index = 0; map_idx = 0; } weights2 { int i; struct symbol *s = getsymbol($1, EXISTS); @@ -619,12 +646,15 @@ if (weight_index != 0) yyerror("Can't specify weights for collating symbol <%s>", showwcs($1, CHARMAP_SYMBOL_LEN)); } else if (weight_index == 0) { - for(i = 0; i < directive_count; i++) + for(i = 0; i < directive_count; i++) { weight_table[i] = *s; - } else if (weight_index != directive_count) + weight_map[i].v = i; + } + } else if (map_idx != directive_count) yyerror("Not enough weights specified"); if (prev_line == LINE_ELLIPSIS) { int w, x; + for(i = 0; i < directive_count; i++) { switch (prev_weight_table[i].type) { case SYMBOL_CHAR: @@ -633,13 +663,13 @@ case SYMBOL_SYMBOL: for (w = prev_elem->u.wc + 1; w < s->u.wc; w++) { struct __collate_st_char_pri *p = getpri(w); + if (p->pri[i] != PRI_UNDEFINED) yyerror("Char 0x%02x previously defined", w); p->pri[i] = prev_weight_table[i].val; } break; case SYMBOL_ELLIPSIS: - switch (weight_table[i].type) { case SYMBOL_STRING: yyerror("Strings can't be endpoints of ellipsis"); @@ -650,11 +680,14 @@ case SYMBOL_SYMBOL: yyerror("Collation symbols can't be endpoints of ellipsis"); } - if (s->val - prev_elem->val != weight_table[i].val - prev2_weight_table[i].val) + if (s->val - prev_elem->val != + weight_table[i].val - + prev2_weight_table[i].val) yyerror("Range mismatch in weight %d", i); x = prev2_weight_table[i].val + 1; for (w = prev_elem->u.wc + 1; w < s->u.wc; w++) { struct __collate_st_char_pri *p = getpri(w); + if (p->pri[i] != PRI_UNDEFINED) yyerror("Char 0x%02x previously defined", w); p->pri[i] = x++; @@ -663,6 +696,7 @@ case SYMBOL_STRING: for (w = prev_elem->u.wc + 1; w < s->u.wc; w++) { struct __collate_st_char_pri *p = getpri(w); + if (p->pri[i] != PRI_UNDEFINED) yyerror("Char 0x%02x previously defined", w); putsubst(w, i, prev_weight_table[i].u.str); @@ -675,7 +709,13 @@ switch(s->type) { case SYMBOL_CHAR: { struct __collate_st_char_pri *p = getpri(s->u.wc); - for(i = 0; i < directive_count; i++) { + +#ifdef VSDEBUG + fprintf(stderr, "%s ", showwcs(s->name, CHARMAP_SYMBOL_LEN)); + fprintf(stderr, "weights=%d ", weight_index); +#endif + /* This is the main piece of code. */ + for(i = 0; i < weight_index; i++) { switch (weight_table[i].type) { case SYMBOL_CHAR: case SYMBOL_CHAIN: @@ -684,6 +724,9 @@ if (p->pri[i] != PRI_UNDEFINED) yyerror("Char 0x%02x previously defined", s->u.wc); p->pri[i] = weight_table[i].val; +#ifdef VSDEBUG + fprintf(stderr, " weight[%d]=%d", i, p->pri[i]); +#endif break; case SYMBOL_STRING: if (p->pri[i] != PRI_UNDEFINED) @@ -692,41 +735,59 @@ p->pri[i] = weight_table[i].val; break; } +#if 0 + default: + errx(1, "unrecognized symbol type: %d", weight_table[i].type); +#endif } +#ifndef NDEBUG + for (i = 0; i < COLL_WEIGHTS_MAX - 1; i++) + assert(weight_map[i].v < weight_map[i + 1].v); +#endif + memcpy(p->map, weight_map, sizeof(p->map)); +#ifdef VSDEBUG + fputc('\n', stderr); +#endif break; } case SYMBOL_CHAIN: { struct __collate_st_chain_pri *p = getchain(s->u.str, EXISTS); - for(i = 0; i < directive_count; i++) { + + for(i = 0; i < weight_index; i++) { switch (weight_table[i].type) { case SYMBOL_CHAR: case SYMBOL_CHAIN: case SYMBOL_IGNORE: case SYMBOL_SYMBOL: if (p->pri[i] != PRI_UNDEFINED) - yyerror("Chain %s previously defined", showwcs(s->u.str, STR_LEN)); + yyerror("Chain %s previously defined", + showwcs(s->u.str, STR_LEN)); p->pri[i] = weight_table[i].val; break; case SYMBOL_STRING : if (wcsncmp(s->u.str, weight_table[i].u.str, STR_LEN) != 0) yyerror("Chain/string mismatch"); if (p->pri[i] != PRI_UNDEFINED) - yyerror("Chain %s previously defined", showwcs(s->u.str, STR_LEN)); + yyerror("Chain %s previously defined", + showwcs(s->u.str, STR_LEN)); /* negative value mean don't substitute * the chain, but it is in an * equivalence class */ p->pri[i] = -weight_table[i].val; } } + memcpy(p->map, weight_map, sizeof(p->map)); break; } } memcpy(prev_weight_table, weight_table, sizeof(weight_table)); + memcpy(prev_weight_map, weight_map, sizeof(weight_map)); prev_line = LINE_NORMAL; prev_elem = s; } - | ELLIPSIS { weight_index = 0; allow_ellipsis = 1; } weights { + | ELLIPSIS { weight_index = 0; map_idx = 0; allow_ellipsis = 1; } weights { int i; + if (prev_line == LINE_ELLIPSIS) yyerror("Illegal sequential ellipsis lines"); if (prev_line == LINE_UNDEFINED) @@ -734,11 +795,13 @@ if (prev_line == LINE_NONE) yyerror("Ellipsis line must follow a collating identifier lines"); if (weight_index == 0) { - for(i = 0; i < directive_count; i++) + for(i = 0; i < directive_count; i++) { weight_table[i] = sym_ellipsis; - } else if (weight_index != directive_count) + weight_map[i].v = i; + } + } else if (map_idx != directive_count) yyerror("Not enough weights specified"); - for(i = 0; i < directive_count; i++) { + for(i = 0; i < weight_index; i++) { if (weight_table[i].type != SYMBOL_ELLIPSIS) continue; switch (prev_weight_table[i].type) { @@ -754,19 +817,23 @@ } memcpy(prev2_weight_table, prev_weight_table, sizeof(prev_weight_table)); memcpy(prev_weight_table, weight_table, sizeof(weight_table)); + memcpy(prev2_weight_map, prev_weight_map, sizeof(prev_weight_map)); + memcpy(prev_weight_map, weight_map, sizeof(weight_map)); prev_line = LINE_ELLIPSIS; allow_ellipsis = 0; } - | UNDEFINED { weight_index = 0; allow_ellipsis = 1; } weights { + | UNDEFINED { weight_index = 0; map_idx = 0; allow_ellipsis = 1; } weights { int i; if (weight_index == 0) { weight_table[0] = sym_undefined; - for(i = 1; i < directive_count; i++) + for(i = 1; i < directive_count; i++) { weight_table[i] = sym_ellipsis; - } else if (weight_index != directive_count) + weight_map[i].v = i; + } + } else if (map_idx != directive_count) yyerror("Not enough weights specified"); - for(i = 0; i < directive_count; i++) { + for(i = 0; i < map_idx; i++) { switch (weight_table[i].type) { case SYMBOL_CHAR: case SYMBOL_CHAIN: @@ -784,7 +851,9 @@ yyerror("Strings can't be used with UNDEFINED"); } } + /* Assume that UNDEFINED has 1->1 mapping. */ memcpy(prev_weight_table, weight_table, sizeof(weight_table)); + memcpy(prev_weight_map, weight_map, sizeof(weight_map)); prev_line = LINE_UNDEFINED; } ; @@ -794,66 +863,70 @@ ; expansion : weight_ex - | expansion weight_ex_null + | expansion weight_ex ; weight2 : weight - | EXPAND expansion EXPAND { -#ifdef VSDEBUG - printf("[%d]=%d ", - weight_index, weight_table[weight_index]->val); -#endif - weight_index++; - } + | EXPAND { +} expansion EXPAND { + /* Store the end (inclusive), not the beginning. */ + weight_map[map_idx].v = weight_index - 1; + map_idx++; +} ; weights : | weight | weights ';' weight ; -weight : weight_ex1 +weight : ELEM { + struct symbol *s; + + assert(weight_index < COLL_WEIGHTS_REAL); + if (map_idx >= directive_count) + yyerror("More weights than specified by order_start (%d >= %d, %s)", map_idx, + directive_count, showwcs($1, CHARMAP_SYMBOL_LEN)); + s = getsymbol($1, EXISTS); + if (order_pass && s->val == PRI_UNDEFINED) + printf("<%s> is undefined\n", showwcs($1, CHARMAP_SYMBOL_LEN)); + weight_map[map_idx++].v = weight_index; + weight_table[weight_index++] = *s; +} | ELLIPSIS { - if (weight_index >= directive_count) + if (map_idx >= directive_count) yyerror("More weights than specified by order_start"); if (!allow_ellipsis) yyerror("Ellipsis weight not allowed"); + /* Mark the end of weights for this level */ + weight_map[map_idx++].v = weight_index; weight_table[weight_index++] = sym_ellipsis; } | IGNORE { - if (weight_index >= directive_count) + if (map_idx >= directive_count) yyerror("More weights than specified by order_start"); + weight_map[map_idx++].v = weight_index; weight_table[weight_index++] = sym_ignore; } | STRING { - if (weight_index >= directive_count) + /* XXX This is broken now, and should be removed. */ + if (map_idx >= directive_count) yyerror("More weights than specified by order_start"); if (wcslen($1) > STR_LEN) yyerror("String '%s' is too long", showwcs($1, STR_LEN)); + /* Store the end, not the beginning. In the simple case it is equivalent. */ + weight_map[map_idx++].v = weight_index; weight_table[weight_index++] = *getstring($1); } ; -weight_ex1 : weight_ex { weight_index++; } -; weight_ex : ELEM { struct symbol *s; - if (weight_index >= directive_count) - yyerror("More weights than specified by order_start (%d >= %d, %s)", weight_index, + assert(weight_index < COLL_WEIGHTS_REAL); + if (map_idx >= directive_count) + yyerror("More weights than specified by order_start (%d >= %d, %s)", map_idx, directive_count, showwcs($1, CHARMAP_SYMBOL_LEN)); s = getsymbol($1, EXISTS); if (order_pass && s->val == PRI_UNDEFINED) printf("<%s> is undefined\n", showwcs($1, CHARMAP_SYMBOL_LEN)); - weight_table[weight_index] = *s; -} -; -weight_ex_null : ELEM { - struct symbol *s; - - if (weight_index >= directive_count) - yyerror("More weights than specified by order_start (%d >= %d, %s)", weight_index, - directive_count, showwcs($1, CHARMAP_SYMBOL_LEN)); - s = getsymbol($1, EXISTS); - if (order_pass && s->val == PRI_UNDEFINED) - printf("<%s> is undefined\n", showwcs($1, CHARMAP_SYMBOL_LEN)); - weight_table[weight_index].val += s->val; + weight_table[weight_index++] = *s; } ; order_end : ORDER_END '\n' @@ -1026,7 +1099,7 @@ for (ch = 0; ch <= UCHAR_MAX; ch++) for(z = 0; z < COLL_WEIGHTS_MAX; z++) __collate_char_pri_table[ch].pri[z] = PRI_UNDEFINED; - if (setlocale(LC_ALL, "en_US.UTF-8") == NULL) + if (setlocale(LC_ALL, "en_US.UTF-8") == NULL) errx(1, "setlocale() failed"); #ifdef COLLATE_DEBUG while((ch = getopt(ac, av, ":do:I:m:")) != -1) { @@ -1077,28 +1150,36 @@ { DBT key, val; struct __collate_st_char_pri *p; - int ret; + int ret, z; + + if (c <= UCHAR_MAX) { + p = &__collate_char_pri_table[c]; + for (z = 0; z < COLL_WEIGHTS_MAX; z++) + p->map[z].v = z; + for(z = 0; z < COLL_WEIGHTS_REAL; z++) + p->pri[z] = PRI_UNDEFINED; - if (c <= UCHAR_MAX) - return &__collate_char_pri_table[c]; + return p; + } key.data = &c; key.size = sizeof(int32_t); if ((ret = largemapdb->get(largemapdb, &key, &val, 0)) < 0) err(1, "getpri: Error getting %s", charname(c)); if (ret != 0) { - struct __collate_st_char_pri *pn; - int z; - if ((pn = (struct __collate_st_char_pri *)malloc(sizeof(struct __collate_st_char_pri))) == NULL) + if ((p = (struct __collate_st_char_pri *) + malloc(sizeof(struct __collate_st_char_pri))) == NULL) err(1, "getpri: malloc"); for(z = 0; z < COLL_WEIGHTS_MAX; z++) - pn->pri[z] = PRI_UNDEFINED; - val.data = &pn; + p->map[z].v = z; + for(z = 0; z < COLL_WEIGHTS_REAL; z++) + p->pri[z] = PRI_UNDEFINED; + val.data = &p; val.size = sizeof(struct __collate_st_char_pri *); if (largemapdb->put(largemapdb, &key, &val, 0) < 0) err(1, "getpri: Error storing %s", charname(c)); nlargemap++; } - memcpy(&p, val.data, sizeof(struct __collate_st_char_pri *)); + return p; } @@ -1137,7 +1218,8 @@ int z; if (exists > 0) errx(1, "getchain: \"%s\" is not defined", showwcs(wcs, STR_LEN)); - if ((pn = (struct __collate_st_chain_pri *)malloc(sizeof(struct __collate_st_chain_pri))) == NULL) + if ((pn = (struct __collate_st_chain_pri *) + malloc(sizeof(struct __collate_st_chain_pri))) == NULL) err(1, "getchain: malloc"); for(z = 0; z < COLL_WEIGHTS_MAX; z++) pn->pri[z] = PRI_UNDEFINED; @@ -1237,6 +1319,7 @@ struct symbol *p; int ret; + errx(1, "internal error: getstring called"); key.data = (void *)wcs; key.size = wcslen(wcs) * sizeof(wchar_t); if ((ret = stringdb->get(stringdb, &key, &val, 0)) < 0) @@ -1254,6 +1337,7 @@ err(1, "getstring: Error storing \"%s\"", showwcs(wcs, STR_LEN)); } memcpy(&p, val.data, sizeof(struct symbol *)); + return p; } @@ -1391,19 +1475,32 @@ static char * show(int c) { - static char buf[5]; + static char buf[40]; + char *p, utfbuf[6]; + int i; if (c >=32 && c <= 126) sprintf(buf, "'%c' ", c); - else + else { sprintf(buf, "\\x{%02x}", c); + } + if ((c = wctomb(utfbuf, c)) != -1) { + p = buf + strlen(buf); + *p++ = '('; + for (i = 0; i < c; i++) { + sprintf(p, "%X ", (unsigned char)utfbuf[i]); + p += strlen(p); + } + sprintf(p, ")"); + } + return buf; } static void collate_print_tables(void) { - int i, z; + int i, z, pos; printf("Info: p=%d s=%d f=0x%02x m=%d dc=%d up=%d us=%d pc=%d sc=%d cc=%d lc=%d\n", info.directive[0], info.directive[1], @@ -1436,25 +1533,65 @@ putchar('\n'); } } - printf("Char priority table:\n"); + printf("Char priority table: (utf8 in parens)\n"); { struct __collate_st_char_pri *p2 = __collate_char_pri_table; + for (i = 0; i < UCHAR_MAX + 1; i++, p2++) { + if (p2->map[1].v == 0) + continue; /* Entry not used. */ printf("\t%s :", show(i)); - for(z = 0; z < info.directive_count; z++) - printf(" %d", ntohl(p2->pri[z])); + for(z = 0, pos = 0; z < info.directive_count; z++) { + /* + * Is the last weight in the sequence on + * this position? The assertion here won't hold if some + * character wasn't used at all. Hence, the additional 'if' + * above. + */ + assert(pos <= p2->map[z].v); + if (pos == p2->map[z].v) { + printf(" %d", ntohl(p2->pri[pos])); + pos++; + continue; + } + printf(" \""); + /* Output all expanded weights in sequence. */ + while (pos <= p2->map[z].v) { + printf("%d%s", ntohl(p2->pri[pos]), + pos < p2->map[z].v ? " " : ""); + pos++; + } + printf("\""); + } putchar('\n'); } } if (info.large_pri_count > 0) { struct __collate_st_large_char_pri *p2 = __collate_large_char_pri_table; - printf("Large priority table:\n"); + + printf("Large priority table: (utf8 in parens)\n"); for (i = info.large_pri_count; i-- > 0; p2++) { if (p2->val == 0) break; printf("\t%s :", show(ntohl(p2->val))); - for(z = 0; z < info.directive_count; z++) - printf(" %d", ntohl(p2->pri.pri[z])); + for(z = 0, pos = 0; z < info.directive_count; z++) { + /* Is the last weight in the sequence on + * this position? */ + assert(pos <= p2->pri.map[z].v); + if (pos == p2->pri.map[z].v) { + printf(" %d", ntohl(p2->pri.pri[pos])); + pos++; + continue; + } + printf(" \""); + /* Output all expanded weights in sequence. */ + while (pos <= p2->pri.map[z].v) { + printf("%d%s", ntohl(p2->pri.pri[pos]), + pos < p2->pri.map[z].v ? " " : ""); + pos++; + } + printf("\""); + } putchar('\n'); } }