Date: Wed, 16 Jul 2008 21:44:32 GMT From: Konrad Jankowski <konrad@FreeBSD.org> To: Perforce Change Reviews <perforce@FreeBSD.org> Subject: PERFORCE change 145346 for review Message-ID: <200807162144.m6GLiWIh026375@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=145346 Change 145346 by konrad@vspredator on 2008/07/16 21:44:09 Support for specifying a charmap on the command line added. It is a big win when generating data for locales in UTF-8, because we will be able to parse the charmap just once for all of them (when support for this is added). Affected files ... .. //depot/projects/soc2008/konrad_collation/colldef.apple/collate.h#5 edit .. //depot/projects/soc2008/konrad_collation/colldef.apple/common.h#3 edit .. //depot/projects/soc2008/konrad_collation/colldef.apple/parse.y#3 edit .. //depot/projects/soc2008/konrad_collation/colldef.apple/scan.l#5 edit .. //depot/projects/soc2008/konrad_collation/scripts/localedef.sh#2 edit Differences ... ==== //depot/projects/soc2008/konrad_collation/colldef.apple/collate.h#5 (text+ko) ==== @@ -45,6 +45,7 @@ #define COLLATE_VERSION1_1 "1.1\n" #define COLLATE_VERSION1_1A "1.1A\n" #define COLLATE_VERSION1_2 "1.2\n" +#define COLLATE_VERSION1_3 "1.3\n" /* see discussion in string/FreeBSD/strxfrm for this value */ #define COLLATE_MAX_PRIORITY ((1 << 24) - 1) ==== //depot/projects/soc2008/konrad_collation/colldef.apple/common.h#3 (text+ko) ==== @@ -29,6 +29,7 @@ }; extern int line_no; +extern int charmap_cmdline; struct symbol *getsymbol(const wchar_t *, int); extern char *showwcs(const wchar_t *, int); ==== //depot/projects/soc2008/konrad_collation/colldef.apple/parse.y#3 (text+ko) ==== @@ -131,9 +131,10 @@ %token <str> ELEM %token <ch> CHAR %token <ch> ORDER_DIRECTIVE -%token EXPAND +%token EXPAND CHARMAP %% -collate : datafile { +collate : CHARMAP { printf("parsing charmap\n"); } charmap_list + | datafile { FILE *fp; int localedef = (stringdb != NULL); int z; @@ -304,7 +305,7 @@ err(EX_UNAVAILABLE, "can't open destination file %s", out_file); - strcpy(__collate_version, COLLATE_VERSION1_1A); + strcpy(__collate_version, COLLATE_VERSION1_3); if (fwrite(__collate_version, sizeof(__collate_version), 1, fp) != 1) err(EX_IOERR, "IO error writting collate version 
to destination file %s", @@ -436,6 +437,9 @@ | collating_element | collating_symbol ; +charmap_list : charmap '\n' + | charmap_list charmap '\n' +; collating_element : COLLATING_ELEMENT ELEM FROM STRING { int len; struct symbol *s; @@ -832,6 +836,10 @@ charmap : DEFN CHAR { int len = wcslen($1); struct symbol *s; + +#if 0 + printf("charmap\n"); +#endif if (len > CHARMAP_SYMBOL_LEN) yyerror("Charmap symbol name '%s' is too long", showwcs($1, CHARMAP_SYMBOL_LEN)); s = getsymbol($1, NOTEXISTS); @@ -993,10 +1001,12 @@ for (ch = 0; ch <= UCHAR_MAX; ch++) for(z = 0; z < COLL_WEIGHTS_MAX; z++) __collate_char_pri_table[ch].pri[z] = PRI_UNDEFINED; + if (setlocale(LC_ALL, "en_US.UTF-8") == NULL) + errx(1, "setlocale() failed"); #ifdef COLLATE_DEBUG - while((ch = getopt(ac, av, ":do:I:")) != -1) { + while((ch = getopt(ac, av, ":do:I:m:")) != -1) { #else - while((ch = getopt(ac, av, ":o:I:")) != -1) { + while((ch = getopt(ac, av, ":o:I:m:")) != -1) { #endif switch (ch) { @@ -1013,6 +1023,14 @@ strlcpy(map_name, optarg, sizeof(map_name)); break; + case 'm': + charmap_cmdline = 1; + if ((yyin = fopen(optarg, "r")) == NULL) + err(EX_UNAVAILABLE, "can't open charmap file %s", optarg); + yyparse(); + printf("charmap decoding finished\n"); + break; + default: usage(); } @@ -1023,7 +1041,8 @@ if ((yyin = fopen(*av, "r")) == NULL) err(EX_UNAVAILABLE, "can't open source file %s", *av); } - setlocale(LC_ALL, "en_US.UTF-8"); + charmap_cmdline = 0; + line_no = 1; yyparse(); return 0; } ==== //depot/projects/soc2008/konrad_collation/colldef.apple/scan.l#5 (text+ko) ==== @@ -45,6 +45,7 @@ void yyerror(char *, ...); int line_no = 1, save_no, fromsubs; +int charmap_cmdline; wchar_t buf0[BUFSIZE], *ptr; wchar_t *buf = buf0; wchar_t bufstr[BUFSIZE], *ptrsave; @@ -112,7 +113,17 @@ return '\n'; } <ldef>\< { ptr = buf; BEGIN(elem); } -<INITIAL>\< { ptr = buf; fromsubs = 0; BEGIN(s_name); } +<INITIAL>\< { + ptr = buf; + fromsubs = 0; + if (charmap_cmdline) { + ptr = buf; + *ptr++ = '<'; + 
BEGIN(defn); + return CHARMAP; + } else + BEGIN(s_name); +} <*>^#.*\n line_no++; ^\n line_no++; <INITIAL>\\\n line_no++; @@ -130,7 +141,7 @@ } <INITIAL,nchar>\n { line_no++; - if (map_fp != NULL) { + if (map_fp != NULL || charmap_cmdline) { ptr = buf; BEGIN(defn); } @@ -191,7 +202,7 @@ errx(EX_UNAVAILABLE, "map expected near line %u of %s", line_no, map_name); *ptr = 0; - if (localedefmode && *buf == '<' && ptr[-1] == '>') { + if ((localedefmode || charmap_cmdline) && *buf == '<' && ptr[-1] == '>') { if (ptr == buf + 2) errx(EX_UNAVAILABLE, "map expected near line %u of %s", line_no, map_name); @@ -332,7 +343,10 @@ <s_name,string,defn,elem>\n { const char *s = (map_fp != NULL) ? map_name : "input"; - errx(EX_UNAVAILABLE, "unterminated map/name/string near line %u of %s", line_no, s); + if (charmap_cmdline) + s = optarg; + errx(EX_UNAVAILABLE, "unterminated map/name/string near line %u of %s (ptr-buf=%d)", + line_no, s, ptr - buf); } <s_name,string,nchar,elem><<EOF>> { const char *s = (map_fp != NULL) ? map_name : "input"; @@ -408,8 +422,10 @@ BEGIN(ldef); else BEGIN(INITIAL); - } else + } else { + BEGIN(INITIAL); yyterminate(); + } } %% #ifdef FLEX_DEBUG ==== //depot/projects/soc2008/konrad_collation/scripts/localedef.sh#2 (text+ko) ==== @@ -21,8 +21,8 @@ # Basically, just cut out the collation data. sed -n -e "$LINE1,${LINE2}p" $SRC | tr -d '\r$' | sed -e 's/^*/#/g' > $OUTFILE - #printf "1a\ncharmap /usr/home/versus/colldef.apple/data2/UTF-8.cm.new\n.\nwq\n" | ed -s $OUTFILE +# insert second line with charmap specification printf "1a\ncharmap ../posix/UTF-8.cm\n.\nwq\n" | ed -s $OUTFILE # Optional white space compression. Not needed for these colldef version.
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200807162144.m6GLiWIh026375>