Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 16 Jul 2008 21:44:32 GMT
From:      Konrad Jankowski <konrad@FreeBSD.org>
To:        Perforce Change Reviews <perforce@FreeBSD.org>
Subject:   PERFORCE change 145346 for review
Message-ID:  <200807162144.m6GLiWIh026375@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=145346

Change 145346 by konrad@vspredator on 2008/07/16 21:44:09

	Support for specifying the charmap on the command line added.
	It is a big win when generating data for locales in UTF-8, because we will be
	able to parse the charmap just once for all of them (when support for this
	is added).

Affected files ...

.. //depot/projects/soc2008/konrad_collation/colldef.apple/collate.h#5 edit
.. //depot/projects/soc2008/konrad_collation/colldef.apple/common.h#3 edit
.. //depot/projects/soc2008/konrad_collation/colldef.apple/parse.y#3 edit
.. //depot/projects/soc2008/konrad_collation/colldef.apple/scan.l#5 edit
.. //depot/projects/soc2008/konrad_collation/scripts/localedef.sh#2 edit

Differences ...

==== //depot/projects/soc2008/konrad_collation/colldef.apple/collate.h#5 (text+ko) ====

@@ -45,6 +45,7 @@
 #define COLLATE_VERSION1_1 "1.1\n"
 #define COLLATE_VERSION1_1A "1.1A\n"
 #define COLLATE_VERSION1_2 "1.2\n"
+#define COLLATE_VERSION1_3 "1.3\n"
 /* see discussion in string/FreeBSD/strxfrm for this value */
 #define COLLATE_MAX_PRIORITY ((1 << 24) - 1)
 

==== //depot/projects/soc2008/konrad_collation/colldef.apple/common.h#3 (text+ko) ====

@@ -29,6 +29,7 @@
 };
 
 extern int line_no;
+extern int charmap_cmdline;
 
 struct symbol *getsymbol(const wchar_t *, int);
 extern char *showwcs(const wchar_t *, int);

==== //depot/projects/soc2008/konrad_collation/colldef.apple/parse.y#3 (text+ko) ====

@@ -131,9 +131,10 @@
 %token <str> ELEM
 %token <ch> CHAR
 %token <ch> ORDER_DIRECTIVE
-%token EXPAND
+%token EXPAND CHARMAP
 %%
-collate : datafile {
+collate : CHARMAP { printf("parsing charmap\n"); } charmap_list
+	| datafile {
 	FILE *fp;
 	int localedef = (stringdb != NULL);
 	int z;
@@ -304,7 +305,7 @@
 		err(EX_UNAVAILABLE, "can't open destination file %s",
 		    out_file);
 
-	strcpy(__collate_version, COLLATE_VERSION1_1A);
+	strcpy(__collate_version, COLLATE_VERSION1_3);
 	if (fwrite(__collate_version, sizeof(__collate_version), 1, fp) != 1)
 		err(EX_IOERR,
 		"IO error writting collate version to destination file %s",
@@ -436,6 +437,9 @@
 	| collating_element
 	| collating_symbol
 ;
+charmap_list : charmap '\n'
+	     | charmap_list charmap '\n'
+;
 collating_element : COLLATING_ELEMENT ELEM FROM STRING {
 	int len;
 	struct symbol *s;
@@ -832,6 +836,10 @@
 charmap : DEFN CHAR {
 	int len = wcslen($1);
 	struct symbol *s;
+
+#if 0
+	printf("charmap\n");
+#endif
 	if (len > CHARMAP_SYMBOL_LEN)
 		yyerror("Charmap symbol name '%s' is too long", showwcs($1, CHARMAP_SYMBOL_LEN));
 	s = getsymbol($1, NOTEXISTS);
@@ -993,10 +1001,12 @@
 	for (ch = 0; ch <= UCHAR_MAX; ch++)
 		for(z = 0; z < COLL_WEIGHTS_MAX; z++)
 			__collate_char_pri_table[ch].pri[z] = PRI_UNDEFINED;
+	if (setlocale(LC_ALL, "en_US.UTF-8") == NULL) 
+		errx(1, "setlocale() failed");
 #ifdef COLLATE_DEBUG
-	while((ch = getopt(ac, av, ":do:I:")) != -1) {
+	while((ch = getopt(ac, av, ":do:I:m:")) != -1) {
 #else
-	while((ch = getopt(ac, av, ":o:I:")) != -1) {
+	while((ch = getopt(ac, av, ":o:I:m:")) != -1) {
 #endif
 		switch (ch)
 		{
@@ -1013,6 +1023,14 @@
 			strlcpy(map_name, optarg, sizeof(map_name));
 			break;
 
+		  case 'm':
+			charmap_cmdline = 1;
+			if ((yyin = fopen(optarg, "r")) == NULL)
+				err(EX_UNAVAILABLE, "can't open charmap file %s", optarg);
+			yyparse();
+			printf("charmap decoding finished\n");
+		  break;
+
 		  default:
 			usage();
 		}
@@ -1023,7 +1041,8 @@
 		if ((yyin = fopen(*av, "r")) == NULL)
 			err(EX_UNAVAILABLE, "can't open source file %s", *av);
 	}
-	setlocale(LC_ALL, "en_US.UTF-8");
+	charmap_cmdline = 0;
+	line_no = 1;
 	yyparse();
 	return 0;
 }

==== //depot/projects/soc2008/konrad_collation/colldef.apple/scan.l#5 (text+ko) ====

@@ -45,6 +45,7 @@
 void yyerror(char *, ...);
 
 int line_no = 1, save_no, fromsubs;
+int charmap_cmdline;
 wchar_t buf0[BUFSIZE], *ptr;
 wchar_t *buf = buf0;
 wchar_t bufstr[BUFSIZE], *ptrsave;
@@ -112,7 +113,17 @@
 	return '\n';
 }
 <ldef>\<                { ptr = buf; BEGIN(elem); }
-<INITIAL>\<             { ptr = buf; fromsubs = 0; BEGIN(s_name); }
+<INITIAL>\<             {
+	ptr = buf;
+	fromsubs = 0;
+	if (charmap_cmdline) {
+		ptr = buf;
+		*ptr++ = '<';
+		BEGIN(defn);
+		return CHARMAP;
+	} else
+		BEGIN(s_name);
+}
 <*>^#.*\n		line_no++;
 ^\n			line_no++;
 <INITIAL>\\\n           line_no++;
@@ -130,7 +141,7 @@
 }
 <INITIAL,nchar>\n       {
 	line_no++;
-	if (map_fp != NULL) {
+	if (map_fp != NULL || charmap_cmdline) {
 		ptr = buf;
 		BEGIN(defn);
 	}
@@ -191,7 +202,7 @@
 		errx(EX_UNAVAILABLE, "map expected near line %u of %s",
 		     line_no, map_name);
 	*ptr = 0;
-	if (localedefmode && *buf == '<' && ptr[-1] == '>') {
+	if ((localedefmode || charmap_cmdline) && *buf == '<' && ptr[-1] == '>') {
 		if (ptr == buf + 2)
 			errx(EX_UNAVAILABLE, "map expected near line %u of %s",
 		    	line_no, map_name);
@@ -332,7 +343,10 @@
 <s_name,string,defn,elem>\n       {
 	const char *s = (map_fp != NULL) ? map_name : "input";
 
-	errx(EX_UNAVAILABLE, "unterminated map/name/string near line %u of %s", line_no, s);
+	if (charmap_cmdline)	
+		s = optarg;
+	errx(EX_UNAVAILABLE, "unterminated map/name/string near line %u of %s (ptr-buf=%d)",
+	    line_no, s, ptr - buf);
 }
 <s_name,string,nchar,elem><<EOF>> {
 	const char *s = (map_fp != NULL) ? map_name : "input";
@@ -408,8 +422,10 @@
 			BEGIN(ldef);
 		else
 			BEGIN(INITIAL);
-	} else
+	} else {
+		BEGIN(INITIAL);
 		yyterminate();
+	}
 }
 %%
 #ifdef FLEX_DEBUG

==== //depot/projects/soc2008/konrad_collation/scripts/localedef.sh#2 (text+ko) ====

@@ -21,8 +21,8 @@
 # Basically, just cut out the collation data.
 sed -n -e "$LINE1,${LINE2}p" $SRC | tr -d '\r$' | sed -e 's/^*/#/g' > $OUTFILE
 
-
 #printf "1a\ncharmap /usr/home/versus/colldef.apple/data2/UTF-8.cm.new\n.\nwq\n" | ed -s $OUTFILE
+# insert second line with charmap specification
 printf "1a\ncharmap ../posix/UTF-8.cm\n.\nwq\n" | ed -s $OUTFILE
 
 # Optional white space compression. Not needed for these colldef version.



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200807162144.m6GLiWIh026375>