Skip site navigation (1) | Skip section navigation (2)
Date:      Mon, 1 Sep 2008 11:22:47 GMT
From:      Konrad Jankowski <konrad@FreeBSD.org>
To:        Perforce Change Reviews <perforce@FreeBSD.org>
Subject:   PERFORCE change 148978 for review
Message-ID:  <200809011122.m81BMlRF030916@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=148978

Change 148978 by konrad@vspredator on 2008/09/01 11:22:34

	       Colldef with proper expansion support added. This is not a production version.
	       It will go through a process of space optimisation.

Affected files ...

.. //depot/projects/soc2008/konrad_collation/colldef.apple/collate.h#7 edit
.. //depot/projects/soc2008/konrad_collation/colldef.apple/parse.y#9 edit

Differences ...

==== //depot/projects/soc2008/konrad_collation/colldef.apple/collate.h#7 (text+ko) ====

@@ -33,6 +33,8 @@
 #include <sys/cdefs.h>
 #ifndef __LIBC__
 #include <sys/types.h>
+#else
+#include <setlocale.h>	/* for ENCODING_LEN */
 #endif /* !__LIBC__ */
 #include <limits.h>
 
@@ -46,6 +48,7 @@
 #define COLLATE_VERSION1_1A "1.1A\n"
 #define COLLATE_VERSION1_2 "1.2\n"
 #define COLLATE_VERSION1_3 "1.3\n"
+#define COLLATE_VERSION1_4 "1.4\n"
 /* see discussion in string/FreeBSD/strxfrm for this value */
 #define COLLATE_MAX_PRIORITY ((1 << 24) - 1)
 
@@ -63,7 +66,7 @@
 struct __collate_st_info {
 	__uint8_t directive[COLL_WEIGHTS_MAX];
 	__uint8_t flags;
-#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
+#if _BYTE_ORDER == _LITTLE_ENDIAN
 	unsigned int directive_count:4;
 	unsigned int chain_max_len:4;
 #else
@@ -76,10 +79,18 @@
 	__int32_t large_pri_count;
 };
 
+struct weight_map_struct
+{
+        unsigned int v:4;
+};
+
+#define COLL_WEIGHTS_REAL (COLL_WEIGHTS_MAX * 4)
 struct __collate_st_char_pri {
-	__int32_t pri[COLL_WEIGHTS_MAX];
+	struct weight_map_struct map[COLL_WEIGHTS_MAX];
+	__int32_t pri[COLL_WEIGHTS_REAL];
 };
 struct __collate_st_chain_pri {
+	struct weight_map_struct map[COLL_WEIGHTS_MAX];
 	wchar_t str[STR_LEN];
 	__int32_t pri[COLL_WEIGHTS_MAX];
 };
@@ -92,29 +103,34 @@
 	wchar_t str[STR_LEN];
 };
 
-#ifndef __LIBC__
+#ifdef __LIBC__
+struct __locale_st_collate {
+	char __encoding[ENCODING_LEN + 1];
+	struct __collate_st_info __info;
+	struct __collate_st_subst *__substitute_table[COLL_WEIGHTS_MAX];
+	struct __collate_st_chain_pri *__chain_pri_table;
+	struct __collate_st_large_char_pri *__large_char_pri_table;
+	struct __collate_st_char_pri __char_pri_table[UCHAR_MAX + 1];
+};
+#endif
+
 extern int __collate_load_error;
 extern int __collate_substitute_nontrivial;
-#define __collate_char_pri_table (*__collate_char_pri_table_ptr)
-extern struct __collate_st_char_pri __collate_char_pri_table[UCHAR_MAX + 1];
-extern struct __collate_st_chain_pri *__collate_chain_pri_table;
-extern __int32_t *__collate_chain_equiv_table;
-extern struct __collate_st_info __collate_info;
-#endif /* !__LIBC__ */
+extern struct __locale_st_collate *__collate_data;
 
 __BEGIN_DECLS
 #ifdef __LIBC__
-wchar_t	*__collate_mbstowcs(const char *, locale_t);
+wchar_t	*__collate_mbstowcs(const char *);
 wchar_t	*__collate_wcsdup(const wchar_t *);
-wchar_t	*__collate_substitute(const wchar_t *, int, locale_t);
-int	__collate_load_tables(const char *, locale_t);
-void	__collate_lookup_l(const wchar_t *, int *, int *, int *, locale_t);
-void	__collate_lookup_which(const wchar_t *, int *, int *, int, locale_t);
-void	__collate_xfrm(const wchar_t *, wchar_t **, locale_t);
-int	__collate_range_cmp(wchar_t, wchar_t, locale_t);
-size_t	__collate_collating_symbol(wchar_t *, size_t, const char *, size_t, mbstate_t *, locale_t);
-int	__collate_equiv_class(const char *, size_t, mbstate_t *, locale_t);
-size_t	__collate_equiv_match(int, wchar_t *, size_t, wchar_t, const char *, size_t, mbstate_t *, size_t *, locale_t);
+wchar_t	*__collate_substitute(const wchar_t *, int);
+int	__collate_load_tables(const char *);
+void	__collate_lookup_l(const wchar_t *, int *, int *, int *);
+void	__collate_lookup_which(const wchar_t *, int *, int *, int);
+void	__collate_xfrm(const wchar_t *, wchar_t **);
+int	__collate_range_cmp(wchar_t, wchar_t);
+size_t	__collate_collating_symbol(wchar_t *, size_t, const char *, size_t, mbstate_t *);
+int	__collate_equiv_class(const char *, size_t, mbstate_t *);
+size_t	__collate_equiv_match(int, wchar_t *, size_t, wchar_t, const char *, size_t, mbstate_t *, size_t *);
 #else /* !__LIBC__ */
 void	__collate_lookup(const unsigned char *, int *, int *, int *);
 #endif /* __LIBC__ */

==== //depot/projects/soc2008/konrad_collation/colldef.apple/parse.y#9 (text+ko) ====

@@ -29,6 +29,7 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD: src/usr.bin/colldef/parse.y,v 1.31 2002/10/16 12:56:22 charnier Exp $");
 
+#include <assert.h>
 #include <arpa/inet.h>
 #include <err.h>
 #include <stdarg.h>
@@ -89,12 +90,19 @@
 static DB *chaindb;
 static int nchain = 0;
 static DB *stringdb;
-static struct symbol prev_weight_table[COLL_WEIGHTS_MAX];
-static struct symbol prev2_weight_table[COLL_WEIGHTS_MAX];
-static struct symbol weight_table[COLL_WEIGHTS_MAX];
+
+static struct symbol prev_weight_table[COLL_WEIGHTS_REAL];
+static struct symbol prev2_weight_table[COLL_WEIGHTS_REAL];
+static struct symbol weight_table[COLL_WEIGHTS_REAL];
+
+struct weight_map_struct weight_map[COLL_WEIGHTS_MAX];
+struct weight_map_struct prev_weight_map[COLL_WEIGHTS_MAX];
+struct weight_map_struct prev2_weight_map[COLL_WEIGHTS_MAX];
+
 static int prev_line = LINE_NONE;
 static struct symbol *prev_elem;
 static int weight_index = 0;
+static int map_idx = 0;
 static int allow_ellipsis = 0;
 static struct symbol sym_ellipsis = {SYMBOL_ELLIPSIS, PRI_UNDEFINED, L"", {0}};
 static struct symbol sym_ignore = {SYMBOL_IGNORE, PRI_IGNORE, L"", {0}};
@@ -113,7 +121,6 @@
 #endif
 struct __collate_st_info info = {{DIRECTIVE_FORWARD, DIRECTIVE_FORWARD}, 0, 0, 0, {PRI_UNDEFINED, PRI_UNDEFINED}, {PRI_UNDEFINED}, 0, 0};
 
-/* Some of the code expects COLL_WEIGHTS_MAX == 2 */
 int directive_count = COLL_WEIGHTS_MAX;
 
 const char *out_file = "LC_COLLATE";
@@ -209,8 +216,10 @@
 			/* we don't set the byte order of t->val, since we
 			 * need it for sorting */
 			t->val = cval;
-			for(z = 0; z < directive_count; z++)
+			for(z = 0; z < COLL_WEIGHTS_REAL; z++)
 				t->pri.pri[z] = htonl(p->pri[z]);
+			for (z = 0; z < directive_count; z++)
+				t->pri.map[z].v = p->map[z].v;
 			t++;
 			flags = R_NEXT;
 		}
@@ -232,17 +241,20 @@
 			int flags = R_FIRST;
 			DBT key, val;
 			struct symbol *v;
+
 			while((ret = charmapdb->seq(charmapdb, &key, &val, flags)) == 0) {
 				memcpy(&v, val.data, sizeof(struct symbol *));
 				switch(v->type) {
 				case SYMBOL_CHAR: {
 					struct __collate_st_char_pri *p = haspri(v->u.wc);
+
 					if (!p || p->pri[0] == PRI_UNDEFINED)
 						warnx("<%s> was not defined", showwcs((const wchar_t *)key.data, key.size / sizeof(wchar_t)));
 					break;
 				}
 				case SYMBOL_CHAIN: {
 					struct __collate_st_chain_pri *p = getchain(v->u.str, EXISTS);
+
 					if (p->pri[0] == PRI_UNDEFINED)
 						warnx("<%s> was not defined", showwcs((const wchar_t *)key.data, key.size / sizeof(wchar_t)));
 					break;
@@ -307,34 +319,34 @@
 		err(EX_UNAVAILABLE, "can't open destination file %s",
 		    out_file);
 
-	strcpy(__collate_version, COLLATE_VERSION1_3);
+	strcpy(__collate_version, COLLATE_VERSION1_4);
 	if (fwrite(__collate_version, sizeof(__collate_version), 1, fp) != 1)
 		err(EX_IOERR,
 		"IO error writting collate version to destination file %s",
 		    out_file);
-#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
+#if _BYTE_ORDER == _LITTLE_ENDIAN
 	for(z = 0; z < directive_count; z++) {
 		info.undef_pri[z] = htonl(info.undef_pri[z]);
 		info.subst_count[z] = htonl(info.subst_count[z]);
 	}
 	info.chain_count = htonl(info.chain_count);
 	info.large_pri_count = htonl(info.large_pri_count);
-#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
+#endif /* _BYTE_ORDER == _LITTLE_ENDIAN */
 	if (fwrite(&info, sizeof(info), 1, fp) != 1)
 		err(EX_IOERR,
 		"IO error writting collate info to destination file %s",
 		    out_file);
-#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
+#if _BYTE_ORDER == _LITTLE_ENDIAN
 	{
 		int i, z;
 		struct __collate_st_char_pri *p = __collate_char_pri_table;
 
 		for(i = UCHAR_MAX + 1; i-- > 0; p++) {
-			for(z = 0; z < directive_count; z++)
+			for(z = 0; z < COLL_WEIGHTS_REAL; z++)
 				p->pri[z] = htonl(p->pri[z]);
 		}
 	}
-#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
+#endif /* _BYTE_ORDER == _LITTLE_ENDIAN */
 	if (fwrite(__collate_char_pri_table,
 		   sizeof(__collate_char_pri_table), 1, fp) != 1)
 		err(EX_IOERR,
@@ -342,14 +354,15 @@
 		    out_file);
 	for(z = 0; z < directive_count; z++) {
 		if (nsubst[z] > 0) {
-#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
+#if _BYTE_ORDER == _LITTLE_ENDIAN
 			struct __collate_st_subst *t = __collate_substitute_table[z];
 			int i;
+
 			for(i = nsubst[z]; i > 0; i--) {
 				t->val = htonl(t->val);
 				t++;
 			}
-#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
+#endif /* _BYTE_ORDER == _LITTLE_ENDIAN */
 			if ((int)fwrite(__collate_substitute_table[z], sizeof(struct __collate_st_subst), nsubst[z], fp) != nsubst[z])
 				err(EX_IOERR,
 				"IO error writting large substprim table %d to destination file %s",
@@ -357,7 +370,7 @@
 		}
 	}
 	if (nchain > 0) {
-#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
+#if _BYTE_ORDER == _LITTLE_ENDIAN
 		int i, j, z;
 		struct __collate_st_chain_pri *p = __collate_chain_pri_table;
 		wchar_t *w;
@@ -368,7 +381,7 @@
 			for(z = 0; z < directive_count; z++)
 				p->pri[z] = htonl(p->pri[z]);
 		}
-#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
+#endif /* _BYTE_ORDER == _LITTLE_ENDIAN */
 		if (fwrite(__collate_chain_pri_table,
 			   sizeof(*__collate_chain_pri_table), nchain, fp) !=
 			   (size_t)nchain)
@@ -378,14 +391,14 @@
 	}
 
 	if (nlargemap > 0) {
-#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
+#if _BYTE_ORDER == _LITTLE_ENDIAN
 		struct __collate_st_large_char_pri *t = __collate_large_char_pri_table;
 		int i;
 		for(i = 0; i < nlargemap; i++) {
 			t->val = htonl(t->val);
 			t++;
 		}
-#endif /* __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN */
+#endif /* _BYTE_ORDER == _LITTLE_ENDIAN */
 		if ((int)fwrite(__collate_large_char_pri_table, sizeof(struct __collate_st_large_char_pri), nlargemap, fp) != nlargemap)
 			err(EX_IOERR,
 			"IO error writting large pri tables to destination file %s",
@@ -396,7 +409,6 @@
 		err(EX_IOERR, "IO error closing destination file %s",
 		    out_file);
 #ifdef COLLATE_DEBUG
-	/* Do it first, before conversion to network byte order. */
 	if (debug)
 		collate_print_tables();
 #endif
@@ -511,7 +523,7 @@
 	| ELEM {
 	struct symbol *s = getsymbol($1, EXISTS);
 
-#ifdef VSDEBUG
+#ifdef VSDEBUG2
 	printf("\n%s(%d) ", showwcs(s->name, CHARMAP_SYMBOL_LEN), s->u.wc);
 #endif
 	if (s->val != PRI_UNDEFINED)
@@ -540,6 +552,7 @@
 		s->val = prim_pri;
 	prim_pri = s->val + 1;
 	weight_index = 0;
+	map_idx = 0;
 }				weights2 {
 	int i;
 	struct symbol *s = getsymbol($1, EXISTS);
@@ -547,16 +560,21 @@
 		if (weight_index != 0)
 			yyerror("Can't specify weights for collating symbol <%s>", showwcs($1, CHARMAP_SYMBOL_LEN));
 	} else if (weight_index == 0) {
-		for(i = 0; i < directive_count; i++)
+		for(i = 0; i < directive_count; i++) {
 			weight_table[i] = *s;
-	} else if (weight_index != directive_count)
+			/* Store the end, inclusive. */
+			weight_map[i].v = i;
+		}
+	} else if (map_idx != directive_count)
 		yyerror("Not enough weights specified");
 	memcpy(prev_weight_table, weight_table, sizeof(weight_table));
+	memcpy(prev_weight_map, weight_map, sizeof(weight_map));
 	prev_line = LINE_NORMAL;
 	prev_elem = s;
 }
-	| ELLIPSIS { weight_index = 0; allow_ellipsis = 1; } weights {
+	| ELLIPSIS { weight_index = 0; allow_ellipsis = 1; map_idx = 0; } weights {
 	int i;
+
 	if (prev_line == LINE_ELLIPSIS)
 		yyerror("Illegal sequential ellipsis lines");
 	if (prev_line == LINE_UNDEFINED)
@@ -564,11 +582,13 @@
 	if (prev_line == LINE_NONE)
 		yyerror("Ellipsis line must follow a collating identifier lines");
 	if (weight_index == 0) {
-		for(i = 0; i < directive_count; i++)
+		for(i = 0; i < directive_count; i++) {
 			weight_table[i] = sym_ellipsis;
-	} else if (weight_index != directive_count)
+			weight_map[i].v = i;
+		}
+	} else if (map_idx != directive_count)
 		yyerror("Not enough weights specified");
-	for(i = 0; i < directive_count; i++) {
+	for(i = 0; i < weight_index; i++) {
 		if (weight_table[i].type != SYMBOL_ELLIPSIS)
 			continue;
 		switch (prev_weight_table[i].type) {
@@ -584,6 +604,8 @@
 	}
 	memcpy(prev2_weight_table, prev_weight_table, sizeof(prev_weight_table));
 	memcpy(prev_weight_table, weight_table, sizeof(weight_table));
+	memcpy(prev2_weight_map, prev_weight_map, sizeof(weight_map));
+	memcpy(prev_weight_map, weight_map, sizeof(weight_map));
 	prev_line = LINE_ELLIPSIS;
 	allow_ellipsis = 0;
 }
@@ -592,16 +614,21 @@
 		yyerror("Multiple UNDEFINED lines not allowed");
 	sym_undefined.val = prim_pri++;
 	weight_index = 0;
+	map_idx = 0;
 	allow_ellipsis = 1;
 }				 weights {
 	int i;
 	if (weight_index == 0) {
 		weight_table[0] = sym_undefined;
-		for(i = 1; i < directive_count; i++)
+		weight_map[0].v = 0;
+		for(i = 1; i < directive_count; i++) {
+			weight_map[i].v = i;
 			weight_table[i] = sym_ellipsis;
-	} else if (weight_index != directive_count)
+		}
+	} else if (map_idx != directive_count)
 		yyerror("Not enough weights specified");
 	memcpy(prev_weight_table, weight_table, sizeof(weight_table));
+	memcpy(prev_weight_map, weight_map, sizeof(weight_map));
 	prev_line = LINE_UNDEFINED;
 }
 ;
@@ -609,7 +636,7 @@
 	| order_lines2 order_line2 '\n'
 ;
 order_line2 :
-	| ELEM { weight_index = 0; } weights2 {
+	| ELEM { weight_index = 0; map_idx = 0; } weights2 {
 	int i;
 	struct symbol *s = getsymbol($1, EXISTS);
 
@@ -619,12 +646,15 @@
 		if (weight_index != 0)
 			yyerror("Can't specify weights for collating symbol <%s>", showwcs($1, CHARMAP_SYMBOL_LEN));
 	} else if (weight_index == 0) {
-		for(i = 0; i < directive_count; i++)
+		for(i = 0; i < directive_count; i++) {
 			weight_table[i] = *s;
-	} else if (weight_index != directive_count)
+			weight_map[i].v = i;
+		}
+	} else if (map_idx != directive_count)
 		yyerror("Not enough weights specified");
 	if (prev_line == LINE_ELLIPSIS) {
 		int w, x;
+
 		for(i = 0; i < directive_count; i++) {
 			switch (prev_weight_table[i].type) {
 			case SYMBOL_CHAR:
@@ -633,13 +663,13 @@
 			case SYMBOL_SYMBOL:
 				for (w = prev_elem->u.wc + 1; w < s->u.wc; w++) {
 					struct __collate_st_char_pri *p = getpri(w);
+
 					if (p->pri[i] != PRI_UNDEFINED)
 						yyerror("Char 0x%02x previously defined", w);
 					p->pri[i] = prev_weight_table[i].val;
 				}
 				break;
 			case SYMBOL_ELLIPSIS:
-
 				switch (weight_table[i].type) {
 				case SYMBOL_STRING:
 					yyerror("Strings can't be endpoints of ellipsis");
@@ -650,11 +680,14 @@
 				case SYMBOL_SYMBOL:
 					yyerror("Collation symbols can't be endpoints of ellipsis");
 				}
-				if (s->val - prev_elem->val != weight_table[i].val - prev2_weight_table[i].val)
+				if (s->val - prev_elem->val !=
+				    weight_table[i].val -
+				    prev2_weight_table[i].val)
 					yyerror("Range mismatch in weight %d", i);
 				x = prev2_weight_table[i].val + 1;
 				for (w = prev_elem->u.wc + 1; w < s->u.wc; w++) {
 					struct __collate_st_char_pri *p = getpri(w);
+
 					if (p->pri[i] != PRI_UNDEFINED)
 						yyerror("Char 0x%02x previously defined", w);
 					p->pri[i] = x++;
@@ -663,6 +696,7 @@
 			case SYMBOL_STRING:
 				for (w = prev_elem->u.wc + 1; w < s->u.wc; w++) {
 					struct __collate_st_char_pri *p = getpri(w);
+
 					if (p->pri[i] != PRI_UNDEFINED)
 						yyerror("Char 0x%02x previously defined", w);
 					putsubst(w, i, prev_weight_table[i].u.str);
@@ -675,7 +709,13 @@
 	switch(s->type) {
 	case SYMBOL_CHAR: {
 		struct __collate_st_char_pri *p = getpri(s->u.wc);
-		for(i = 0; i < directive_count; i++) {
+
+#ifdef VSDEBUG
+		fprintf(stderr, "%s ", showwcs(s->name, CHARMAP_SYMBOL_LEN));
+		fprintf(stderr, "weights=%d ", weight_index);
+#endif
+		/* This is the main piece of code. */
+		for(i = 0; i < weight_index; i++) {
 			switch (weight_table[i].type) {
 			case SYMBOL_CHAR:
 			case SYMBOL_CHAIN:
@@ -684,6 +724,9 @@
 				if (p->pri[i] != PRI_UNDEFINED)
 					yyerror("Char 0x%02x previously defined", s->u.wc);
 				p->pri[i] = weight_table[i].val;
+#ifdef VSDEBUG
+				fprintf(stderr, " weight[%d]=%d", i, p->pri[i]);
+#endif
 				break;
 			case SYMBOL_STRING:
 				if (p->pri[i] != PRI_UNDEFINED)
@@ -692,41 +735,59 @@
 				p->pri[i] = weight_table[i].val;
 				break;
 			}
+#if 0
+			default:
+				errx(1, "unrecognized symbol type: %d", weight_table[i].type);
+#endif
 		}
+#ifndef NDEBUG
+		for (i = 0; i < COLL_WEIGHTS_MAX - 1; i++)
+			assert(weight_map[i].v < weight_map[i + 1].v);
+#endif
+		memcpy(p->map, weight_map, sizeof(p->map));
+#ifdef VSDEBUG
+		fputc('\n', stderr);
+#endif
 		break;
 	}
 	case SYMBOL_CHAIN: {
 		struct __collate_st_chain_pri *p = getchain(s->u.str, EXISTS);
-		for(i = 0; i < directive_count; i++) {
+
+		for(i = 0; i < weight_index; i++) {
 			switch (weight_table[i].type) {
 			case SYMBOL_CHAR:
 			case SYMBOL_CHAIN:
 			case SYMBOL_IGNORE:
 			case SYMBOL_SYMBOL:
 				if (p->pri[i] != PRI_UNDEFINED)
-					yyerror("Chain %s previously defined", showwcs(s->u.str, STR_LEN));
+					yyerror("Chain %s previously defined",
+					    showwcs(s->u.str, STR_LEN));
 				p->pri[i] = weight_table[i].val;
 				break;
 			case SYMBOL_STRING :
 				if (wcsncmp(s->u.str, weight_table[i].u.str, STR_LEN) != 0)
 					yyerror("Chain/string mismatch");
 				if (p->pri[i] != PRI_UNDEFINED)
-					yyerror("Chain %s previously defined", showwcs(s->u.str, STR_LEN));
+					yyerror("Chain %s previously defined",
+					    showwcs(s->u.str, STR_LEN));
 				/* negative value mean don't substitute
 				 * the chain, but it is in an
 				 * equivalence class */
 				p->pri[i] = -weight_table[i].val;
 			}
 		}
+		memcpy(p->map, weight_map, sizeof(p->map));
 		break;
 	}
 	}
 	memcpy(prev_weight_table, weight_table, sizeof(weight_table));
+	memcpy(prev_weight_map, weight_map, sizeof(weight_map));
 	prev_line = LINE_NORMAL;
 	prev_elem = s;
 }
-	| ELLIPSIS { weight_index = 0; allow_ellipsis = 1; } weights {
+	| ELLIPSIS { weight_index = 0; map_idx = 0; allow_ellipsis = 1; } weights {
 	int i;
+
 	if (prev_line == LINE_ELLIPSIS)
 		yyerror("Illegal sequential ellipsis lines");
 	if (prev_line == LINE_UNDEFINED)
@@ -734,11 +795,13 @@
 	if (prev_line == LINE_NONE)
 		yyerror("Ellipsis line must follow a collating identifier lines");
 	if (weight_index == 0) {
-		for(i = 0; i < directive_count; i++)
+		for(i = 0; i < directive_count; i++) {
 			weight_table[i] = sym_ellipsis;
-	} else if (weight_index != directive_count)
+			weight_map[i].v = i;
+		}
+	} else if (map_idx != directive_count)
 		yyerror("Not enough weights specified");
-	for(i = 0; i < directive_count; i++) {
+	for(i = 0; i < weight_index; i++) {
 		if (weight_table[i].type != SYMBOL_ELLIPSIS)
 			continue;
 		switch (prev_weight_table[i].type) {
@@ -754,19 +817,23 @@
 	}
 	memcpy(prev2_weight_table, prev_weight_table, sizeof(prev_weight_table));
 	memcpy(prev_weight_table, weight_table, sizeof(weight_table));
+	memcpy(prev2_weight_map, prev_weight_map, sizeof(prev_weight_map));
+	memcpy(prev_weight_map, weight_map, sizeof(weight_map));
 	prev_line = LINE_ELLIPSIS;
 	allow_ellipsis = 0;
 }
-	| UNDEFINED { weight_index = 0; allow_ellipsis = 1; } weights {
+	| UNDEFINED { weight_index = 0; map_idx = 0; allow_ellipsis = 1; } weights {
 	int i;
 
 	if (weight_index == 0) {
 		weight_table[0] = sym_undefined;
-		for(i = 1; i < directive_count; i++)
+		for(i = 1; i < directive_count; i++) {
 			weight_table[i] = sym_ellipsis;
-	} else if (weight_index != directive_count)
+			weight_map[i].v = i;
+		}
+	} else if (map_idx != directive_count)
 		yyerror("Not enough weights specified");
-	for(i = 0; i < directive_count; i++) {
+	for(i = 0; i < map_idx; i++) {
 		switch (weight_table[i].type) {
 		case SYMBOL_CHAR:
 		case SYMBOL_CHAIN:
@@ -784,7 +851,9 @@
 			yyerror("Strings can't be used with UNDEFINED");
 		}
 	}
+	/* Assume that UNDEFINED has 1->1 mapping. */
 	memcpy(prev_weight_table, weight_table, sizeof(weight_table));
+	memcpy(prev_weight_map, weight_map, sizeof(weight_map));
 	prev_line = LINE_UNDEFINED;
 }
 ;
@@ -794,66 +863,70 @@
 ;
 expansion :
 	  weight_ex
-	| expansion weight_ex_null
+	| expansion weight_ex
 ;
 weight2 : weight
-	| EXPAND expansion EXPAND {
-#ifdef VSDEBUG
-		printf("[%d]=%d ",
-		    weight_index, weight_table[weight_index]->val);
-#endif
-		weight_index++;
-	}
+	| EXPAND {
+} expansion EXPAND {
+	/* Store the end (inclusive), not the beginning. */
+	weight_map[map_idx].v = weight_index - 1;
+	map_idx++;
+}
 ;
 weights :
 	| weight
 	| weights ';' weight
 ;
-weight :  weight_ex1
+weight : ELEM {
+	struct symbol *s;
+
+	assert(weight_index < COLL_WEIGHTS_REAL);
+	if (map_idx >= directive_count)
+		yyerror("More weights than specified by order_start (%d >= %d, %s)", map_idx,
+		    directive_count, showwcs($1, CHARMAP_SYMBOL_LEN));
+	s = getsymbol($1, EXISTS);
+	if (order_pass && s->val == PRI_UNDEFINED)
+		printf("<%s> is undefined\n", showwcs($1, CHARMAP_SYMBOL_LEN));
+	weight_map[map_idx++].v = weight_index;
+	weight_table[weight_index++] = *s;
+}
 	| ELLIPSIS {
-	if (weight_index >= directive_count)
+	if (map_idx >= directive_count)
 		yyerror("More weights than specified by order_start");
 	if (!allow_ellipsis)
 		yyerror("Ellipsis weight not allowed");
+	/* Mark the end of weights for this level */
+	weight_map[map_idx++].v = weight_index;
 	weight_table[weight_index++] = sym_ellipsis;
 }
 	| IGNORE {
-	if (weight_index >= directive_count)
+	if (map_idx >= directive_count)
 		yyerror("More weights than specified by order_start");
+	weight_map[map_idx++].v = weight_index;
 	weight_table[weight_index++] = sym_ignore;
 }
 	| STRING {
-	if (weight_index >= directive_count)
+	/* XXX This is broken now, and should be removed. */
+	if (map_idx >= directive_count)
 		yyerror("More weights than specified by order_start");
 	if (wcslen($1) > STR_LEN)
 		yyerror("String '%s' is too long", showwcs($1, STR_LEN));
+	/* Store the end, not the beginning. In the simple case it is equivalent. */
+	weight_map[map_idx++].v = weight_index;
 	weight_table[weight_index++] = *getstring($1);
 }
 ;
-weight_ex1 : weight_ex { weight_index++; }
-;
 weight_ex : ELEM {
 	struct symbol *s;
 
-	if (weight_index >= directive_count)
-		yyerror("More weights than specified by order_start (%d >= %d, %s)", weight_index,
+	assert(weight_index < COLL_WEIGHTS_REAL);
+	if (map_idx >= directive_count)
+		yyerror("More weights than specified by order_start (%d >= %d, %s)", map_idx,
 		    directive_count, showwcs($1, CHARMAP_SYMBOL_LEN));
 	s = getsymbol($1, EXISTS);
 	if (order_pass && s->val == PRI_UNDEFINED)
 		printf("<%s> is undefined\n", showwcs($1, CHARMAP_SYMBOL_LEN));
-	weight_table[weight_index] = *s;
-}
-;
-weight_ex_null : ELEM {
-	struct symbol *s;
-
-	if (weight_index >= directive_count)
-		yyerror("More weights than specified by order_start (%d >= %d, %s)", weight_index,
-		    directive_count, showwcs($1, CHARMAP_SYMBOL_LEN));
-	s = getsymbol($1, EXISTS);
-	if (order_pass && s->val == PRI_UNDEFINED)
-		printf("<%s> is undefined\n", showwcs($1, CHARMAP_SYMBOL_LEN));
-	weight_table[weight_index].val += s->val;
+	weight_table[weight_index++] = *s;
 }
 ;
 order_end : ORDER_END '\n'
@@ -1026,7 +1099,7 @@
 	for (ch = 0; ch <= UCHAR_MAX; ch++)
 		for(z = 0; z < COLL_WEIGHTS_MAX; z++)
 			__collate_char_pri_table[ch].pri[z] = PRI_UNDEFINED;
-	if (setlocale(LC_ALL, "en_US.UTF-8") == NULL) 
+	if (setlocale(LC_ALL, "en_US.UTF-8") == NULL)
 		errx(1, "setlocale() failed");
 #ifdef COLLATE_DEBUG
 	while((ch = getopt(ac, av, ":do:I:m:")) != -1) {
@@ -1077,28 +1150,36 @@
 {
 	DBT key, val;
 	struct __collate_st_char_pri *p;
-	int ret;
+	int ret, z;
+
+	if (c <= UCHAR_MAX) {
+		p = &__collate_char_pri_table[c];
+		for (z = 0; z < COLL_WEIGHTS_MAX; z++)
+			p->map[z].v = z;
+		for(z = 0; z < COLL_WEIGHTS_REAL; z++)
+			p->pri[z] = PRI_UNDEFINED;
 
-	if (c <= UCHAR_MAX)
-		return &__collate_char_pri_table[c];
+		return p;
+	}
 	key.data = &c;
 	key.size = sizeof(int32_t);
 	if ((ret = largemapdb->get(largemapdb, &key, &val, 0)) < 0)
 		err(1, "getpri: Error getting %s", charname(c));
 	if (ret != 0) {
-		struct __collate_st_char_pri *pn;
-		int z;
-		if ((pn = (struct __collate_st_char_pri *)malloc(sizeof(struct __collate_st_char_pri))) == NULL)
+		if ((p = (struct __collate_st_char_pri *)
+		    malloc(sizeof(struct __collate_st_char_pri))) == NULL)
 			err(1, "getpri: malloc");
 		for(z = 0; z < COLL_WEIGHTS_MAX; z++)
-			pn->pri[z] = PRI_UNDEFINED;
-		val.data = &pn;
+			p->map[z].v = z;
+		for(z = 0; z < COLL_WEIGHTS_REAL; z++)
+			p->pri[z] = PRI_UNDEFINED;
+		val.data = &p;
 		val.size = sizeof(struct __collate_st_char_pri *);
 		if (largemapdb->put(largemapdb, &key, &val, 0) < 0)
 			err(1, "getpri: Error storing %s", charname(c));
 		nlargemap++;
 	}
-	memcpy(&p, val.data, sizeof(struct __collate_st_char_pri *));
+
 	return p;
 }
 
@@ -1137,7 +1218,8 @@
 		int z;
 		if (exists > 0)
 			errx(1, "getchain: \"%s\" is not defined", showwcs(wcs, STR_LEN));
-		if ((pn = (struct __collate_st_chain_pri *)malloc(sizeof(struct __collate_st_chain_pri))) == NULL)
+		if ((pn = (struct __collate_st_chain_pri *)
+		    malloc(sizeof(struct __collate_st_chain_pri))) == NULL)
 			err(1, "getchain: malloc");
 		for(z = 0; z < COLL_WEIGHTS_MAX; z++)
 			pn->pri[z] = PRI_UNDEFINED;
@@ -1237,6 +1319,7 @@
 	struct symbol *p;
 	int ret;
 
+	errx(1, "internal error: getstring called");
 	key.data = (void *)wcs;
 	key.size = wcslen(wcs) * sizeof(wchar_t);
 	if ((ret = stringdb->get(stringdb, &key, &val, 0)) < 0)
@@ -1254,6 +1337,7 @@
 			err(1, "getstring: Error storing \"%s\"", showwcs(wcs, STR_LEN));
 	}
 	memcpy(&p, val.data, sizeof(struct symbol *));
+
 	return p;
 }
 
@@ -1391,19 +1475,32 @@
 static char *
 show(int c)
 {
-	static char buf[5];
+	static char buf[40];
+	char *p, utfbuf[6];
+	int i;
 
 	if (c >=32 && c <= 126)
 		sprintf(buf, "'%c' ", c);
-	else
+	else {
 		sprintf(buf, "\\x{%02x}", c);
+	}
+	if ((c = wctomb(utfbuf, c)) != -1) {
+		p = buf + strlen(buf);
+		*p++ = '(';
+		for (i = 0; i < c; i++) {
+			sprintf(p, "%X ", (unsigned char)utfbuf[i]);
+			p += strlen(p);
+		}
+		sprintf(p, ")");
+	}
+
 	return buf;
 }
 
 static void
 collate_print_tables(void)
 {
-	int i, z;
+	int i, z, pos;
 
 	printf("Info: p=%d s=%d f=0x%02x m=%d dc=%d up=%d us=%d pc=%d sc=%d cc=%d lc=%d\n",
 	    info.directive[0], info.directive[1],
@@ -1436,25 +1533,65 @@
 			putchar('\n');
 		}
 	}
-	printf("Char priority table:\n");
+	printf("Char priority table: (utf8 in parens)\n");
 	{
 		struct __collate_st_char_pri *p2 = __collate_char_pri_table;
+
 		for (i = 0; i < UCHAR_MAX + 1; i++, p2++) {
+			if (p2->map[1].v == 0)
+				continue;	/* Entry not used. */
 			printf("\t%s :", show(i));
-			for(z = 0; z < info.directive_count; z++)
-				printf(" %d", ntohl(p2->pri[z]));
+			for(z = 0, pos = 0; z < info.directive_count; z++) {
+				/*
+				 * Is the last weight in the sequence on
+				 * this position? The assertion here won't hold if some
+				 * character wasn't used at all. Hence, the additional 'if'
+				 * above.
+				 */
+				assert(pos <= p2->map[z].v);
+				if (pos == p2->map[z].v) {
+					printf(" %d", ntohl(p2->pri[pos]));
+					pos++;
+					continue;
+				}
+				printf(" \"");
+				/* Output all expanded weights in sequence. */
+				while (pos <= p2->map[z].v) {
+					printf("%d%s", ntohl(p2->pri[pos]),
+					    pos < p2->map[z].v ? " " : "");
+					    pos++;
+				}
+				printf("\"");
+			}
 			putchar('\n');
 		}
 	}
 	if (info.large_pri_count > 0) {
 		struct __collate_st_large_char_pri *p2 = __collate_large_char_pri_table;
-		printf("Large priority table:\n");
+
+		printf("Large priority table: (utf8 in parens)\n");
 		for (i = info.large_pri_count; i-- > 0; p2++) {
 			if (p2->val == 0)
 				break;
 			printf("\t%s :", show(ntohl(p2->val)));
-			for(z = 0; z < info.directive_count; z++)
-				printf(" %d", ntohl(p2->pri.pri[z]));
+			for(z = 0, pos = 0; z < info.directive_count; z++) {
+				/* Is the last weight in the sequence on
+				 * this position? */
+				assert(pos <= p2->pri.map[z].v);
+				if (pos == p2->pri.map[z].v) {
+					printf(" %d", ntohl(p2->pri.pri[pos]));
+					pos++;
+					continue;
+				}
+				printf(" \"");
+				/* Output all expanded weights in sequence. */
+				while (pos <= p2->pri.map[z].v) {
+					printf("%d%s", ntohl(p2->pri.pri[pos]),
+					    pos < p2->pri.map[z].v ? " " : "");
+					    pos++;
+				}
+				printf("\"");
+			}
 			putchar('\n');
 		}
 	}



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200809011122.m81BMlRF030916>