Skip site navigation (1)Skip section navigation (2)
Date:      03 Nov 2001 03:21:50 +0100
From:      Dag-Erling Smorgrav <des@ofug.org>
To:        arch@freebsd.org
Subject:   POSIX character class support for 1Tawk
Message-ID:  <xzpu1wca91d.fsf@flood.ping.uio.no>

next in thread | raw e-mail | index | archive | help
--=-=-=

See attached patch (which I've also submitted to bwk).

DES
-- 
Dag-Erling Smorgrav - des@ofug.org


--=-=-=
Content-Type: text/x-patch
Content-Disposition: attachment; filename=awk.diff

Index: b.c
===================================================================
RCS file: /home/ncvs/src/contrib/one-true-awk/b.c,v
retrieving revision 1.1.1.1
diff -u -r1.1.1.1 b.c
--- b.c	27 Oct 2001 08:07:37 -0000	1.1.1.1
+++ b.c	3 Nov 2001 01:14:16 -0000
@@ -683,12 +683,44 @@
 	}
 }
 
+/*
+ * Character classes for the POSIX locale (IEEE P1003.1 draft 7, June
+ * 2001), assuming the source and execution character sets are ASCII
+ * (ISO646) or supersets thereof.  relex() copies cc_expand as a
+ * NUL-terminated string, so an expansion must not contain NUL; that
+ * is why "cntrl" starts at \001 rather than \000.
+ *
+ * To avoid overflowing relex()'s temporary buffer, each expansion
+ * (before range expansion) must be shorter than twice its full name.
+ */
+struct charclass {
+	const uschar *cc_name;
+	int cc_namelen;
+	const uschar *cc_expand;
+} charclasses[] = {
+	{ "alnum",	5,	"0-9A-Za-z" },
+	{ "alpha",	5,	"A-Za-z" },
+	{ "blank",	5,	" \t" },
+	{ "cntrl",	5,	"\001-\037\177" },
+	{ "digit",	5,	"0-9" },
+	{ "graph",	5,	"\041-\176" },
+	{ "lower",	5,	"a-z" },
+	{ "print",	5,	" \041-\176" },
+	{ "punct",	5,	"\041-\057\072-\100\133-\140\173-\176" },
+	{ "space",	5,	" \f\n\r\t\v" },
+	{ "upper",	5,	"A-Z" },
+	{ "xdigit",	6,	"0-9A-Fa-f" },
+	{ NULL,		0,	NULL },
+};
+
 int relex(void)		/* lexical analyzer for reparse */
 {
+	struct charclass *cc;
 	int c, n;
 	int cflag;
 	static uschar *buf = 0;
 	static int bufsz = 100;
+	const uschar *p;
 	uschar *bp;
 
 	switch (c = *prestr++) {
@@ -730,6 +762,17 @@
 				*bp++ = c;
 			/* } else if (c == '\n') { */
 			/* 	FATAL("newline in character class %.20s...", lastre); */
+			} else if (c == '[' && *prestr == ':') {
+				for (cc = charclasses; cc->cc_name; cc++)
+					if (strncmp(prestr + 1, cc->cc_name, cc->cc_namelen) == 0)
+						break;
+				if (cc->cc_name != NULL && prestr[1 + cc->cc_namelen] == ':' &&
+				    prestr[2 + cc->cc_namelen] == ']') {
+					prestr += cc->cc_namelen + 3;
+					for (p = cc->cc_expand; *p; p++)
+						*bp++ = *p;
+				} else
+					*bp++ = c;
 			} else if (c == '\0') {
 				FATAL("nonterminated character class %.20s", lastre);
 			} else if (bp == buf) {	/* 1st char is special */

--=-=-=--

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-arch" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?xzpu1wca91d.fsf>