Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 9 Dec 2013 15:58:51 +0100 (CET)
From:      Christian Weisgerber <naddy@FreeBSD.org>
To:        FreeBSD-gnats-submit@freebsd.org
Subject:   ports/184632: textproc/sgmlformat: broken regexp -> regex conversion
Message-ID:  <201312091458.rB9EwpoE066957@lorvorc.mips.inka.de>
Resent-Message-ID: <201312091500.rB9F00rg026463@freefall.freebsd.org>

next in thread | raw e-mail | index | archive | help

>Number:         184632
>Category:       ports
>Synopsis:       textproc/sgmlformat: broken regexp -> regex conversion
>Confidential:   no
>Severity:       non-critical
>Priority:       low
>Responsible:    freebsd-ports-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Mon Dec 09 15:00:00 UTC 2013
>Closed-Date:
>Last-Modified:
>Originator:     Christian Weisgerber
>Release:        FreeBSD 9.2-STABLE amd64
>Organization:
>Environment:
System: FreeBSD lorvorc.mips.inka.de 9.2-STABLE FreeBSD 9.2-STABLE #0 r259069: Sat Dec 7 15:52:28 CET 2013 naddy@lorvorc.mips.inka.de:/usr/obj/usr/src/sys/GENERIC amd64

>Description:

I don't know how to test this functionality, but the conversion
from UNIX V8 <regexp.h> to POSIX <regex.h> in patch-regex.txt cannot
possibly be correct.

You may want to compare the regex(3) and old regexp(3) man pages.
http://svnweb.freebsd.org/base/stable/8/lib/libcompat/regexp/regexp.3?revision=196045&view=markup

The most obvious problem is in translate.c, where changes like this
 
-               if (!regexec(t->attpair[a].rex, atval)) match = 0;
+               if (!regexec(&t->attpair[a].rex, atval, 0, NULL, 0)) match = 0;
 
ignore that V8 regexec() and POSIX regexec() have inverted result
codes:  V8 regexec() returns 0 for failure, whereas POSIX regexec()
returns 0 for success.

The problems in traninit.c are more subtle:

-       if (!(T.var_RE_value=regcomp(buf)))     {
+       if (regcomp(&T.var_RE_value, buf, 0) != 0) {

What happens in the error case?  When POSIX regcomp() returns an
error, the value of var_RE_value will be undefined.  However, in
translate.c, regexec() is blindly called with this value.

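Also, V8 regcomp() uses extended regular expressions, whereas the
converted call passes 0 as the cflags argument to POSIX regcomp(),
which selects basic regular expressions.  REG_EXTENDED is needed to
get the extended syntax.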

I have attached a replacement patch that reimplements the conversion
from regexp to regex.  It uses two simple wrapper functions that
map the required V8 regcomp/regexec functionality onto POSIX
regcomp/regexec.  In particular, this allows us to still use
(regex_t *)NULL to indicate an invalid/nonexistent regular expression.

Again, I can't actually test this.

>How-To-Repeat:

>Fix:

--- instant/tables.c.orig	1996-09-08 03:55:10.000000000 +0200
+++ instant/tables.c	2013-11-30 23:51:25.000000000 +0100
@@ -84,7 +84,7 @@
 #include <sys/types.h>
 #include <errno.h>
 
-#include <regexp.h>
+#include <regex.h>
 #include "general.h"
 #include "translate.h"
 
--- instant/traninit.c.orig	1997-07-16 18:44:12.000000000 +0200
+++ instant/traninit.c	2013-11-30 23:51:25.000000000 +0100
@@ -69,7 +69,7 @@
 #include <memory.h>
 #include <sys/types.h>
 #include <errno.h>
-#include <regexp.h>
+#include <regex.h>
 
 #include "general.h"
 #include "translate.h"
@@ -100,6 +100,23 @@
 void	AddSDATA(const char *from, const char *to);
 
 /* ______________________________________________________________________ */
+/* minimal compatibility wrapper for UNIX V8 regexp, match only
+ */
+
+static regex_t *v8_regcomp(const char *pattern)
+{
+	regex_t *re;
+	if ((re = malloc(sizeof(regex_t))) != NULL) {
+		if (regcomp(re, pattern, REG_EXTENDED|REG_NOSUB)) {
+			free(re);
+			return NULL;
+		}
+	}
+	return re;
+}
+#define regcomp	v8_regcomp
+
+/* ______________________________________________________________________ */
 /*  Read the translation specs from the input file, storing in memory.
  *  Arguments:
  *	Name of translation spec file.
--- instant/translate.c.orig	1996-09-08 03:55:10.000000000 +0200
+++ instant/translate.c	2013-11-30 23:51:25.000000000 +0100
@@ -69,7 +69,7 @@
 #include <memory.h>
 #include <sys/types.h>
 #include <errno.h>
-#include <regexp.h>
+#include <regex.h>
 
 #include "general.h"
 #define STORAGE
@@ -82,6 +82,18 @@
 static void	WasProcessed(Element_t *);
 
 /* ______________________________________________________________________ */
+/* minimal compatibility wrapper for UNIX V8 regexp, match only 
+ */
+
+static int v8_regexec(const regex_t *re, const char *string)
+{
+	if (re == NULL)
+		return 0;
+	return !regexec(re, string, 0, NULL, 0);
+}
+#define regexec	v8_regexec
+
+/* ______________________________________________________________________ */
 /*  Translate the subtree starting at 'e'. Output goes to 'fp'.
  *  This is the entry point for translating an instance.
  *  Arguments:
--- instant/translate.h.orig	1996-09-08 03:55:10.000000000 +0200
+++ instant/translate.h	2013-11-30 23:51:25.000000000 +0100
@@ -75,7 +75,7 @@
 typedef struct {
     char	*name;		/* attribute name string */
     char	*val;		/* attribute value string */
-    regexp	*rex;		/* attribute value reg expr (compiled) */
+    regex_t	*rex;		/* attribute value reg expr (compiled) */
 } AttPair_t;
 
 typedef struct _Trans {
@@ -83,19 +83,19 @@
     char	*gi;		/* element name of tag under consideration */
     char	**gilist;	/* list of element names (multiple gi's) */
     char	*context;	/* context in tree - looking depth levels up */
-    regexp	*context_re;	/* tree heirarchy looking depth levels up */
+    regex_t	*context_re;	/* tree heirarchy looking depth levels up */
     int		depth;		/* number of levels to look up the tree */
     AttPair_t	*attpair;	/* attr name-value pairs */
     int		nattpairs;	/* number of name-value pairs */
     char	*parent;	/* GI has this element as parent */
     int		nth_child;	/* GI is Nth child of this of parent element */
     char	*content;	/* element has this string in content */
-    regexp	*content_re;	/* content reg expr (compiled) */
+    regex_t	*content_re;	/* content reg expr (compiled) */
     char	*pattrset;	/* is this attr set (any value) in parent? */
     char	*var_name;	/* variable name */
     char	*var_value;	/* variable value */
     char	*var_RE_name;	/* variable name (for VarREValue) */
-    regexp	*var_RE_value;	/* variable value (compiled, for VarREValue) */
+    regex_t	*var_RE_value;	/* variable value (compiled, for VarREValue) */
     Map_t	*relations;	/* various relations to check */
 
     /* actions */
@@ -150,4 +150,3 @@
 void	OSFtable(Element_t *, FILE *, char **, int);
 
 /* ______________________________________________________________________ */
-
--- instant/tranvar.c.orig	1997-02-07 03:40:45.000000000 +0100
+++ instant/tranvar.c	2013-11-30 23:51:25.000000000 +0100
@@ -66,7 +66,7 @@
 #include <sys/types.h>
 #include <errno.h>
 
-#include <regexp.h>
+#include <regex.h>
 #include "general.h"
 #include "translate.h"
 
--- instant/util.c.orig	1996-09-08 03:55:10.000000000 +0200
+++ instant/util.c	2013-11-30 23:51:25.000000000 +0100
@@ -85,7 +85,7 @@
 #include <sys/stat.h>
 #include <sys/file.h>
 #include <errno.h>
-#include <regexp.h>
+#include <regex.h>
 /* CSS don't have it and I don't see where it's used
 #include <values.h>
 */
>Release-Note:
>Audit-Trail:
>Unformatted:



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201312091458.rB9EwpoE066957>