Date: Mon, 9 Dec 2013 15:58:51 +0100 (CET)
From: Christian Weisgerber <naddy@FreeBSD.org>
To: FreeBSD-gnats-submit@freebsd.org
Subject: ports/184632: textproc/sgmlformat: broken regexp -> regex conversion
Message-ID: <201312091458.rB9EwpoE066957@lorvorc.mips.inka.de>
Resent-Message-ID: <201312091500.rB9F00rg026463@freefall.freebsd.org>
next in thread | raw e-mail | index | archive | help
>Number: 184632
>Category: ports
>Synopsis: textproc/sgmlformat: broken regexp -> regex conversion
>Confidential: no
>Severity: non-critical
>Priority: low
>Responsible: freebsd-ports-bugs
>State: open
>Quarter:
>Keywords:
>Date-Required:
>Class: sw-bug
>Submitter-Id: current-users
>Arrival-Date: Mon Dec 09 15:00:00 UTC 2013
>Closed-Date:
>Last-Modified:
>Originator: Christian Weisgerber
>Release: FreeBSD 9.2-STABLE amd64
>Organization:
>Environment:
System: FreeBSD lorvorc.mips.inka.de 9.2-STABLE FreeBSD 9.2-STABLE #0 r259069: Sat Dec 7 15:52:28 CET 2013 naddy@lorvorc.mips.inka.de:/usr/obj/usr/src/sys/GENERIC amd64
>Description:
I don't know how to test this functionality, but the conversion from UNIX V8 <regexp.h> to POSIX <regex.h> in patch-regex.txt cannot possibly be correct. You may want to compare the regex(3) and old regexp(3) man pages.

http://svnweb.freebsd.org/base/stable/8/lib/libcompat/regexp/regexp.3?revision=196045&view=markup

The most obvious problem is in translate.c, where changes like this

    - if (!regexec(t->attpair[a].rex, atval)) match = 0;
    + if (!regexec(&t->attpair[a].rex, atval, 0, NULL, 0)) match = 0;

ignore that V8 regexec() and POSIX regexec() have inverted result codes. V8 regexec() returns 0 for failure, POSIX regexec() returns 0 for success.

The problems in traninit.c are more subtle:

    - if (!(T.var_RE_value=regcomp(buf))) {
    + if (regcomp(&T.var_RE_value, buf, 0) != 0) {

What happens in the error case? When POSIX regcomp() returns an error, the value of var_RE_value will be undefined. However, in translate.c, regexec() is blindly called with this value. Also, V8 regcomp() uses extended regular expressions.

I have attached a replacement patch that reimplements the conversion from regexp to regex. It uses two simple wrapper functions that map the required V8 regcomp/regexec functionality onto POSIX regcomp/regexec. In particular, this allows us to still use (regex_t *)NULL to indicate an invalid/nonexistent regular expression.
Again, I can't actually test this. >How-To-Repeat: >Fix: --- instant/tables.c.orig 1996-09-08 03:55:10.000000000 +0200 +++ instant/tables.c 2013-11-30 23:51:25.000000000 +0100 @@ -84,7 +84,7 @@ #include <sys/types.h> #include <errno.h> -#include <regexp.h> +#include <regex.h> #include "general.h" #include "translate.h" --- instant/traninit.c.orig 1997-07-16 18:44:12.000000000 +0200 +++ instant/traninit.c 2013-11-30 23:51:25.000000000 +0100 @@ -69,7 +69,7 @@ #include <memory.h> #include <sys/types.h> #include <errno.h> -#include <regexp.h> +#include <regex.h> #include "general.h" #include "translate.h" @@ -100,6 +100,23 @@ void AddSDATA(const char *from, const char *to); /* ______________________________________________________________________ */ +/* minimal compatibility wrapper for UNIX V8 regexp, match only + */ + +static regex_t *v8_regcomp(const char *pattern) +{ + regex_t *re; + if ((re = malloc(sizeof(regex_t))) != NULL) { + if (regcomp(re, pattern, REG_EXTENDED|REG_NOSUB)) { + free(re); + return NULL; + } + } + return re; +} +#define regcomp v8_regcomp + +/* ______________________________________________________________________ */ /* Read the translation specs from the input file, storing in memory. * Arguments: * Name of translation spec file. 
--- instant/translate.c.orig 1996-09-08 03:55:10.000000000 +0200 +++ instant/translate.c 2013-11-30 23:51:25.000000000 +0100 @@ -69,7 +69,7 @@ #include <memory.h> #include <sys/types.h> #include <errno.h> -#include <regexp.h> +#include <regex.h> #include "general.h" #define STORAGE @@ -82,6 +82,18 @@ static void WasProcessed(Element_t *); /* ______________________________________________________________________ */ +/* minimal compatibility wrapper for UNIX V8 regexp, match only + */ + +static int v8_regexec(const regex_t *re, const char *string) +{ + if (re == NULL) + return 0; + return !regexec(re, string, 0, NULL, 0); +} +#define regexec v8_regexec + +/* ______________________________________________________________________ */ /* Translate the subtree starting at 'e'. Output goes to 'fp'. * This is the entry point for translating an instance. * Arguments: --- instant/translate.h.orig 1996-09-08 03:55:10.000000000 +0200 +++ instant/translate.h 2013-11-30 23:51:25.000000000 +0100 @@ -75,7 +75,7 @@ typedef struct { char *name; /* attribute name string */ char *val; /* attribute value string */ - regexp *rex; /* attribute value reg expr (compiled) */ + regex_t *rex; /* attribute value reg expr (compiled) */ } AttPair_t; typedef struct _Trans { @@ -83,19 +83,19 @@ char *gi; /* element name of tag under consideration */ char **gilist; /* list of element names (multiple gi's) */ char *context; /* context in tree - looking depth levels up */ - regexp *context_re; /* tree heirarchy looking depth levels up */ + regex_t *context_re; /* tree heirarchy looking depth levels up */ int depth; /* number of levels to look up the tree */ AttPair_t *attpair; /* attr name-value pairs */ int nattpairs; /* number of name-value pairs */ char *parent; /* GI has this element as parent */ int nth_child; /* GI is Nth child of this of parent element */ char *content; /* element has this string in content */ - regexp *content_re; /* content reg expr (compiled) */ + regex_t *content_re; /* 
content reg expr (compiled) */ char *pattrset; /* is this attr set (any value) in parent? */ char *var_name; /* variable name */ char *var_value; /* variable value */ char *var_RE_name; /* variable name (for VarREValue) */ - regexp *var_RE_value; /* variable value (compiled, for VarREValue) */ + regex_t *var_RE_value; /* variable value (compiled, for VarREValue) */ Map_t *relations; /* various relations to check */ /* actions */ @@ -150,4 +150,3 @@ void OSFtable(Element_t *, FILE *, char **, int); /* ______________________________________________________________________ */ - --- instant/tranvar.c.orig 1997-02-07 03:40:45.000000000 +0100 +++ instant/tranvar.c 2013-11-30 23:51:25.000000000 +0100 @@ -66,7 +66,7 @@ #include <sys/types.h> #include <errno.h> -#include <regexp.h> +#include <regex.h> #include "general.h" #include "translate.h" --- instant/util.c.orig 1996-09-08 03:55:10.000000000 +0200 +++ instant/util.c 2013-11-30 23:51:25.000000000 +0100 @@ -85,7 +85,7 @@ #include <sys/stat.h> #include <sys/file.h> #include <errno.h> -#include <regexp.h> +#include <regex.h> /* CSS don't have it and I don't see where it's used #include <values.h> */ >Release-Note: >Audit-Trail: >Unformatted:
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201312091458.rB9EwpoE066957>