Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 4 May 2014 16:50:01 GMT
From:      Pedro Giffuni <pfg@freebsd.org>
To:        freebsd-bugs@FreeBSD.org
Subject:   Re: kern/169302: [libc] [patch] Applied MidnightBSD regex memory consumption limits
Message-ID:  <201405041650.s44Go1M3064711@freefall.freebsd.org>

next in thread | raw e-mail | index | archive | help
The following reply was made to PR kern/169302; it has been noted by GNATS.

From: Pedro Giffuni <pfg@freebsd.org>
To: "bug-followup@FreeBSD.org" <bug-followup@FreeBSD.org>, 
 "zblacher@sandvine.com" <zblacher@sandvine.com>
Cc:  
Subject: Re: kern/169302: [libc] [patch] Applied MidnightBSD regex memory
 consumption limits
Date: Sun, 04 May 2014 11:41:45 -0500

 This is a multi-part message in MIME format.
 --------------020708080805040202010709
 Content-Type: text/plain; charset=ISO-8859-1; format=flowed
 Content-Transfer-Encoding: 7bit
 
 Updated patch, based on the NetBSD changes.
 Also use calloc(1) as in OpenBSD.
 
 --------------020708080805040202010709
 Content-Type: text/plain; charset=us-ascii;
  name="patch-regex-pr169302.txt"
 Content-Transfer-Encoding: 7bit
 Content-Disposition: attachment;
  filename="patch-regex-pr169302.txt"
 
 Index: lib/libc/regex/engine.c
 ===================================================================
 --- lib/libc/regex/engine.c	(revision 265307)
 +++ lib/libc/regex/engine.c	(working copy)
 @@ -219,7 +219,7 @@
  		} else {
  			for (dp = start; dp < stop; dp++)
  				if (*dp == g->must[0] &&
 -				    stop - dp >= g->mlen &&
 +				    (size_t)(stop - dp) >= g->mlen &&
  				    memcmp(dp, g->must, (size_t)g->mlen) == 0)
  					break;
  			if (dp == stop)		/* we didn't find g->must */
 Index: lib/libc/regex/regcomp.c
 ===================================================================
 --- lib/libc/regex/regcomp.c	(revision 265307)
 +++ lib/libc/regex/regcomp.c	(working copy)
 @@ -86,11 +86,11 @@
  #endif
  
  /* === regcomp.c === */
 -static void p_ere(struct parse *p, int stop);
 -static void p_ere_exp(struct parse *p);
 +static void p_ere(struct parse *p, int stop, size_t reclimit);
 +static void p_ere_exp(struct parse *p, size_t reclimit);
  static void p_str(struct parse *p);
 -static void p_bre(struct parse *p, int end1, int end2);
 -static int p_simp_re(struct parse *p, int starordinary);
 +static void p_bre(struct parse *p, int end1, int end2, size_t reclimit);
 +static int p_simp_re(struct parse *p, int starordinary, size_t reclimit);
  static int p_count(struct parse *p);
  static void p_bracket(struct parse *p);
  static void p_b_term(struct parse *p, cset *cs);
 @@ -102,7 +102,7 @@
  static void bothcases(struct parse *p, wint_t ch);
  static void ordinary(struct parse *p, wint_t ch);
  static void nonnewline(struct parse *p);
 -static void repeat(struct parse *p, sopno start, int from, int to);
 +static void repeat(struct parse *p, sopno start, int from, int to, size_t reclimit);
  static int seterr(struct parse *p, int e);
  static cset *allocset(struct parse *p);
  static void freeset(struct parse *p, cset *cs);
 @@ -167,6 +167,13 @@
  #define	never	0		/* some <assert.h>s have bugs too */
  #endif
  
 +#define	MEMLIMIT	0x8000000
 +#define MEMSIZE(p) \
 +	((p)->ncsalloc / CHAR_BIT * NC + \
 +	(p)->ncsalloc * sizeof(cset) + \
 +	(p)->ssize * sizeof(sop))
 +#define	RECLIMIT	256
 +
  /* Macro used by computejump()/computematchjump() */
  #define MIN(a,b)	((a)<(b)?(a):(b))
  
 @@ -214,7 +221,7 @@
  	if (g == NULL)
  		return(REG_ESPACE);
  	p->ssize = len/(size_t)2*(size_t)3 + (size_t)1;	/* ugh */
 -	p->strip = (sop *)malloc(p->ssize * sizeof(sop));
 +	p->strip = calloc(p->ssize, sizeof(sop));
  	p->slen = 0;
  	if (p->strip == NULL) {
  		free((char *)g);
 @@ -249,11 +256,11 @@
  	EMIT(OEND, 0);
  	g->firststate = THERE();
  	if (cflags&REG_EXTENDED)
 -		p_ere(p, OUT);
 +		p_ere(p, OUT, 0);
  	else if (cflags&REG_NOSPEC)
  		p_str(p);
  	else
 -		p_bre(p, OUT, OUT);
 +		p_bre(p, OUT, OUT, 0);
  	EMIT(OEND, 0);
  	g->laststate = THERE();
  
 @@ -294,7 +301,8 @@
   */
  static void
  p_ere(struct parse *p,
 -	int stop)		/* character this ERE should end at */
 +	int stop,		/* character this ERE should end at */
 +	size_t reclimit)
  {
  	char c;
  	sopno prevback;
 @@ -302,11 +310,16 @@
  	sopno conc;
  	int first = 1;		/* is this the first alternative? */
  
 +	if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
 +		p->error = REG_ESPACE;
 +		return;
 +	}
 +
  	for (;;) {
  		/* do a bunch of concatenated expressions */
  		conc = HERE();
  		while (MORE() && (c = PEEK()) != '|' && c != stop)
 -			p_ere_exp(p);
 +			p_ere_exp(p, reclimit);
  		(void)REQUIRE(HERE() != conc, REG_EMPTY);	/* require nonempty */
  
  		if (!EAT('|'))
 @@ -335,10 +348,10 @@
  
  /*
   - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
 - == static void p_ere_exp(struct parse *p);
 + == static void p_ere_exp(struct parse *p, size_t reclimit);
   */
  static void
 -p_ere_exp(struct parse *p)
 +p_ere_exp(struct parse *p, size_t reclimit)
  {
  	char c;
  	wint_t wc;
 @@ -361,7 +374,7 @@
  			p->pbegin[subno] = HERE();
  		EMIT(OLPAREN, subno);
  		if (!SEE(')'))
 -			p_ere(p, ')');
 +			p_ere(p, ')', reclimit);
  		if (subno < NPAREN) {
  			p->pend[subno] = HERE();
  			assert(p->pend[subno] != 0);
 @@ -465,7 +478,7 @@
  				count2 = INFINITY;
  		} else		/* just a single number */
  			count2 = count;
 -		repeat(p, pos, count, count2);
 +		repeat(p, pos, count, count2, 0);
  		if (!EAT('}')) {	/* error heuristics */
  			while (MORE() && PEEK() != '}')
  				NEXT();
 @@ -499,7 +512,7 @@
  /*
   - p_bre - BRE parser top level, anchoring and concatenation
   == static void p_bre(struct parse *p,  int end1, \
 - ==	int end2);
 + ==	int end2, size_t reclimit);
   * Giving end1 as OUT essentially eliminates the end1/end2 check.
   *
   * This implementation is a bit of a kludge, in that a trailing $ is first
 @@ -509,8 +522,14 @@
  static void
  p_bre(struct parse *p,
  	int end1,		/* first terminating character */
 -	int end2)		/* second terminating character */
 +	int end2,		/* second terminating character */
 +	size_t reclimit)
  {
 +	if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) {
 +		p->error = REG_ESPACE;
 +		return;
 +	}
 +
  	sopno start = HERE();
  	int first = 1;			/* first subexpression? */
  	int wasdollar = 0;
 @@ -521,7 +540,7 @@
  		p->g->nbol++;
  	}
  	while (MORE() && !SEETWO(end1, end2)) {
 -		wasdollar = p_simp_re(p, first);
 +		wasdollar = p_simp_re(p, first, reclimit);
  		first = 0;
  	}
  	if (wasdollar) {	/* oops, that was a trailing anchor */
 @@ -536,11 +555,12 @@
  
  /*
   - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
 - == static int p_simp_re(struct parse *p, int starordinary);
 + == static int p_simp_re(struct parse *p, int starordinary, size_t reclimit);
   */
  static int			/* was the simple RE an unbackslashed $? */
  p_simp_re(struct parse *p,
 -	int starordinary)	/* is a leading * an ordinary character? */
 +	int starordinary,	/* is a leading * an ordinary character? */
 +	size_t reclimit)
  {
  	int c;
  	int count;
 @@ -580,7 +600,7 @@
  		EMIT(OLPAREN, subno);
  		/* the MORE here is an error heuristic */
  		if (MORE() && !SEETWO('\\', ')'))
 -			p_bre(p, '\\', ')');
 +			p_bre(p, '\\', ')', reclimit);
  		if (subno < NPAREN) {
  			p->pend[subno] = HERE();
  			assert(p->pend[subno] != 0);
 @@ -641,7 +661,7 @@
  				count2 = INFINITY;
  		} else		/* just a single number */
  			count2 = count;
 -		repeat(p, pos, count, count2);
 +		repeat(p, pos, count, count2, 0);
  		if (!EATTWO('\\', '}')) {	/* error heuristics */
  			while (MORE() && !SEETWO('\\', '}'))
  				NEXT();
 @@ -996,13 +1016,15 @@
  
  /*
   - repeat - generate code for a bounded repetition, recursively if needed
 - == static void repeat(struct parse *p, sopno start, int from, int to);
 + == static void repeat(struct parse *p, sopno start, int from, int to,
 + == size_t reclimit );
   */
  static void
  repeat(struct parse *p,
  	sopno start,		/* operand from here to end of strip */
  	int from,		/* repeated from this number */
 -	int to)			/* to this number of times (maybe INFINITY) */
 +	int to,			/* to this number of times (maybe INFINITY) */
 +	size_t reclimit)
  {
  	sopno finish = HERE();
  #	define	N	2
 @@ -1011,7 +1033,9 @@
  #	define	MAP(n)	(((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
  	sopno copy;
  
 -	if (p->error != 0)	/* head off possible runaway recursion */
 +	if (reclimit++ > RECLIMIT) 
 +		p->error = REG_ESPACE;
 +	if (p->error)
  		return;
  
  	assert(from <= to);
 @@ -1025,7 +1049,7 @@
  	case REP(0, INF):		/* as x{1,}? */
  		/* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
  		INSERT(OCH_, start);		/* offset is wrong... */
 -		repeat(p, start+1, 1, to);
 +		repeat(p, start+1, 1, to, reclimit);
  		ASTERN(OOR1, start);
  		AHEAD(start);			/* ... fix it */
  		EMIT(OOR2, 0);
 @@ -1045,7 +1069,7 @@
  		ASTERN(O_CH, THERETHERE());
  		copy = dupl(p, start+1, finish+1);
  		assert(copy == finish+4);
 -		repeat(p, copy, 1, to-1);
 +		repeat(p, copy, 1, to-1, reclimit);
  		break;
  	case REP(1, INF):		/* as x+ */
  		INSERT(OPLUS_, start);
 @@ -1053,11 +1077,11 @@
  		break;
  	case REP(N, N):			/* as xx{m-1,n-1} */
  		copy = dupl(p, start, finish);
 -		repeat(p, copy, from-1, to-1);
 +		repeat(p, copy, from-1, to-1, reclimit);
  		break;
  	case REP(N, INF):		/* as xx{n-1,INF} */
  		copy = dupl(p, start, finish);
 -		repeat(p, copy, from-1, to);
 +		repeat(p, copy, from-1, to, reclimit);
  		break;
  	default:			/* "can't happen" */
  		SETERROR(REG_ASSERT);	/* just in case */
 @@ -1112,8 +1136,13 @@
  {
  	cset *cs, *ncs;
  
 +
 +	if (MEMSIZE(p) > MEMLIMIT)
 +		goto oomem;
 +
  	ncs = realloc(p->g->sets, (p->g->ncsets + 1) * sizeof(*ncs));
  	if (ncs == NULL) {
 +oomem:
  		SETERROR(REG_ESPACE);
  		return (NULL);
  	}
 @@ -1347,17 +1376,22 @@
  enlarge(struct parse *p, sopno size)
  {
  	sop *sp;
 +	sopno osize;
  
  	if (p->ssize >= size)
  		return 1;
 -
 +	osize = p->ssize;
 +	p->ssize = size;
 +	if (MEMSIZE(p) > MEMLIMIT)
 +		goto oomem;
  	sp = (sop *)realloc(p->strip, size*sizeof(sop));
  	if (sp == NULL) {
 +oomem:
 +		p->ssize = osize;
  		SETERROR(REG_ESPACE);
  		return 0;
  	}
  	p->strip = sp;
 -	p->ssize = size;
  	return 1;
  }
  
 Index: lib/libc/regex/regex2.h
 ===================================================================
 --- lib/libc/regex/regex2.h	(revision 265307)
 +++ lib/libc/regex/regex2.h	(working copy)
 @@ -73,7 +73,7 @@
   * immediately *preceding* "execution" of that operator.
   */
  typedef unsigned long sop;	/* strip operator */
 -typedef long sopno;
 +typedef size_t sopno;
  #define	OPRMASK	0xf8000000L
  #define	OPDMASK	0x07ffffffL
  #define	OPSHIFT	((unsigned)27)
 @@ -165,7 +165,7 @@
  	int magic;
  #		define	MAGIC2	((('R'^0200)<<8)|'E')
  	sop *strip;		/* malloced area for strip */
 -	int ncsets;		/* number of csets in use */
 +	size_t ncsets;		/* number of csets in use */
  	cset *sets;		/* -> cset [ncsets] */
  	int cflags;		/* copy of regcomp() cflags argument */
  	sopno nstates;		/* = number of sops */
 @@ -175,13 +175,13 @@
  #		define	USEBOL	01	/* used ^ */
  #		define	USEEOL	02	/* used $ */
  #		define	BAD	04	/* something wrong */
 -	int nbol;		/* number of ^ used */
 -	int neol;		/* number of $ used */
 +	size_t nbol;		/* number of ^ used */
 +	size_t neol;		/* number of $ used */
  	char *must;		/* match must contain this string */
  	int moffset;		/* latest point at which must may be located */
  	int *charjump;		/* Boyer-Moore char jump table */
  	int *matchjump;		/* Boyer-Moore match jump table */
 -	int mlen;		/* length of must */
 +	size_t mlen;		/* length of must */
  	size_t nsub;		/* copy of re_nsub */
  	int backrefs;		/* does it use back references? */
  	sopno nplus;		/* how deep does it nest +s? */
 
 --------------020708080805040202010709--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201405041650.s44Go1M3064711>