Skip site navigation (1) | Skip section navigation (2)
Date:      Wed, 2 Sep 2015 05:55:58 +0000 (UTC)
From:      Baptiste Daroussin <bapt@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org
Subject:   svn commit: r287393 - stable/10/lib/libc/locale
Message-ID:  <201509020555.t825twGv027171@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: bapt
Date: Wed Sep  2 05:55:57 2015
New Revision: 287393
URL: https://svnweb.freebsd.org/changeset/base/287393

Log:
  MFC: r286490,r286491,r287125
  
  Per RFC 3629, values greater than 0x10ffff should be rejected (r286490,r286491).
  
  Make UTF-8 parsing and generation more strict (r287125 by ed).
  
  - In mbrtowc() we need to disallow code points above 0x10ffff.
  - In wcrtomb() we need to disallow code points between 0xd800 and 0xdfff.

Modified:
  stable/10/lib/libc/locale/utf8.c
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/lib/libc/locale/utf8.c
==============================================================================
--- stable/10/lib/libc/locale/utf8.c	Wed Sep  2 05:45:47 2015	(r287392)
+++ stable/10/lib/libc/locale/utf8.c	Wed Sep  2 05:55:57 2015	(r287393)
@@ -145,14 +145,6 @@ _UTF8_mbrtowc(wchar_t * __restrict pwc, 
 			mask = 0x07;
 			want = 4;
 			lbound = 0x10000;
-		} else if ((ch & 0xfc) == 0xf8) {
-			mask = 0x03;
-			want = 5;
-			lbound = 0x200000;
-		} else if ((ch & 0xfe) == 0xfc) {
-			mask = 0x01;
-			want = 6;
-			lbound = 0x4000000;
 		} else {
 			/*
 			 * Malformed input; input is not UTF-8.
@@ -199,7 +191,7 @@ _UTF8_mbrtowc(wchar_t * __restrict pwc, 
 		errno = EILSEQ;
 		return ((size_t)-1);
 	}
-	if (wch >= 0xd800 && wch <= 0xdfff) {
+	if ((wch >= 0xd800 && wch <= 0xdfff) || wch > 0x10ffff) {
 		/*
 		 * Malformed input; invalid code points.
 		 */
@@ -326,17 +318,15 @@ _UTF8_wcrtomb(char * __restrict s, wchar
 		lead = 0xc0;
 		len = 2;
 	} else if ((wc & ~0xffff) == 0) {
+		if (wc >= 0xd800 && wc <= 0xdfff) {
+			errno = EILSEQ;
+			return ((size_t)-1);
+		}
 		lead = 0xe0;
 		len = 3;
-	} else if ((wc & ~0x1fffff) == 0) {
+	} else if (wc >= 0 && wc <= 0x10ffff) {
 		lead = 0xf0;
 		len = 4;
-	} else if ((wc & ~0x3ffffff) == 0) {
-		lead = 0xf8;
-		len = 5;
-	} else if ((wc & ~0x7fffffff) == 0) {
-		lead = 0xfc;
-		len = 6;
 	} else {
 		errno = EILSEQ;
 		return ((size_t)-1);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201509020555.t825twGv027171>