Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 22 Nov 2009 21:46:16 GMT
From:      Yevgen Drachenko <geka@sippysoft.com>
To:        freebsd-gnats-submit@FreeBSD.org
Subject:   misc/140794: Add support of Unicode for BIFF8 files.
Message-ID:  <200911222146.nAMLkGd6069755@www.freebsd.org>
Resent-Message-ID: <200911222150.nAMLo0jB042319@freefall.freebsd.org>

next in thread | raw e-mail | index | archive | help

>Number:         140794
>Category:       misc
>Synopsis:       Add support of Unicode for BIFF8 files.
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    freebsd-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Sun Nov 22 21:50:00 UTC 2009
>Closed-Date:
>Last-Modified:
>Originator:     Yevgen Drachenko
>Release:        FreeBSD 6.3-PRERELEASE
>Organization:
Sippy Software, Inc.
>Environment:
FreeBSD ssp-geka.sippysoft.com 6.3-PRERELEASE FreeBSD 6.3-PRERELEASE #1: Thu Nov 22 02:23:12 UTC 2007     root@pioneer.sippysoft.com:/usr/obj/i386/usr/src/sys/SSP-PRODUCTION  i386

>Description:
Using $workbook->setVersion(8) (BIFF8) together with $worksheet->setInputEncoding('UTF-8') generates a file that crashes Excel.
>How-To-Repeat:

>Fix:


Patch attached with submission follows:

diff -Nur /usr/ports/textproc/pear-Spreadsheet_Excel_Writer/Makefile pear-Spreadsheet_Excel_Writer.new/Makefile
--- /usr/ports/textproc/pear-Spreadsheet_Excel_Writer/Makefile	2007-04-14 04:49:04.000000000 +0300
+++ pear-Spreadsheet_Excel_Writer.new/Makefile	2009-11-22 23:15:51.000000000 +0200
@@ -7,6 +7,7 @@
 
 PORTNAME=	Spreadsheet_Excel_Writer
 PORTVERSION=	0.9.1
+PORTREVISION=	1
 CATEGORIES=	textproc www pear
 
 MAINTAINER=	pav@FreeBSD.org
diff -Nur /usr/ports/textproc/pear-Spreadsheet_Excel_Writer/files/patch-biff8_utf8 pear-Spreadsheet_Excel_Writer.new/files/patch-biff8_utf8
--- /usr/ports/textproc/pear-Spreadsheet_Excel_Writer/files/patch-biff8_utf8	1970-01-01 03:00:00.000000000 +0300
+++ pear-Spreadsheet_Excel_Writer.new/files/patch-biff8_utf8	2009-11-22 23:29:18.000000000 +0200
@@ -0,0 +1,199 @@
+--- Writer/Workbook.php.orig	2005-11-08 05:32:52.000000000 +0200
++++ Writer/Workbook.php	2009-11-22 23:14:33.000000000 +0200
+@@ -1311,9 +1311,10 @@
+            8228 : Maximum Excel97 block size
+              -4 : Length of block header
+              -8 : Length of additional SST header information
+-         = 8216
++		     -8 : Arbitrary number to keep within _add_continue() limit
++         = 8208
+         */
+-        $continue_limit     = 8216;
++        $continue_limit     = 8208;
+         $block_length       = 0;
+         $written            = 0;
+         $this->_block_sizes = array();
+@@ -1321,6 +1322,9 @@
+ 
+         foreach (array_keys($this->_str_table) as $string) {
+             $string_length = strlen($string);
++			$headerinfo    = unpack("vlength/Cencoding", $string);
++			$encoding      = $headerinfo["encoding"];
++			$split_string  = 0;
+ 
+             // Block length is the total length of the strings that will be
+             // written out in a single SST or CONTINUE block.
+@@ -1347,16 +1351,39 @@
+                 boundaries. Therefore, in some cases we need to reduce the
+                 amount of available
+                 */
++				$align = 0;
++
++				# Only applies to Unicode strings
++				if ($encoding == 1) {
++					# Min string + header size -1
++					$header_length = 4;
++
++					if ($space_remaining > $header_length) {
++						# String contains 3 byte header => split on odd boundary
++						if (!$split_string && $space_remaining % 2 != 1) {
++							$space_remaining--;
++							$align = 1;
++						}
++						# Split section without header => split on even boundary
++						else if ($split_string && $space_remaining % 2 == 1) {
++							$space_remaining--;
++							$align = 1;
++						}
++
++						$split_string = 1;
++					}
++				}
++
+ 
+                 if ($space_remaining > $header_length) {
+                     // Write as much as possible of the string in the current block
+                     $written      += $space_remaining;
+ 
+                     // Reduce the current block length by the amount written
+-                    $block_length -= $continue_limit - $continue;
++                    $block_length -= $continue_limit - $continue - $align;
+ 
+                     // Store the max size for this block
+-                    $this->_block_sizes[] = $continue_limit;
++                    $this->_block_sizes[] = $continue_limit - $align;
+ 
+                     // If the current string was split then the next CONTINUE block
+                     // should have the string continue flag (grbit) set unless the
+@@ -1398,13 +1425,19 @@
+          This length is required to set the offsets in the BOUNDSHEET records since
+          they must be written before the SST records
+         */
+-        $total_offset = array_sum($this->_block_sizes);
+-        // SST information
+-        $total_offset += 8;
+-        if (!empty($this->_block_sizes)) {
+-            $total_offset += (count($this->_block_sizes)) * 4; // add CONTINUE headers
+-        }
+-        return $total_offset;
++
++		$tmp_block_sizes = array();
++		$tmp_block_sizes = $this->_block_sizes;
++
++		$length  = 12;
++		if (!empty($tmp_block_sizes)) {
++			$length += array_shift($tmp_block_sizes); # SST
++		}
++		while (!empty($tmp_block_sizes)) {
++			$length += 4 + array_shift($tmp_block_sizes); # CONTINUEs
++		}
++
++		return $length;
+     }
+ 
+     /**
+@@ -1421,9 +1454,31 @@
+     function _storeSharedStringsTable()
+     {
+         $record  = 0x00fc;  // Record identifier
++		$length  = 0x0008;  // Number of bytes to follow
++		$total   = 0x0000;
++
++        // Iterate through the strings to calculate the CONTINUE block sizes
++        $continue_limit = 8208;
++        $block_length   = 0;
++        $written        = 0;
++        $continue       = 0;
++
+         // sizes are upside down
+-        $this->_block_sizes = array_reverse($this->_block_sizes);
+-        $length = array_pop($this->_block_sizes) + 8; // First block size plus SST information
++		$tmp_block_sizes = $this->_block_sizes;
++//        $tmp_block_sizes = array_reverse($this->_block_sizes);
++
++		# The SST record is required even if it contains no strings. Thus we will
++		# always have a length
++		#
++		if (!empty($tmp_block_sizes)) {
++			$length = 8 + array_shift($tmp_block_sizes);
++		}
++		else {
++			# No strings
++			$length = 8;
++		}
++
++
+ 
+         // Write the SST block header information
+         $header      = pack("vv", $record, $length);
+@@ -1431,18 +1486,14 @@
+         $this->_append($header . $data);
+ 
+ 
+-        // Iterate through the strings to calculate the CONTINUE block sizes
+-        $continue_limit = 8216;
+-        $block_length   = 0;
+-        $written        = 0;
+-        $continue       = 0;
+ 
+ 
+         /* TODO: not good for performance */
+         foreach (array_keys($this->_str_table) as $string) {
+ 
+             $string_length = strlen($string);
+-            $encoding      = 0; // assume there are no Unicode strings
++			$headerinfo    = unpack("vlength/Cencoding", $string);
++			$encoding      = $headerinfo["encoding"];
+             $split_string  = 0;
+ 
+             // Block length is the total length of the strings that will be
+@@ -1473,6 +1524,30 @@
+ 
+                 // Unicode data should only be split on char (2 byte) boundaries.
+                 // Therefore, in some cases we need to reduce the amount of available
++	            // space by 1 byte to ensure the correct alignment.
++    	        $align = 0;
++
++				// Only applies to Unicode strings
++				if ($encoding == 1) {
++					// Min string + header size -1
++					$header_length = 4;
++
++					if ($space_remaining > $header_length) {
++						// String contains 3 byte header => split on odd boundary
++						if (!$split_string && $space_remaining % 2 != 1) {
++							$space_remaining--;
++							$align = 1;
++						}
++						// Split section without header => split on even boundary
++						else if ($split_string && $space_remaining % 2 == 1) {
++							$space_remaining--;
++							$align = 1;
++						}
++
++						$split_string = 1;
++					}
++				}
++
+ 
+                 if ($space_remaining > $header_length) {
+                     // Write as much as possible of the string in the current block
+@@ -1483,7 +1558,7 @@
+                     $string = substr($string, $space_remaining);
+ 
+                     // Reduce the current block length by the amount written
+-                    $block_length -= $continue_limit - $continue;
++                    $block_length -= $continue_limit - $continue - $align;
+ 
+                     // If the current string was split then the next CONTINUE block
+                     // should have the string continue flag (grbit) set unless the
+@@ -1503,7 +1578,8 @@
+                 // Write the CONTINUE block header
+                 if (!empty($this->_block_sizes)) {
+                     $record  = 0x003C;
+-                    $length  = array_pop($this->_block_sizes);
++                    $length  = array_shift($tmp_block_sizes);
++
+                     $header  = pack('vv', $record, $length);
+                     if ($continue) {
+                         $header .= pack('C', $encoding);


>Release-Note:
>Audit-Trail:
>Unformatted:



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200911222146.nAMLkGd6069755>