Skip to content

Commit

Permalink
Merge pull request #21 from mennodekker/master
Browse files Browse the repository at this point in the history
Fix for #20 read/write multibyte chars in free text
  • Loading branch information
tiamo authored Mar 5, 2019
2 parents a17eb3a + f5b3e55 commit 12417dc
Show file tree
Hide file tree
Showing 2 changed files with 115 additions and 41 deletions.
84 changes: 43 additions & 41 deletions src/Sav/Record/Data.php
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ public function read(Buffer $buffer)
foreach ($variables as $index => $var) {

$isNumeric = $var->width == 0;
$width = isset($var->write[1]) ? $var->write[1] : $var->width;
$width = isset($var->write[2]) ? $var->write[2] : $var->width;

// var_dump($var);
// exit;
Expand Down Expand Up @@ -119,47 +119,49 @@ public function read(Buffer $buffer)
}
}
} else {
$val = '';
if (! $compressed) {
$val = $buffer->readString(8);
} else {
$opcode = $this->readOpcode($buffer);
switch ($opcode) {
case self::OPCODE_NOP;
break;
case self::OPCODE_EOF;
throw new Exception(
'Error reading data: unexpected end of compressed data file (cluster code 252)'
);
break;
case self::OPCODE_RAW_DATA;
$val = $buffer->readString(8);
break;
case self::OPCODE_WHITESPACES;
$val = ' ';
break;
}
}

if ($parent >= 0) {
$this->matrix[$case][$parent] .= $val;
$octs--;
if ($octs <= 0) {
$this->matrix[$case][$parent] = rtrim($this->matrix[$case][$parent]);
$parent = -1;
do {
$val = '';
if (! $compressed) {
$val = $buffer->readString(8);
} else {
$opcode = $this->readOpcode($buffer);
switch ($opcode) {
case self::OPCODE_NOP;
break;
case self::OPCODE_EOF;
throw new Exception(
'Error reading data: unexpected end of compressed data file (cluster code 252)'
);
break;
case self::OPCODE_RAW_DATA;
$val = $buffer->readString(8);
break;
case self::OPCODE_WHITESPACES;
$val = ' ';
break;
}
}
} else {
$width = isset($veryLongStrings[$var->name]) ? $veryLongStrings[$var->name] : $width;
if ($width > 0) {
$octs = Utils::widthToOcts($width) - 1; // Buffer::roundUp($width, 8) / 8) -1;
if ($octs > 0) {
$parent = $index;
} else {
$val = rtrim($val);

if ($parent >= 0) {
$this->matrix[$case][$parent] .= $val;
$octs--;
if ($octs <= 0) {
$this->matrix[$case][$parent] = rtrim($this->matrix[$case][$parent]);
$parent = -1;
}
} else {
$width = isset($veryLongStrings[$var->name]) ? $veryLongStrings[$var->name] : $width;
if ($width > 0) {
$octs = Utils::widthToOcts($width) - 1; // Buffer::roundUp($width, 8) / 8) -1;
if ($octs > 0) {
$parent = $index;
} else {
$val = rtrim($val);
}
$this->matrix[$case][$index] = $val;
}
$this->matrix[$case][$index] = $val;
}
}
} while ($octs > 0);
}
}
}
Expand Down Expand Up @@ -246,7 +248,7 @@ public function write(Buffer $buffer)
$segWidth = Utils::segmentAllocWidth($width, $s);
for ($i = $segWidth; $i > 0; $i -= 8, $offset += 8) {
// $chunkSize = min($i, 8);
$val = mb_substr($value, $offset, 8);
$val = substr($value, $offset, 8); // Read 8 byte segements, don't use mbsubstr here
if ($val == "") {
$this->writeOpcode($buffer, $dataBuffer, self::OPCODE_WHITESPACES);
} else {
Expand Down Expand Up @@ -294,4 +296,4 @@ public function toArray()
{
return $this->matrix;
}
}
}
72 changes: 72 additions & 0 deletions tests/WriteMultibyteTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace SPSS\Tests;

use SPSS\Sav\Reader;
use SPSS\Sav\Variable;
use SPSS\Sav\Writer;

class WriteMultibyteTest extends TestCase
Expand Down Expand Up @@ -47,5 +48,76 @@ public function testMultiByteLabel()
// Long name
$this->assertEquals(mb_substr($data['variables'][1]['label'], 0, -1), $reader->variables[1]->label);
}

/**
 * Regression test for issue #20.
 *
 * Builds a two-variable file (a numeric variable carrying Chinese value
 * labels and a 100-wide string variable carrying Chinese free text), writes
 * it to an in-memory buffer, reads it back, and checks that the decoded
 * data matrix equals the data that was written.
 */
public function testChinese()
{
    // Numeric variable whose value labels contain multibyte characters.
    $labelledNumeric = [
        'name' => 'test1',
        'format' => Variable::FORMAT_TYPE_F,
        'width' => 4,
        'decimals' => 2,
        'label' => 'test',
        'values' => [
            1 => '1测试中文标签1',
            2 => '2测试中文标签2',
        ],
        'missing' => [],
        'columns' => 5,
        'alignment' => Variable::ALIGN_RIGHT,
        'measure' => Variable::MEASURE_SCALE,
        'attributes' => [
            '$@Role' => Variable::ROLE_PARTITION,
        ],
        'data' => [1, 1, 1],
    ];

    // String variable whose case data is multibyte free text.
    $freeText = [
        'name' => 'test2',
        'format' => Variable::FORMAT_TYPE_A,
        'width' => 100,
        'label' => 'test',
        'columns' => 100,
        'alignment' => Variable::ALIGN_LEFT,
        'measure' => Variable::MEASURE_NOMINAL,
        'attributes' => [
            '$@Role' => Variable::ROLE_SPLIT,
        ],
        'data' => [
            '测试中文数据1',
            '测试中文数据2',
            '测试中文数据3',
        ],
    ];

    $input = [
        'header' => [
            'prodName' => '@(#) IBM SPSS STATISTICS 64-bit Macintosh 23.0.0.0',
            'creationDate' => '05 Oct 18',
            'creationTime' => '01:36:53',
            'weightIndex' => 0,
        ],
        'variables' => [$labelledNumeric, $freeText],
    ];

    $writer = new Writer($input);

    // To inspect the generated file in SPSS, enable the following line:
    //$writer->save('chinese.sav');
    $buffer = $writer->getBuffer();
    $buffer->rewind();

    $reader = Reader::fromString($buffer->getStream())->read();

    // The reader exposes data as [case][variable]; the fixture stores it as
    // [variable]['data'][case], so transpose it to build the expectation.
    $expected = [];
    foreach ($input['variables'] as $varIndex => $variable) {
        foreach ($variable['data'] as $caseIndex => $value) {
            $expected[$caseIndex][$varIndex] = $value;
        }
    }

    $this->assertEquals($expected, $reader->data);
}

}

0 comments on commit 12417dc

Please sign in to comment.