From 668efe81e024661ba641ee0ad8abd8c7a99d6df8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Sinnbeck?= Date: Tue, 13 Feb 2024 15:13:05 +0100 Subject: [PATCH 1/6] rewrite regex and add extra check to ensure it follows current spss naming rules --- src/Sav/Writer.php | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Sav/Writer.php b/src/Sav/Writer.php index 36260d8..3b857c4 100644 --- a/src/Sav/Writer.php +++ b/src/Sav/Writer.php @@ -59,7 +59,7 @@ public function __construct($data = [], $buffer = null) $this->write($data); } } - + /** * @param array $data * @param string $file @@ -110,12 +110,15 @@ public function write($data) $var = new Variable($var); } - //if (! preg_match('/^[A-Za-z0-9_]+$/', $var->name)) { // UTF-8 and '.' characters could pass here - if (!preg_match('/^[A-Za-z0-9_\.\x{4e00}-\x{9fa5}]+$/u', $var->name)) { + if (!preg_match('/^(?!#|\$|\.)[\w0-9_.#@$\x{4e00}-\x{9fa5}]+(?name)) { throw new \InvalidArgumentException(sprintf('Variable name `%s` contains an illegal character.', $var->name)); } + if (in_array($var->name, ['ALL', 'AND', 'BY', 'EQ', 'GE', 'GT', 'LE', 'LT', 'NE', 'NOT', 'OR', 'TO', 'WITH'])) { + throw new \InvalidArgumentException(sprintf('Variable name `%s` is reserved!.', $var->name)); + } + if (empty($var->width)) { throw new \InvalidArgumentException(sprintf('Invalid field width. Should be an integer number greater than zero.')); } From c2ebe1d9a31948043b2734a1bfaf0faa4dd0b919 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Sinnbeck?= Date: Tue, 13 Feb 2024 15:55:27 +0100 Subject: [PATCH 2/6] add tests --- tests/WriteMultibyteTest.php | 42 +++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/tests/WriteMultibyteTest.php b/tests/WriteMultibyteTest.php index d00b3e9..c42bea7 100644 --- a/tests/WriteMultibyteTest.php +++ b/tests/WriteMultibyteTest.php @@ -3,6 +3,7 @@ namespace SPSS\Tests; use SPSS\Sav\Reader; +use SPSS\Sav\Record\Info\LongVariableNames; use SPSS\Sav\Variable; use SPSS\Sav\Writer; @@ -11,7 +12,7 @@ class WriteMultibyteTest extends TestCase public function testMultiByteLabel() { $data = [ - 'header' => [ + 'header' => [ 'prodName' => '@(#) IBM SPSS STATISTICS', 'layoutCode' => 2, 'creationDate' => date('d M y'), @@ -55,13 +56,12 @@ public function testMultiByteLabel() /** * ISSUE #20. - * * Chinese value labels seem to work fine, but free text does not work */ public function testChinese() { $input = [ - 'header' => [ + 'header' => [ 'prodName' => '@(#) IBM SPSS STATISTICS 64-bit Macintosh 23.0.0.0', 'creationDate' => '05 Oct 18', 'creationTime' => '01:36:53', @@ -124,4 +124,40 @@ public function testChinese() $expected[2][1] = $input['variables'][1]['data'][2]; $this->assertEquals($expected, $reader->data); } + + public function testMultiByteVariableName() + { + $data = [ + 'header' => [ + 'prodName' => '@(#) IBM SPSS STATISTICS', + 'layoutCode' => 2, + 'creationDate' => date('d M y'), + 'creationTime' => date('H:i:s'), + ], + 'variables' => [ + [ + 'name' => 'Å', + 'width' => 16, + 'format' => 1, + ], + [ + 'name' => 'DSADÆØØÅÅÅÅÅSAAA', + 'format' => 5, + ], + ], + ]; + $writer = new Writer($data); + + $buffer = $writer->getBuffer(); + $buffer->rewind(); + + $reader = Reader::fromString($buffer->getStream())->read(); + + // Short variable name + $this->assertEquals($data['variables'][0]['name'], $reader->info[LongVariableNames::SUBTYPE]['V00001']); + // Long variable name + $this->assertEquals($data['variables'][1]['name'], $reader->info[LongVariableNames::SUBTYPE]['V00002']); + + } + } From e9d395e01c49b6745049804d72a9e3ec6b6504fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Sinnbeck?= Date: Wed, 14 Feb 2024 08:04:12 +0100 Subject: [PATCH 3/6] dont throw exception on reserved names --- src/Sav/Writer.php | 20 +++++++++++++++++-- tests/NamingTest.php | 47 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 tests/NamingTest.php diff --git a/src/Sav/Writer.php b/src/Sav/Writer.php index 3b857c4..677bda5 100644 --- a/src/Sav/Writer.php +++ b/src/Sav/Writer.php @@ -101,7 +101,22 @@ public function write($data) $this->data = new Record\Data(); - $nominalIdx = 0; + $nominalIdx = 0; + $reservedNamesIndex = [ + 'ALL' => 0, + 'AND' => 0, + 'BY' => 0, + 'EQ' => 0, + 'GE' => 0, + 'GT' => 0, + 'LE' => 0, + 'LT' => 0, + 'NE' => 0, + 'NOT' => 0, + 'OR' => 0, + 'TO' => 0, + 'WITH' => 0, + ]; /** @var Variable $var */ // for ($idx = 0; $idx <= $variablesCount; $idx++) { @@ -116,7 +131,8 @@ public function write($data) } if (in_array($var->name, ['ALL', 'AND', 'BY', 'EQ', 'GE', 'GT', 'LE', 'LT', 'NE', 'NOT', 'OR', 'TO', 'WITH'])) { - throw new \InvalidArgumentException(sprintf('Variable name `%s` is reserved!.', $var->name)); + $reservedNamesIndex[$var->name]++; + $var->name = $var->name . '_' . $reservedNamesIndex[$var->name]; } if (empty($var->width)) { diff --git a/tests/NamingTest.php b/tests/NamingTest.php new file mode 100644 index 0000000..9cd1867 --- /dev/null +++ b/tests/NamingTest.php @@ -0,0 +1,47 @@ + [ + 'prodName' => '@(#) IBM SPSS STATISTICS', + 'layoutCode' => 2, + 'creationDate' => date('d M y'), + 'creationTime' => date('H:i:s'), + ], + 'variables' => [ + [ + 'name' => 'WITH', + 'width' => 16, + 'format' => 1, + ], + [ + 'name' => 'WITH', + 'width' => 16, + 'format' => 1, + ], + [ + 'name' => 'OR', + 'format' => 5, + ], + ], + ]; + $writer = new Writer($data); + + $buffer = $writer->getBuffer(); + $buffer->rewind(); + + $reader = Reader::fromString($buffer->getStream())->read(); + + $this->assertEquals($data['variables'][0]['name'] . '_' . 1, $reader->info[LongVariableNames::SUBTYPE]['V00001']); + $this->assertEquals($data['variables'][1]['name'] . '_' . 2, $reader->info[LongVariableNames::SUBTYPE]['V00002']); + $this->assertEquals($data['variables'][2]['name'] . '_' . 1, $reader->info[LongVariableNames::SUBTYPE]['V00003']); + } +} From 41af280c05324d453c8fbf03ee50f8b5e12fb767 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Sinnbeck?= Date: Wed, 14 Feb 2024 08:15:10 +0100 Subject: [PATCH 4/6] add tests for illegal characters in variable names --- tests/NamingTest.php | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/tests/NamingTest.php b/tests/NamingTest.php index 9cd1867..b9383a6 100644 --- a/tests/NamingTest.php +++ b/tests/NamingTest.php @@ -7,7 +7,18 @@ class NamingTest extends TestCase { - public function test() + public function illegalNameProvider() + { + return [ + ['#FOO', ''], + ['$FOO', ''], + ['.FOO', ''], + ['FOO.', ''], + ['FOO_', ''], + ]; + } + + public function testReservedNames() { $data = [ 'header' => [ @@ -44,4 +55,32 @@ public function test() $this->assertEquals($data['variables'][1]['name'] . '_' . 2, $reader->info[LongVariableNames::SUBTYPE]['V00002']); $this->assertEquals($data['variables'][2]['name'] . '_' . 1, $reader->info[LongVariableNames::SUBTYPE]['V00003']); } + + + /** + * @dataProvider illegalNameProvider + */ + public function testIllegalNames($name) + { + $this->expectException(\InvalidArgumentException::class); + $this->expectExceptionMessage(sprintf('Variable name `%s` contains an illegal character.', $name)); + + $data = [ + 'header' => [ + 'prodName' => '@(#) IBM SPSS STATISTICS', + 'layoutCode' => 2, + 'creationDate' => date('d M y'), + 'creationTime' => date('H:i:s'), + ], + 'variables' => [ + [ + 'name' => $name, + 'width' => 16, + 'format' => 1, + ], + ], + ]; + + new Writer($data); + } } From bc259f49ff924d21e89e424d58ddab424a22d72a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Sinnbeck?= Date: Fri, 16 Feb 2024 08:21:25 +0100 Subject: [PATCH 5/6] ensure unique names for reserved variable names --- src/Sav/Writer.php | 18 +----------------- tests/NamingTest.php | 10 ++-------- 2 files changed, 3 insertions(+), 25 deletions(-) diff --git a/src/Sav/Writer.php b/src/Sav/Writer.php index 677bda5..c6e27db 100644 --- a/src/Sav/Writer.php +++ b/src/Sav/Writer.php @@ -102,21 +102,6 @@ public function write($data) $this->data = new Record\Data(); $nominalIdx = 0; - $reservedNamesIndex = [ - 'ALL' => 0, - 'AND' => 0, - 'BY' => 0, - 'EQ' => 0, - 'GE' => 0, - 'GT' => 0, - 'LE' => 0, - 'LT' => 0, - 'NE' => 0, - 'NOT' => 0, - 'OR' => 0, - 'TO' => 0, - 'WITH' => 0, - ]; /** @var Variable $var */ // for ($idx = 0; $idx <= $variablesCount; $idx++) { @@ -131,8 +116,7 @@ public function write($data) } if (in_array($var->name, ['ALL', 'AND', 'BY', 'EQ', 'GE', 'GT', 'LE', 'LT', 'NE', 'NOT', 'OR', 'TO', 'WITH'])) { - $reservedNamesIndex[$var->name]++; - $var->name = $var->name . '_' . $reservedNamesIndex[$var->name]; + $var->name = \uniqid($var->name); } if (empty($var->width)) { diff --git a/tests/NamingTest.php b/tests/NamingTest.php index b9383a6..b417ed8 100644 --- a/tests/NamingTest.php +++ b/tests/NamingTest.php @@ -28,11 +28,6 @@ public function testReservedNames() 'creationTime' => date('H:i:s'), ], 'variables' => [ - [ - 'name' => 'WITH', - 'width' => 16, - 'format' => 1, - ], [ 'name' => 'WITH', 'width' => 16, @@ -51,9 +46,8 @@ public function testReservedNames() $reader = Reader::fromString($buffer->getStream())->read(); - $this->assertEquals($data['variables'][0]['name'] . '_' . 1, $reader->info[LongVariableNames::SUBTYPE]['V00001']); - $this->assertEquals($data['variables'][1]['name'] . '_' . 2, $reader->info[LongVariableNames::SUBTYPE]['V00002']); - $this->assertEquals($data['variables'][2]['name'] . '_' . 1, $reader->info[LongVariableNames::SUBTYPE]['V00003']); + $this->assertRegExp('/^' . $data['variables'][0]['name'] . '[\w]{13}$/', $reader->info[LongVariableNames::SUBTYPE]['V00001']); + $this->assertRegExp('/^' . $data['variables'][1]['name'] . '[\w]{13}$/', $reader->info[LongVariableNames::SUBTYPE]['V00002']); } From bfe88d799cce49d554b5d80db12fa971d5888265 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Sinnbeck?= Date: Fri, 16 Feb 2024 08:23:01 +0100 Subject: [PATCH 6/6] ensure variable names in tests ends with a letter --- tests/SavRandomReadWriteTest.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/SavRandomReadWriteTest.php b/tests/SavRandomReadWriteTest.php index 0814ed7..4b5f22a 100644 --- a/tests/SavRandomReadWriteTest.php +++ b/tests/SavRandomReadWriteTest.php @@ -37,7 +37,7 @@ public function provider() $count = 1; // mt_rand(1, 20); for ($i = 0; $i < $count; $i++) { $var = $this->generateVariable([ - 'id' => $this->generateRandomString(mt_rand(2, 100)), + 'id' => $this->generateRandomString(mt_rand(2, 100)) . 'a', 'casesCount' => $header['casesCount'], ] ); @@ -50,7 +50,7 @@ public function provider() $header['casesCount'] = 5; for ($i = 0; $i < 100; $i++) { $variable = $this->generateVariable([ - 'id' => $this->generateRandomString(mt_rand(2, 100)), + 'id' => $this->generateRandomString(mt_rand(2, 100)) . 'a', 'casesCount' => $header['casesCount'], ]); $header['nominalCaseSize'] = Utils::widthToOcts($variable['width']);