Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rewrite regex and add extra check to variable names #74

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions src/Sav/Writer.php
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public function __construct($data = [], $buffer = null)
$this->write($data);
}
}

/**
* @param array $data
* @param string $file
Expand Down Expand Up @@ -101,7 +101,7 @@ public function write($data)

$this->data = new Record\Data();

$nominalIdx = 0;
$nominalIdx = 0;

/** @var Variable $var */
// for ($idx = 0; $idx <= $variablesCount; $idx++) {
Expand All @@ -110,12 +110,15 @@ public function write($data)
$var = new Variable($var);
}

//if (! preg_match('/^[A-Za-z0-9_]+$/', $var->name)) {
// UTF-8 and '.' characters could pass here
if (!preg_match('/^[A-Za-z0-9_\.\x{4e00}-\x{9fa5}]+$/u', $var->name)) {
if (!preg_match('/^(?!#|\$|\.)[\w0-9_.#@$\x{4e00}-\x{9fa5}]+(?<!\.|_)$/u', $var->name)) {
throw new \InvalidArgumentException(sprintf('Variable name `%s` contains an illegal character.', $var->name));
}

if (in_array($var->name, ['ALL', 'AND', 'BY', 'EQ', 'GE', 'GT', 'LE', 'LT', 'NE', 'NOT', 'OR', 'TO', 'WITH'])) {
$var->name = \uniqid($var->name);
}

if (empty($var->width)) {
throw new \InvalidArgumentException(sprintf('Invalid field width. Should be an integer number greater than zero.'));
}
Expand Down
80 changes: 80 additions & 0 deletions tests/NamingTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
<?php

use SPSS\Sav\Reader;
use SPSS\Sav\Record\Info\LongVariableNames;
use SPSS\Sav\Writer;
use SPSS\Tests\TestCase;

/**
 * Exercises the variable-name handling of the SAV writer.
 *
 * Two behaviours are covered: reserved keywords used as variable names are
 * expected to come back with a 13-character suffix appended, and names with
 * an illegal leading or trailing character are expected to be rejected.
 */
class NamingTest extends TestCase
{
    /**
     * Variable names the writer is expected to reject.
     *
     * Datasets are keyed by a short description so a failing case is easy to
     * identify in the PHPUnit output. Each dataset carries exactly the one
     * argument that testIllegalNames() consumes.
     *
     * @return array
     */
    public function illegalNameProvider()
    {
        return [
            'leading hash' => ['#FOO'],
            'leading dollar' => ['$FOO'],
            'leading dot' => ['.FOO'],
            'trailing dot' => ['FOO.'],
            'trailing underscore' => ['FOO_'],
        ];
    }

    /**
     * Reserved keywords must not be written verbatim: the long variable name
     * read back has to be the original keyword followed by 13 word characters.
     */
    public function testReservedNames()
    {
        $variables = [
            [
                'name' => 'WITH',
                'width' => 16,
                'format' => 1,
            ],
            [
                'name' => 'OR',
                'format' => 5,
            ],
        ];

        $writer = new Writer($this->buildData($variables));

        $buffer = $writer->getBuffer();
        $buffer->rewind();

        $reader = Reader::fromString($buffer->getStream())->read();

        // preg_quote() keeps the pattern valid even if a name containing
        // regex metacharacters is added to the fixture later.
        $this->assertRegExp(
            '/^' . preg_quote($variables[0]['name'], '/') . '[\w]{13}$/',
            $reader->info[LongVariableNames::SUBTYPE]['V00001']
        );
        $this->assertRegExp(
            '/^' . preg_quote($variables[1]['name'], '/') . '[\w]{13}$/',
            $reader->info[LongVariableNames::SUBTYPE]['V00002']
        );
    }

    /**
     * Constructing a writer with an illegal variable name must throw.
     *
     * @dataProvider illegalNameProvider
     *
     * @param string $name Variable name expected to be rejected.
     */
    public function testIllegalNames($name)
    {
        // Expectations are registered before the call that should throw.
        $this->expectException(\InvalidArgumentException::class);
        $this->expectExceptionMessage(sprintf('Variable name `%s` contains an illegal character.', $name));

        $variables = [
            [
                'name' => $name,
                'width' => 16,
                'format' => 1,
            ],
        ];

        new Writer($this->buildData($variables));
    }

    /**
     * Builds the writer input shared by the tests in this class.
     *
     * @param array $variables Variable definitions to place under the 'variables' key.
     *
     * @return array Input array with a fixed header and the given variables.
     */
    private function buildData(array $variables)
    {
        return [
            'header' => [
                'prodName' => '@(#) IBM SPSS STATISTICS',
                'layoutCode' => 2,
                'creationDate' => date('d M y'),
                'creationTime' => date('H:i:s'),
            ],
            'variables' => $variables,
        ];
    }
}
4 changes: 2 additions & 2 deletions tests/SavRandomReadWriteTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public function provider()
$count = 1; // mt_rand(1, 20);
for ($i = 0; $i < $count; $i++) {
$var = $this->generateVariable([
'id' => $this->generateRandomString(mt_rand(2, 100)),
'id' => $this->generateRandomString(mt_rand(2, 100)) . 'a',
'casesCount' => $header['casesCount'],
]
);
Expand All @@ -50,7 +50,7 @@ public function provider()
$header['casesCount'] = 5;
for ($i = 0; $i < 100; $i++) {
$variable = $this->generateVariable([
'id' => $this->generateRandomString(mt_rand(2, 100)),
'id' => $this->generateRandomString(mt_rand(2, 100)) . 'a',
'casesCount' => $header['casesCount'],
]);
$header['nominalCaseSize'] = Utils::widthToOcts($variable['width']);
Expand Down
42 changes: 39 additions & 3 deletions tests/WriteMultibyteTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace SPSS\Tests;

use SPSS\Sav\Reader;
use SPSS\Sav\Record\Info\LongVariableNames;
use SPSS\Sav\Variable;
use SPSS\Sav\Writer;

Expand All @@ -11,7 +12,7 @@ class WriteMultibyteTest extends TestCase
public function testMultiByteLabel()
{
$data = [
'header' => [
'header' => [
'prodName' => '@(#) IBM SPSS STATISTICS',
'layoutCode' => 2,
'creationDate' => date('d M y'),
Expand Down Expand Up @@ -55,13 +56,12 @@ public function testMultiByteLabel()

/**
* ISSUE #20.
*
* Chinese value labels seem to work fine, but free text does not work
*/
public function testChinese()
{
$input = [
'header' => [
'header' => [
'prodName' => '@(#) IBM SPSS STATISTICS 64-bit Macintosh 23.0.0.0',
'creationDate' => '05 Oct 18',
'creationTime' => '01:36:53',
Expand Down Expand Up @@ -124,4 +124,40 @@ public function testChinese()
$expected[2][1] = $input['variables'][1]['data'][2];
$this->assertEquals($expected, $reader->data);
}

/**
 * Writes two variables whose names contain multi-byte characters and checks
 * that the long variable names read back are identical to the ones written.
 */
public function testMultiByteVariableName()
{
    $header = [
        'prodName' => '@(#) IBM SPSS STATISTICS',
        'layoutCode' => 2,
        'creationDate' => date('d M y'),
        'creationTime' => date('H:i:s'),
    ];

    // A single-character name and a longer mixed ASCII/multi-byte name.
    $shortVariable = [
        'name' => 'Å',
        'width' => 16,
        'format' => 1,
    ];
    $longVariable = [
        'name' => 'DSADÆØØÅÅÅÅÅSAAA',
        'format' => 5,
    ];

    $data = [
        'header' => $header,
        'variables' => [$shortVariable, $longVariable],
    ];

    $stream = (new Writer($data))->getBuffer();
    $stream->rewind();

    $result = Reader::fromString($stream->getStream())->read();

    // Short variable name
    $this->assertEquals($shortVariable['name'], $result->info[LongVariableNames::SUBTYPE]['V00001']);
    // Long variable name
    $this->assertEquals($longVariable['name'], $result->info[LongVariableNames::SUBTYPE]['V00002']);
}

}