From 63ae0b66b61e6647aa57f63e53304f53adc3ba6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lester=20Carballo=20P=C3=A9rez?= Date: Mon, 12 Feb 2024 18:20:16 -0600 Subject: [PATCH 1/6] Calculate the truncation and byte length of texts according to their character encoding --- src/Buffer.php | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/Buffer.php b/src/Buffer.php index 120cbc1..7e22ebe 100644 --- a/src/Buffer.php +++ b/src/Buffer.php @@ -214,7 +214,22 @@ public function read($length = null) } /** - * @param $data + * @param string $data + * @param int $maxLength + * + * @return int + */ + public function lengthBytes($data, $maxLength) + { + if (strtolower($this->streamCharset) != strtolower($this->systemCharset)) { + $data = mb_convert_encoding($data, $this->streamCharset, $this->systemCharset); + } + $data = mb_strcut($data, 0, $maxLength, $this->streamCharset); + return \strlen($data); + } + + /** + * @param string $data * @param int|string $length * * @return false|int @@ -224,7 +239,9 @@ public function writeString($data, $length = '*') if (strtolower($this->streamCharset) != strtolower($this->systemCharset)) { $data = mb_convert_encoding($data, $this->streamCharset, $this->systemCharset); } - + if (isset($length) && ($length != '*')) { + $data = mb_strcut($data, 0, $length, $this->streamCharset); + } return $this->write(pack('A' . 
$length, $data)); } From cf0c3e6a1c7327a45fd2aebf059d84985fa1499b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lester=20Carballo=20P=C3=A9rez?= Date: Mon, 12 Feb 2024 18:30:57 -0600 Subject: [PATCH 2/6] Delegate the calculation of the required bytes for the ValueLabel label to the buffer --- src/Sav/Record/ValueLabel.php | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/src/Sav/Record/ValueLabel.php b/src/Sav/Record/ValueLabel.php index d2dad5d..88deb16 100644 --- a/src/Sav/Record/ValueLabel.php +++ b/src/Sav/Record/ValueLabel.php @@ -96,22 +96,16 @@ public function write(Buffer $buffer) $buffer->writeInt(self::TYPE); $buffer->writeInt(\count($this->labels)); foreach ($this->labels as $item) { - $labelLength = min(mb_strlen($item['label']), self::LABEL_MAX_LENGTH); - $label = mb_substr($item['label'], 0, $labelLength); - $labelLengthBytes = mb_strlen($label, '8bit'); - while ($labelLengthBytes > 255) { - // Strip one char, can be multiple bytes - $label = mb_substr($label, 0, -1); - $labelLengthBytes = mb_strlen($label, '8bit'); - } - + $labelLengthBytes = $buffer->lengthBytes($item['label'], self::LABEL_MAX_LENGTH); + $labelLengthBytesRound = Utils::roundUp($labelLengthBytes + 1, 8) - 1; + if ($convertToDouble) { $item['value'] = Utils::stringToDouble($item['value']); } $buffer->writeDouble($item['value']); $buffer->write(\chr($labelLengthBytes)); - $buffer->writeString($label, Utils::roundUp($labelLengthBytes + 1, 8) - 1); + $buffer->writeString($item['label'], $labelLengthBytesRound); } // Value label variable record. 
From 3a795bd06248ffd45642c72c8e31d447c9f16e75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lester=20Carballo=20P=C3=A9rez?= Date: Mon, 12 Feb 2024 19:38:54 -0600 Subject: [PATCH 3/6] Delegate the calculation of the required bytes for the variable label, to the buffer --- src/Sav/Record/Variable.php | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/src/Sav/Record/Variable.php b/src/Sav/Record/Variable.php index bab4cc6..55190ae 100644 --- a/src/Sav/Record/Variable.php +++ b/src/Sav/Record/Variable.php @@ -141,17 +141,10 @@ public function write(Buffer $buffer) $buffer->writeString($this->name, 8); if ($hasLabel) { - // Maxlength is 255 bytes, since we write utf8 a char can be multiple bytes - $labelLength = min(mb_strlen($this->label), 255); - $label = mb_substr($this->label, 0, $labelLength); - $labelLengthBytes = mb_strlen($label, '8bit'); - while ($labelLengthBytes > 255) { - // Strip one char, can be multiple bytes - $label = mb_substr($label, 0, -1); - $labelLengthBytes = mb_strlen($label, '8bit'); - } + $labelLengthBytes = $buffer->lengthBytes($this->label, self::REAL_VLS_CHUNK); + $labelLengthBytesRound = Utils::roundUp($labelLengthBytes, 4); $buffer->writeInt($labelLengthBytes); - $buffer->writeString($label, Utils::roundUp($labelLengthBytes, 4)); + $buffer->writeString($this->label, $labelLengthBytesRound); } // TODO: test @@ -219,9 +212,9 @@ public function getSegmentName($seg = 0) { // TODO: refactory $name = $this->name; - $name = mb_substr($name, 0, 6); + $name = mb_strcut($name, 0, 6, $buffer->systemCharset); $name .= $seg; - return mb_strtoupper($name); + return mb_strtoupper($name, $buffer->systemCharset); } } From e4093b493a32efcaa240b6e71b5dd6dbaf2c0cdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lester=20Carballo=20P=C3=A9rez?= Date: Mon, 12 Feb 2024 20:09:58 -0600 Subject: [PATCH 4/6] Delegate the calculation of the required bytes for the LongStringValueLabels label, to the buffer --- src/Buffer.php | 6 ++++-- 
src/Sav/Record/Info/LongStringValueLabels.php | 10 ++++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/Buffer.php b/src/Buffer.php index 7e22ebe..642a5a1 100644 --- a/src/Buffer.php +++ b/src/Buffer.php @@ -219,12 +219,14 @@ public function read($length = null) * * @return int */ - public function lengthBytes($data, $maxLength) + public function lengthBytes($data, $maxLength = null) { if (strtolower($this->streamCharset) != strtolower($this->systemCharset)) { $data = mb_convert_encoding($data, $this->streamCharset, $this->systemCharset); } - $data = mb_strcut($data, 0, $maxLength, $this->streamCharset); + if (isset($maxLength)) { + $data = mb_strcut($data, 0, $maxLength, $this->streamCharset); + } return \strlen($data); } diff --git a/src/Sav/Record/Info/LongStringValueLabels.php b/src/Sav/Record/Info/LongStringValueLabels.php index e804588..5fcb696 100644 --- a/src/Sav/Record/Info/LongStringValueLabels.php +++ b/src/Sav/Record/Info/LongStringValueLabels.php @@ -45,15 +45,17 @@ public function write(Buffer $buffer) throw new \InvalidArgumentException('values required'); } $width = (int) $data['width']; - $localBuffer->writeInt(mb_strlen($varName)); - $localBuffer->writeString($varName, mb_strlen($varName)); + $varLengthBytes = $buffer->lengthBytes($varName); + $localBuffer->writeInt($varLengthBytes); + $localBuffer->writeString($varName, $varLengthBytes); $localBuffer->writeInt($width); $localBuffer->writeInt(Utils::is_countable($data['values']) ? 
\count($data['values']) : 0); foreach ($data['values'] as $value => $label) { $localBuffer->writeInt($width); $localBuffer->writeString($value, $width); - $localBuffer->writeInt(mb_strlen($label)); - $localBuffer->writeString($label, mb_strlen($label)); + $labelLengthBytes = $buffer->lengthBytes($label); + $localBuffer->writeInt($labelLengthBytes); + $localBuffer->writeString($label, $labelLengthBytes); } } From 8e9dd3fdc16085d1812e97830a2a00b6a9cd2a27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lester=20Carballo=20P=C3=A9rez?= Date: Mon, 12 Feb 2024 20:16:20 -0600 Subject: [PATCH 5/6] Count the VariableAttributes data size in bytes instead of characters --- src/Sav/Record/Info/VariableAttributes.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Sav/Record/Info/VariableAttributes.php b/src/Sav/Record/Info/VariableAttributes.php index 46a90b2..5a1e088 100644 --- a/src/Sav/Record/Info/VariableAttributes.php +++ b/src/Sav/Record/Info/VariableAttributes.php @@ -44,7 +44,7 @@ public function write(Buffer $buffer) if ($lines !== []) { $data = implode('/', $lines); - $this->dataCount = mb_strlen($data); + $this->dataCount = \strlen($data); parent::write($buffer); $buffer->writeString($data); } From d0e1ff8ae9659a701d9f6053bd4e995f59c52975 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lester=20Carballo=20P=C3=A9rez?= Date: Mon, 12 Feb 2024 22:01:18 -0600 Subject: [PATCH 6/6] Fix the errors introduced by the previous commits --- src/Sav/Record/Variable.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Sav/Record/Variable.php b/src/Sav/Record/Variable.php index 55190ae..2a1373e 100644 --- a/src/Sav/Record/Variable.php +++ b/src/Sav/Record/Variable.php @@ -173,10 +173,10 @@ public function write(Buffer $buffer) $buffer->writeInt(0); // No missing values $buffer->writeInt($format); // Print format $buffer->writeInt($format); // Write format - $buffer->writeString($this->getSegmentName($i - 1), 8); + 
$buffer->writeString($this->getSegmentName($buffer, $i - 1), 8); if ($hasLabel) { $buffer->writeInt($labelLengthBytes); - $buffer->writeString($label, Utils::roundUp($labelLengthBytes, 4)); + $buffer->writeString($this->label, Utils::roundUp($labelLengthBytes, 4)); } $this->writeBlank($buffer, $segmentWidth); @@ -208,7 +208,7 @@ public function writeBlank(Buffer $buffer, $width) * * @return string */ - public function getSegmentName($seg = 0) + public function getSegmentName($buffer, $seg = 0) { // TODO: refactory $name = $this->name;