From 0aa32721bfc035faaa6cd2c95b18c0f1fb50b636 Mon Sep 17 00:00:00 2001 From: Michiel Stornebrink Date: Sat, 18 Jan 2025 09:51:26 +0100 Subject: [PATCH] Issue/51 dot in name (#57) * Allow dot in middle of a prefixed name See #51 * Update test testBad07 accordingly * Add test to check for bad syntax with dot as last char of QName * Prevent last char of QName to be a dot --- lib/Parser/Turtle.php | 6 ++++++ test/fixtures/turtle/bad-07.ttl | 1 - .../turtle/gh51-sweetrdf-dot-in-name-bad.ttl | 9 ++++++++ .../turtle/gh51-sweetrdf-dot-in-name.out | 2 ++ .../turtle/gh51-sweetrdf-dot-in-name.ttl | 7 +++++++ tests/EasyRdf/Parser/TurtleTest.php | 21 ++++++++++++++++++- 6 files changed, 44 insertions(+), 2 deletions(-) create mode 100644 test/fixtures/turtle/gh51-sweetrdf-dot-in-name-bad.ttl create mode 100644 test/fixtures/turtle/gh51-sweetrdf-dot-in-name.out create mode 100644 test/fixtures/turtle/gh51-sweetrdf-dot-in-name.ttl diff --git a/lib/Parser/Turtle.php b/lib/Parser/Turtle.php index 3d6bbce8..abe6da61 100644 --- a/lib/Parser/Turtle.php +++ b/lib/Parser/Turtle.php @@ -940,6 +940,11 @@ protected function parseQNameOrBoolean() } $c = $this->read(); } + + // Last char of name must not be a dot + if (mb_substr($localName, -1) === '.') { + throw new Exception("Turtle Parse Error: last character of QName must not be a dot", $this->line, $this->column - 1); + } } // Unread last character @@ -1217,6 +1222,7 @@ public static function isNameChar($c) self::isNameStartChar($c) || $o >= 0x30 && $o <= 0x39 // 0-9 || '-' == $c + || '.' == $c // dots are allowed in the middle of a name, not as start char || 0x00B7 == $o; } diff --git a/test/fixtures/turtle/bad-07.ttl b/test/fixtures/turtle/bad-07.ttl index 18ea4fa6..f064d119 100644 --- a/test/fixtures/turtle/bad-07.ttl +++ b/test/fixtures/turtle/bad-07.ttl @@ -1,4 +1,3 @@ # paths are not in turtle @prefix : . -:a.:b.:c . :a^:b^:c . 
diff --git a/test/fixtures/turtle/gh51-sweetrdf-dot-in-name-bad.ttl b/test/fixtures/turtle/gh51-sweetrdf-dot-in-name-bad.ttl new file mode 100644 index 00000000..ae406376 --- /dev/null +++ b/test/fixtures/turtle/gh51-sweetrdf-dot-in-name-bad.ttl @@ -0,0 +1,9 @@ +@prefix : . + +# A dot can be neither the first nor the last character of the local part of a prefixed name (the part after the colon), but is allowed in the middle +# See https://www.w3.org/TR/turtle/#grammar-production-PN_LOCAL +# See https://github.com/sweetrdf/easyrdf/issues/51 +# The triples below should not parse! +:SubjectWithEndDot. :predicate :Object . +:Subject :predicateWithEndDot. :Object . +:Subject :predicate :ObjectWithEndDot. . diff --git a/test/fixtures/turtle/gh51-sweetrdf-dot-in-name.out b/test/fixtures/turtle/gh51-sweetrdf-dot-in-name.out new file mode 100644 index 00000000..f1bee29f --- /dev/null +++ b/test/fixtures/turtle/gh51-sweetrdf-dot-in-name.out @@ -0,0 +1,2 @@ + . + . diff --git a/test/fixtures/turtle/gh51-sweetrdf-dot-in-name.ttl b/test/fixtures/turtle/gh51-sweetrdf-dot-in-name.ttl new file mode 100644 index 00000000..502bc150 --- /dev/null +++ b/test/fixtures/turtle/gh51-sweetrdf-dot-in-name.ttl @@ -0,0 +1,7 @@ +@prefix : . + +# A dot can be neither the first nor the last character of the local part of a prefixed name (the part after the colon), but is allowed in the middle +# See https://www.w3.org/TR/turtle/#grammar-production-PN_LOCAL +# See https://github.com/sweetrdf/easyrdf/issues/51 +:Subject.WithADot :predicate.withADot :Object.WithADot . +:Subject.With.Dots :predicate.with.dots :Object.With.Dots . 
diff --git a/tests/EasyRdf/Parser/TurtleTest.php b/tests/EasyRdf/Parser/TurtleTest.php index 5955a4c7..d8390352 100644 --- a/tests/EasyRdf/Parser/TurtleTest.php +++ b/tests/EasyRdf/Parser/TurtleTest.php @@ -434,7 +434,7 @@ public function testBad07() // paths are not in turtle $this->expectException('EasyRdf\Parser\Exception'); $this->expectExceptionMessage( - 'Turtle Parse Error: object for statement missing on line 3, column 5' + "Turtle Parse Error: expected an RDF value here, found '^' on line 3, column 3" ); $this->parseTurtle('turtle/bad-07.ttl'); } @@ -578,4 +578,23 @@ public function testIssue140() $this->assertEquals(14, $triple_count); */ } + + /** + * @see https://github.com/sweetrdf/easyrdf/issues/51 + * Notice this is an issue reported in the sweetrdf/easyrdf fork + */ + public function testIssue51() + { + $this->turtleTestCase('gh51-sweetrdf-dot-in-name'); + } + + public function testIssue51Bad() + { + // A dot as the last character of a QName must be rejected + $this->expectException('EasyRdf\Parser\Exception'); + $this->expectExceptionMessage( + 'Turtle Parse Error: last character of QName must not be a dot on line 7, column 20' + ); + $this->parseTurtle('turtle/gh51-sweetrdf-dot-in-name-bad.ttl'); + } }