Skip to content

Commit

Permalink
fix note 7
Browse files Browse the repository at this point in the history
  • Loading branch information
jitsedesmet committed Dec 11, 2024
1 parent 47411d9 commit 76b5313
Show file tree
Hide file tree
Showing 5 changed files with 143 additions and 50 deletions.
5 changes: 3 additions & 2 deletions src/grammar/builder/parserBuilder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ export class Builder<Names extends string, RuleDefs extends RuleDefMap<Names>> {
const lexer: Lexer = new Lexer(tokenVocabulary, {
positionTracking: 'onlyStart',
recoveryEnabled: false,
skipValidations: true,
// SkipValidations: true,
ensureOptimizations: true,
...lexerConfig,
});
Expand All @@ -150,7 +150,8 @@ export class Builder<Names extends string, RuleDefs extends RuleDefMap<Names>> {
parser.input = lexResult.tokens;
const result = parser[rule.name](...args);
if (parser.errors.length > 0) {
throw new Error(`Parse error on line ${parser.errors[0].token.startLine}`);
throw new Error(`Parse error on line ${parser.errors.map(x => x.token.startLine).join(', ')}
${parser.errors.map(x => `${x.token.startLine}: ${x.message}`).join('\n')}`);
}
return result;
};
Expand Down
21 changes: 7 additions & 14 deletions src/lexer/sparql11/lexer.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
/* eslint-disable require-unicode-regexp */
import type { TokenType } from 'chevrotain';
import { Lexer } from 'chevrotain';
import { allBuiltInCalls } from './BuildinCalls.js';
import { allGraphTokens } from './graph.js';
import { createToken } from './helpers.js';
import { allSymbols } from './symbols.js';
import { allTerminals } from './terminals.js';
import { allTerminals, wsPattern } from './terminals.js';

export const baseDecl = createToken({ name: 'BaseDecl', pattern: /base/i, label: 'BASE' });
export const prefixDecl = createToken({ name: 'PrefixDecl', pattern: /prefix/i, label: 'PREFIX' });
Expand Down Expand Up @@ -36,9 +35,12 @@ export const add = createToken({ name: 'Add', pattern: /add/i, label: 'ADD' });
export const to = createToken({ name: 'To', pattern: /to/i, label: 'TO' });
export const move = createToken({ name: 'Move', pattern: /move/i, label: 'MOVE' });
export const copy = createToken({ name: 'Copy', pattern: /copy/i, label: 'COPY' });
export const insertData = createToken({ name: 'InsertData', pattern: /insert data/i, label: 'INSERT DATA' });
export const deleteData = createToken({ name: 'DeleteData', pattern: /delete data/i, label: 'DELETE DATA' });
export const deleteWhere = createToken({ name: 'DeleteWhere', pattern: /delete where/i, label: 'DELETE WHERE' });
// Two-word update keywords. Each pattern is the first word, any run of characters matching
// `wsPattern`, then the second word, matched case-insensitively (SPARQL 1.1 grammar note 7).
// NOTE(review): the `*` quantifier also accepts zero white-space characters, so e.g. `insertdata`
// matches as well — confirm that this is intended.
const insertDataPattern = new RegExp(`insert(?:${wsPattern.source})*data`, 'i');
const deleteDataPattern = new RegExp(`delete(?:${wsPattern.source})*data`, 'i');
const deleteWherePattern = new RegExp(`delete(?:${wsPattern.source})*where`, 'i');
// Tokens built from the patterns above; the labels keep the single-space spelling.
export const insertData = createToken({ name: 'InsertData', pattern: insertDataPattern, label: 'INSERT DATA' });
export const deleteData = createToken({ name: 'DeleteData', pattern: deleteDataPattern, label: 'DELETE DATA' });
export const deleteWhere = createToken({ name: 'DeleteWhere', pattern: deleteWherePattern, label: 'DELETE WHERE' });
export const modifyWith = createToken({ name: 'ModifyWith', pattern: /with/i, label: 'WITH' });
export const deleteClause = createToken({ name: 'DeleteClause', pattern: /delete/i, label: 'DELETE' });
export const insertClause = createToken({ name: 'InsertClause', pattern: /insert/i, label: 'INSERT' });
Expand Down Expand Up @@ -119,12 +121,3 @@ export const allTokens: TokenType[] = [
...allGraphTokens,
...allSymbols,
];

export const ChevSparqlLexer = new Lexer(allTokens, {
// PositionTracking: 'onlyOffset',
recoveryEnabled: false,
skipValidations: false,
safeMode: true,
positionTracking: 'full',
// EnsureOptimizations: true,
});
68 changes: 34 additions & 34 deletions src/lexer/sparql11/terminals.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,51 +3,51 @@ import { Lexer } from 'chevrotain';
import { createToken } from './helpers.js';

// eslint-disable-next-line max-len
const pnCharsBasePattern = /[A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|[\uD800-\uDB7F][\uDC00-\uDFFF]/;
const pnCharsUPattern = new RegExp(`${pnCharsBasePattern.source}|_`);
export const pnCharsBasePattern = /[A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]|[\uD800-\uDB7F][\uDC00-\uDFFF]/;
export const pnCharsUPattern = new RegExp(`${pnCharsBasePattern.source}|_`);
// eslint-disable-next-line no-misleading-character-class
const varNamePattern = new RegExp(`((${pnCharsUPattern.source})|[0-9])((${pnCharsUPattern.source})|[0-9]|[\u00B7\u0300-\u036F\u203F-\u2040])*`);
export const varNamePattern = new RegExp(`((${pnCharsUPattern.source})|[0-9])((${pnCharsUPattern.source})|[0-9]|[\u00B7\u0300-\u036F\u203F-\u2040])*`);
// eslint-disable-next-line no-control-regex
const iriRefPattern = /<([^\\<>"{}|^`\u0000-\u0020])*>/;
export const iriRefPattern = /<([^\\<>"{}|^`\u0000-\u0020])*>/;
// eslint-disable-next-line no-misleading-character-class
const pnCharsPattern = new RegExp(`(${pnCharsUPattern.source})|[\\-0-9\u00B7\u0300-\u036F\u203F-\u2040]`);
export const pnCharsPattern = new RegExp(`(${pnCharsUPattern.source})|[\\-0-9\u00B7\u0300-\u036F\u203F-\u2040]`);
// eslint-disable-next-line no-misleading-character-class
const pnPrefixPattern = new RegExp(`(${pnCharsBasePattern.source})(((${pnCharsPattern.source})|\\.)*(${pnCharsPattern.source}))?`);
export const pnPrefixPattern = new RegExp(`(${pnCharsBasePattern.source})(((${pnCharsPattern.source})|\\.)*(${pnCharsPattern.source}))?`);
// eslint-disable-next-line no-misleading-character-class
const pNameNsPattern = new RegExp(`(${pnPrefixPattern.source})?:`);
const percentPattern = /%[0-9A-Fa-f][0-9A-Fa-f]/;
const pnLocalEscPattern = /\\[_~.\-!$&'()*+,;=\\/?#@%]/;
const plxPattern = new RegExp(`(${percentPattern.source})|(${pnLocalEscPattern.source})`);
export const pNameNsPattern = new RegExp(`(${pnPrefixPattern.source})?:`);
export const percentPattern = /%[0-9A-Fa-f][0-9A-Fa-f]/;
export const pnLocalEscPattern = /\\[_~.\-!$&'()*+,;=\\/?#@%]/;
export const plxPattern = new RegExp(`(${percentPattern.source})|(${pnLocalEscPattern.source})`);
// eslint-disable-next-line no-misleading-character-class
const pnLocalPattern = new RegExp(`((${pnCharsUPattern.source})|:|[0-9]|(${plxPattern.source}))(((${pnCharsPattern.source})|\\.|:|(${plxPattern.source}))*((${pnCharsPattern.source})|:|(${plxPattern.source})))?`);
export const pnLocalPattern = new RegExp(`((${pnCharsUPattern.source})|:|[0-9]|(${plxPattern.source}))(((${pnCharsPattern.source})|\\.|:|(${plxPattern.source}))*((${pnCharsPattern.source})|:|(${plxPattern.source})))?`);
// eslint-disable-next-line no-misleading-character-class
const pNameLnPattern = new RegExp(`(${pNameNsPattern.source})(${pnLocalPattern.source})`);
export const pNameLnPattern = new RegExp(`(${pNameNsPattern.source})(${pnLocalPattern.source})`);
// eslint-disable-next-line no-misleading-character-class
const blankNodeLabelPattern = new RegExp(`_:((${pnCharsUPattern.source})|[0-9])(((${pnCharsPattern.source})\\.)*(${pnCharsPattern.source}))?`);
// BLANK_NODE_LABEL ::= '_:' ( PN_CHARS_U | [0-9] ) ( ( PN_CHARS | '.' )* PN_CHARS )?
// The inner repetition has to be the alternation `(PN_CHARS | '.')*`. Writing it as
// `(PN_CHARS '.')*` only accepts labels whose characters alternate with dots, so a plain
// label such as `_:abc` would be cut off after `_:ab`.
// eslint-disable-next-line no-misleading-character-class
export const blankNodeLabelPattern = new RegExp(`_:((${pnCharsUPattern.source})|[0-9])(((${pnCharsPattern.source})|\\.)*(${pnCharsPattern.source}))?`);
// eslint-disable-next-line no-misleading-character-class
const var1Pattern = new RegExp(`\\?(${varNamePattern.source})`);
export const var1Pattern = new RegExp(`\\?(${varNamePattern.source})`);
// eslint-disable-next-line no-misleading-character-class
const var2Pattern = new RegExp(`\\$(${varNamePattern.source})`);
const langTagPattern = /@[a-zA-Z]+(-[a-zA-Z0-9]+)*/;
const integerPattern = /[0-9]+/;
const decimalPattern = /[0-9]+\.[0-9]+/;
const exponentPattern = /[eE][+-]?[0-9]+/;
const doublePattern = new RegExp(`([0-9]+\\.[0-9]*(${exponentPattern.source}))|(\\.[0-9]+(${exponentPattern.source}))|([0-9]+(${exponentPattern.source}))`);
const interferePositivePattern = new RegExp(`\\+${integerPattern.source}`);
const decimalPositivePattern = new RegExp(`\\+${decimalPattern.source}`);
const doublePositivePattern = new RegExp(`\\+${doublePattern.source}`);
const integerNegativePattern = new RegExp(`-${integerPattern.source}`);
const decimalNegativePattern = new RegExp(`-${decimalPattern.source}`);
const doubleNegativePattern = new RegExp(`-${doublePattern.source}`);
const echarPattern = /\\[\\"'bfnrt]/u;
const stringLiteral1Pattern = new RegExp(`'(?:([^\\u0027\\u005C\\u000A\u000D])|(?:${echarPattern.source}))*'`);
const stringLiteral2Pattern = new RegExp(`"(?:([^\\u0022\\u005C\\u000A\\u000D])|(?:${echarPattern.source}))*"`);
const stringLiteralLong1Pattern = new RegExp(`'''(('|(''))?([^'\\\\]|(${echarPattern.source})))*'''`);
const stringLiteralLong2Pattern = new RegExp(`"""(("|(""))?([^"\\\\]|(${echarPattern.source})))*"""`);
export const var2Pattern = new RegExp(`\\$(${varNamePattern.source})`);
export const langTagPattern = /@[a-zA-Z]+(-[a-zA-Z0-9]+)*/;
// Unsigned numeric building blocks.
export const integerPattern = /[0-9]+/;
// NOTE(review): the SPARQL 1.1 grammar defines DECIMAL as `[0-9]* '.' [0-9]+`;
// this pattern requires at least one digit before the dot — confirm that is intended.
export const decimalPattern = /[0-9]+\.[0-9]+/;
export const exponentPattern = /[eE][+-]?[0-9]+/;
// Three alternatives: `1.e3` / `1.5e3`, `.5e3`, and `5e3`. Note the top-level `|`.
export const doublePattern = new RegExp(`([0-9]+\\.[0-9]*(${exponentPattern.source}))|(\\.[0-9]+(${exponentPattern.source}))|([0-9]+(${exponentPattern.source}))`);
// Signed variants: an explicit `+` or `-` followed by the unsigned pattern.
export const interferePositivePattern = new RegExp(`\\+${integerPattern.source}`);
export const decimalPositivePattern = new RegExp(`\\+${decimalPattern.source}`);
// `doublePattern` has top-level alternation, so it must be grouped before the sign is
// prepended; otherwise the sign only applies to the first alternative and unsigned
// inputs such as `5e3` would match the "positive"/"negative" patterns.
export const doublePositivePattern = new RegExp(`\\+(?:${doublePattern.source})`);
export const integerNegativePattern = new RegExp(`-${integerPattern.source}`);
export const decimalNegativePattern = new RegExp(`-${decimalPattern.source}`);
export const doubleNegativePattern = new RegExp(`-(?:${doublePattern.source})`);
// ECHAR: a backslash followed by one of the allowed escape characters.
export const echarPattern = /\\[\\"'bfnrt]/u;
// Single-line string literals: any character except the quote, backslash, LF and CR, or an ECHAR.
// All code points are written as `\\uXXXX` so the escape reaches the RegExp engine instead of
// embedding a raw control character in the pattern text.
export const stringLiteral1Pattern = new RegExp(`'(?:([^\\u0027\\u005C\\u000A\\u000D])|(?:${echarPattern.source}))*'`);
export const stringLiteral2Pattern = new RegExp(`"(?:([^\\u0022\\u005C\\u000A\\u000D])|(?:${echarPattern.source}))*"`);
// Long string literals: up to two consecutive quotes may precede each ordinary character or
// ECHAR; line breaks are allowed.
export const stringLiteralLong1Pattern = new RegExp(`'''(('|(''))?([^'\\\\]|(${echarPattern.source})))*'''`);
export const stringLiteralLong2Pattern = new RegExp(`"""(("|(""))?([^"\\\\]|(${echarPattern.source})))*"""`);
// eslint-disable-next-line no-control-regex
const wsPattern = /[\u0020\u0009\u000D\u000A]/;
const nilPattern = new RegExp(`\\((${wsPattern.source})*\\)`);
const anonPattern = new RegExp(`\\[(${wsPattern.source})*\\]`);
export const wsPattern = /[\u0020\u0009\u000D\u000A]/;
export const nilPattern = new RegExp(`\\((${wsPattern.source})*\\)`);
export const anonPattern = new RegExp(`\\[(${wsPattern.source})*\\]`);

export const iriRef = createToken({ name: 'IriRef', pattern: iriRefPattern });
export const pNameLn = createToken({ name: 'PNameLn', pattern: pNameLnPattern });
Expand Down
88 changes: 88 additions & 0 deletions test/statics/sparql/spaced-data-update.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
{
"prefixes": {},
"type": "update",
"updates": [
{
"insert": [
{
"triples": [
{
"object": {
"datatype": {
"termType": "NamedNode",
"value": "http://www.w3.org/2001/XMLSchema#string"
},
"language": "",
"termType": "Literal",
"value": "object"
},
"predicate": {
"termType": "NamedNode",
"value": "http://example.org/predicate"
},
"subject": {
"termType": "NamedNode",
"value": "http://example.org/subject"
}
}
],
"type": "bgp"
}
],
"updateType": "insert"
},
{
"delete": [
{
"triples": [
{
"object": {
"datatype": {
"termType": "NamedNode",
"value": "http://www.w3.org/2001/XMLSchema#string"
},
"language": "",
"termType": "Literal",
"value": "object"
},
"predicate": {
"termType": "NamedNode",
"value": "http://example.org/predicate"
},
"subject": {
"termType": "NamedNode",
"value": "http://example.org/subject"
}
}
],
"type": "bgp"
}
],
"updateType": "delete"
},
{
"delete": [
{
"triples": [
{
"object": {
"termType": "Variable",
"value": "o"
},
"predicate": {
"termType": "NamedNode",
"value": "http://example.org/predicate"
},
"subject": {
"termType": "NamedNode",
"value": "http://example.org/subject"
}
}
],
"type": "bgp"
}
],
"updateType": "deletewhere"
}
]
}
11 changes: 11 additions & 0 deletions test/statics/sparql/spaced-data-update.sparql
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
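# Tests note 7 of the SPARQL 1.1 grammar: 'INSERT DATA', 'DELETE DATA' and 'DELETE WHERE' allow any amount of white space (including line breaks) between the words, and keywords are case-insensitive.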
INsert Data {
<http://example.org/subject> <http://example.org/predicate> "object" .
};
Delete
Data {
<http://example.org/subject> <http://example.org/predicate> "object" .
};
DeLete WHERE {
<http://example.org/subject> <http://example.org/predicate> ?o .
};

0 comments on commit 76b5313

Please sign in to comment.