From 776e8502e2fe39532ae7c8c389cb91e16e0d0d57 Mon Sep 17 00:00:00 2001 From: Mark Sujew Date: Fri, 4 Nov 2022 15:11:54 +0100 Subject: [PATCH] Use `chevrotain-allstar` for lookahead --- package-lock.json | 116 ++++++++++++++---- packages/langium/package.json | 3 +- packages/langium/src/parser/langium-parser.ts | 15 ++- .../langium/src/parser/parser-builder-base.ts | 2 +- .../src/validation/document-validator.ts | 8 +- .../parser/langium-parser-builder.test.ts | 30 +++++ 6 files changed, 137 insertions(+), 37 deletions(-) diff --git a/package-lock.json b/package-lock.json index a552b302d..52886d75d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -214,15 +214,34 @@ "integrity": "sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw==", "dev": true }, + "node_modules/@chevrotain/cst-dts-gen": { + "version": "10.4.1", + "resolved": "https://registry.npmjs.org/@chevrotain/cst-dts-gen/-/cst-dts-gen-10.4.1.tgz", + "integrity": "sha512-wNDw9Rh6dPJKH275er8nijuDIpTcG2GjQANjnG8RaeGkZ3JN99+u6HRtnjKhjoi4NY9rg+udHChHQSskZtlkPw==", + "dependencies": { + "@chevrotain/gast": "10.4.1", + "@chevrotain/types": "10.4.1", + "lodash": "4.17.21" + } + }, + "node_modules/@chevrotain/gast": { + "version": "10.4.1", + "resolved": "https://registry.npmjs.org/@chevrotain/gast/-/gast-10.4.1.tgz", + "integrity": "sha512-HRv66QVbmC7eb/ppwsPCfNH4oZ/VV+thuMZILm7A7W6Q5M0tqiZv0ecdiB8hydmPO8je0aSrXEOCcaA6fuXc3Q==", + "dependencies": { + "@chevrotain/types": "10.4.1", + "lodash": "4.17.21" + } + }, "node_modules/@chevrotain/types": { - "version": "9.1.0", - "resolved": "https://registry.npmjs.org/@chevrotain/types/-/types-9.1.0.tgz", - "integrity": "sha512-3hbCD1CThkv9gnaSIPq0GUXwKni68e0ph6jIHwCvcWiQ4JB2xi8bFxBain0RF04qHUWuDjgnZLj4rLgimuGO+g==" + "version": "10.4.1", + "resolved": "https://registry.npmjs.org/@chevrotain/types/-/types-10.4.1.tgz", + "integrity": "sha512-J8iyZNn/RGYWSyNJdGd3QI01gKFUx4mCSM0+vEqmIw9TXFlxj1IsHteXFahtezSHjgMtBTqWn6hb2YxCLjpHVg==" }, "node_modules/@chevrotain/utils": { - "version": "9.1.0", - "resolved": "https://registry.npmjs.org/@chevrotain/utils/-/utils-9.1.0.tgz", - "integrity": "sha512-llLJZ8OAlZrjGlBvamm6Zdo/HmGAcCLq5gx7cSwUX8No+n/8ip+oaC4x33IdZIif8+Rh5dQUIZXmfbSghiOmNQ==" + "version": "10.4.1", + "resolved": "https://registry.npmjs.org/@chevrotain/utils/-/utils-10.4.1.tgz", + "integrity": "sha512-vPIgzES8QhHMchb5UaQ4V/c9xmoaECN+4EXpuhWE+pu3LXJUUtAwDn/SEKFgtyiRo269Hxv3b0NbPlQfH0jeVA==" }, "node_modules/@esbuild/linux-loong64": { "version": "0.15.7", @@ -1287,15 +1306,27 @@ } }, "node_modules/chevrotain": { - "version": "9.1.0", - "resolved": "https://registry.npmjs.org/chevrotain/-/chevrotain-9.1.0.tgz", - "integrity": "sha512-A86/55so63HCfu0dgGg3j9u8uuuBOrSqly1OhBZxRu2x6sAKILLzfVjbGMw45kgier6lz45EzcjjWtTRgoT84Q==", - "dependencies": { - "@chevrotain/types": "^9.1.0", - "@chevrotain/utils": "^9.1.0", + "version": "10.4.1", + "resolved": "https://registry.npmjs.org/chevrotain/-/chevrotain-10.4.1.tgz", + "integrity": "sha512-1y4vnssauVmrrP5MBaJ6DZvsv3BpXLlKVNK5S52fTGQHqg09qxMDBAz0wZbb04Ovc1pBCA4obcCjOlRioIV+cA==", + "dependencies": { + "@chevrotain/cst-dts-gen": "10.4.1", + "@chevrotain/gast": "10.4.1", + "@chevrotain/types": "10.4.1", + "@chevrotain/utils": "10.4.1", + "lodash": "4.17.21", "regexp-to-ast": "0.5.0" } }, + "node_modules/chevrotain-allstar": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/chevrotain-allstar/-/chevrotain-allstar-0.1.1.tgz", + "integrity": "sha512-5uHtMrgYO693esxm3VU37GKdTPdwejdZzyFptmom2FUFXZRTv/qo0X+sXZcgJeMK4JXu/G9G3EZzTSXljKBu/w==", + "dependencies": { + "chevrotain": "^10.4.1", + "lodash": "^4.17.21" + } + }, "node_modules/ci-info": { "version": "3.4.0", "resolved": "https://registry.npmjs.org/ci-info/-/ci-info-3.4.0.tgz", @@ -4554,7 +4585,8 @@ "version": "0.5.0", "license": "MIT", "dependencies": { - "chevrotain": "^9.1.0", + "chevrotain": "^10.4.1", + "chevrotain-allstar": "^0.1.1", "vscode-languageserver": "^8.0.2", "vscode-languageserver-textdocument": "^1.0.7", "vscode-uri": "^3.0.2" @@ -4701,15 +4733,34 @@ "integrity": "sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw==", "dev": true }, + "@chevrotain/cst-dts-gen": { + "version": "10.4.1", + "resolved": "https://registry.npmjs.org/@chevrotain/cst-dts-gen/-/cst-dts-gen-10.4.1.tgz", + "integrity": "sha512-wNDw9Rh6dPJKH275er8nijuDIpTcG2GjQANjnG8RaeGkZ3JN99+u6HRtnjKhjoi4NY9rg+udHChHQSskZtlkPw==", + "requires": { + "@chevrotain/gast": "10.4.1", + "@chevrotain/types": "10.4.1", + "lodash": "4.17.21" + } + }, + "@chevrotain/gast": { + "version": "10.4.1", + "resolved": "https://registry.npmjs.org/@chevrotain/gast/-/gast-10.4.1.tgz", + "integrity": "sha512-HRv66QVbmC7eb/ppwsPCfNH4oZ/VV+thuMZILm7A7W6Q5M0tqiZv0ecdiB8hydmPO8je0aSrXEOCcaA6fuXc3Q==", + "requires": { + "@chevrotain/types": "10.4.1", + "lodash": "4.17.21" + } + }, "@chevrotain/types": { - "version": "9.1.0", - "resolved": "https://registry.npmjs.org/@chevrotain/types/-/types-9.1.0.tgz", - "integrity": "sha512-3hbCD1CThkv9gnaSIPq0GUXwKni68e0ph6jIHwCvcWiQ4JB2xi8bFxBain0RF04qHUWuDjgnZLj4rLgimuGO+g==" + "version": "10.4.1", + "resolved": "https://registry.npmjs.org/@chevrotain/types/-/types-10.4.1.tgz", + "integrity": "sha512-J8iyZNn/RGYWSyNJdGd3QI01gKFUx4mCSM0+vEqmIw9TXFlxj1IsHteXFahtezSHjgMtBTqWn6hb2YxCLjpHVg==" }, "@chevrotain/utils": { - "version": "9.1.0", - "resolved": "https://registry.npmjs.org/@chevrotain/utils/-/utils-9.1.0.tgz", - "integrity": "sha512-llLJZ8OAlZrjGlBvamm6Zdo/HmGAcCLq5gx7cSwUX8No+n/8ip+oaC4x33IdZIif8+Rh5dQUIZXmfbSghiOmNQ==" + "version": "10.4.1", + "resolved": "https://registry.npmjs.org/@chevrotain/utils/-/utils-10.4.1.tgz", + "integrity": "sha512-vPIgzES8QhHMchb5UaQ4V/c9xmoaECN+4EXpuhWE+pu3LXJUUtAwDn/SEKFgtyiRo269Hxv3b0NbPlQfH0jeVA==" }, "@esbuild/linux-loong64": { "version": "0.15.7", @@ -5558,15 +5609,27 @@ "dev": true }, "chevrotain": { - "version": "9.1.0", - "resolved": "https://registry.npmjs.org/chevrotain/-/chevrotain-9.1.0.tgz", - "integrity": "sha512-A86/55so63HCfu0dgGg3j9u8uuuBOrSqly1OhBZxRu2x6sAKILLzfVjbGMw45kgier6lz45EzcjjWtTRgoT84Q==", - "requires": { - "@chevrotain/types": "^9.1.0", - "@chevrotain/utils": "^9.1.0", + "version": "10.4.1", + "resolved": "https://registry.npmjs.org/chevrotain/-/chevrotain-10.4.1.tgz", + "integrity": "sha512-1y4vnssauVmrrP5MBaJ6DZvsv3BpXLlKVNK5S52fTGQHqg09qxMDBAz0wZbb04Ovc1pBCA4obcCjOlRioIV+cA==", + "requires": { + "@chevrotain/cst-dts-gen": "10.4.1", + "@chevrotain/gast": "10.4.1", + "@chevrotain/types": "10.4.1", + "@chevrotain/utils": "10.4.1", + "lodash": "4.17.21", "regexp-to-ast": "0.5.0" } }, + "chevrotain-allstar": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/chevrotain-allstar/-/chevrotain-allstar-0.1.1.tgz", + "integrity": "sha512-5uHtMrgYO693esxm3VU37GKdTPdwejdZzyFptmom2FUFXZRTv/qo0X+sXZcgJeMK4JXu/G9G3EZzTSXljKBu/w==", + "requires": { + "chevrotain": "^10.4.1", + "lodash": "^4.17.21" + } + }, "ci-info": { "version": "3.4.0", "resolved": "https://registry.npmjs.org/ci-info/-/ci-info-3.4.0.tgz", @@ -6555,7 +6618,8 @@ "langium": { "version": "file:packages/langium", "requires": { - "chevrotain": "^9.1.0", + "chevrotain": "^10.4.1", + "chevrotain-allstar": "^0.1.1", "langium-cli": "0.5.0", "vscode-languageserver": "^8.0.2", "vscode-languageserver-textdocument": "^1.0.7", diff --git a/packages/langium/package.json b/packages/langium/package.json index 29cf6eab1..ceb1c0657 100644 --- a/packages/langium/package.json +++ b/packages/langium/package.json @@ -36,7 +36,8 @@ "publish:latest": "npm publish --tag latest" }, "dependencies": { - "chevrotain": "^9.1.0", + "chevrotain": "^10.4.1", + "chevrotain-allstar": "^0.1.1", "vscode-languageserver": "^8.0.2", "vscode-languageserver-textdocument": "^1.0.7", "vscode-uri": "^3.0.2" diff --git a/packages/langium/src/parser/langium-parser.ts b/packages/langium/src/parser/langium-parser.ts index 14a3d38c8..d175dc5cf 100644 --- a/packages/langium/src/parser/langium-parser.ts +++ b/packages/langium/src/parser/langium-parser.ts @@ -5,7 +5,8 @@ ******************************************************************************/ /* eslint-disable @typescript-eslint/no-explicit-any */ -import { defaultParserErrorProvider, DSLMethodOpts, EmbeddedActionsParser, ILexingError, IOrAlt, IParserErrorMessageProvider, IRecognitionException, IToken, TokenType, TokenVocabulary } from 'chevrotain'; +import { defaultParserErrorProvider, DSLMethodOpts, EmbeddedActionsParser, ILexingError, IOrAlt, IParserErrorMessageProvider, IRecognitionException, IToken, LLkLookaheadStrategy, TokenType, TokenVocabulary } from 'chevrotain'; +import { LLStarLookaheadStrategy } from 'chevrotain-allstar'; import { AbstractElement, Action, Assignment, isAssignment, isCrossReference, isKeyword, ParserRule } from '../grammar/generated/ast'; import { getTypeName, isDataTypeRule } from '../grammar/internal-grammar-util'; import { Linker } from '../references/linker'; @@ -37,7 +38,7 @@ function isDataTypeNode(node: { $type: string | symbol | undefined }): node is D return node.$type === DatatypeSymbol; } -type RuleResult = () => any; +type RuleResult = (args: Args) => any; type Args = Record; @@ -150,7 +151,7 @@ export class LangiumParser extends AbstractLangiumParser { this.nodeBuilder.buildRootNode(input); const lexerResult = this.lexer.tokenize(input); this.wrapper.input = lexerResult.tokens; - const result = this.mainRule.call(this.wrapper); + const result = this.mainRule.call(this.wrapper, {}); this.nodeBuilder.addHiddenTokens(lexerResult.hidden); this.unorderedGroups.clear(); return { @@ -417,7 +418,7 @@ export class LangiumCompletionParser extends AbstractLangiumParser { const tokens = this.lexer.tokenize(input); this.tokens = tokens.tokens; this.wrapper.input = [...this.tokens]; - this.mainRule.call(this.wrapper); + this.mainRule.call(this.wrapper, {}); this.unorderedGroups.clear(); return { tokens: this.tokens, @@ -518,9 +519,13 @@ class ChevrotainWrapper extends EmbeddedActionsParser { definitionErrors: IParserDefinitionError[]; constructor(tokens: TokenVocabulary, config?: IParserConfig) { + const useDefaultLookahead = config && 'maxLookahead' in config; super(tokens, { ...defaultConfig, - ...config + ...config, + lookaheadStrategy: useDefaultLookahead + ? new LLkLookaheadStrategy({ maxLookahead: config.maxLookahead }) + : new LLStarLookaheadStrategy() }); } diff --git a/packages/langium/src/parser/parser-builder-base.ts b/packages/langium/src/parser/parser-builder-base.ts index 60326757f..04862f7bc 100644 --- a/packages/langium/src/parser/parser-builder-base.ts +++ b/packages/langium/src/parser/parser-builder-base.ts @@ -28,7 +28,7 @@ type ParserContext = { ruleNames: Map } -type Rule = () => unknown; +type Rule = (args: Args) => unknown; type Args = Record; diff --git a/packages/langium/src/validation/document-validator.ts b/packages/langium/src/validation/document-validator.ts index 6779b2716..e40e350c1 100644 --- a/packages/langium/src/validation/document-validator.ts +++ b/packages/langium/src/validation/document-validator.ts @@ -50,12 +50,12 @@ export class DefaultDocumentValidator implements DocumentValidator { severity: DiagnosticSeverity.Error, range: { start: { - line: lexerError.line - 1, - character: lexerError.column - 1 + line: lexerError.line! - 1, + character: lexerError.column! - 1 }, end: { - line: lexerError.line - 1, - character: lexerError.column + lexerError.length - 1 + line: lexerError.line! - 1, + character: lexerError.column! + lexerError.length - 1 } }, message: lexerError.message, diff --git a/packages/langium/test/parser/langium-parser-builder.test.ts b/packages/langium/test/parser/langium-parser-builder.test.ts index e797dde43..127a65005 100644 --- a/packages/langium/test/parser/langium-parser-builder.test.ts +++ b/packages/langium/test/parser/langium-parser-builder.test.ts @@ -567,6 +567,36 @@ describe('MultiMode Lexing', () => { }); +describe('ALL(*) parser', () => { + + const grammar = ` + grammar UnboundedLookahead + + entry Entry: A | B; + + // Potentially unlimited amount of 'a' tokens + A: {infer A} 'a'* 'b'; + B: {infer B} 'a'* 'c'; + + hidden terminal WS: /\\s+/;`; + + const parser = parserFromGrammar(grammar); + + test('can parse with unbounded lookahead #1', () => { + const result = parser.parse('aaaaaaaaaab'); + expect(result.lexerErrors).toHaveLength(0); + expect(result.parserErrors).toHaveLength(0); + expect(result.value.$type).toBe('A'); + }); + + test('can parse with unbounded lookahead #2', () => { + const result = parser.parse('aaaaaaaaaaaaaac'); + expect(result.lexerErrors).toHaveLength(0); + expect(result.parserErrors).toHaveLength(0); + expect(result.value.$type).toBe('B'); + }); +}); + function parserFromGrammar(grammar: string): LangiumParser { return createServicesForGrammar({ grammar }).parser.LangiumParser; }