Skip to content

Commit

Permalink
Streamline lexer plumbing and simplify error recovery wrt that (#620)
Browse files Browse the repository at this point in the history
Ref #608

Nothing ground-breaking but simplifies some of the machinery involved.
  • Loading branch information
Xanewok authored Oct 31, 2023
1 parent 7bb650b commit 18f590f
Show file tree
Hide file tree
Showing 62 changed files with 4,338 additions and 2,683 deletions.
135 changes: 51 additions & 84 deletions crates/codegen/parser/generator/src/parser_definition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ pub trait ParserDefinitionNodeExtensions {

impl ParserDefinitionNodeExtensions for ParserDefinitionNode {
fn to_parser_code(&self, context_name: &'static str, is_trivia: bool) -> TokenStream {
let context = format_ident!("{context_name}");
let lex_ctx = quote! { LexicalContextType::#context };

match self {
Self::Versioned(body, _, _) => body.to_parser_code(context_name, is_trivia),

Expand Down Expand Up @@ -102,91 +105,71 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode {

Self::ScannerDefinition(scanner_definition, _) => {
let kind = format_ident!("{name}", name = scanner_definition.name());
if is_trivia {
let function_name =
format_ident!("{}_parse_token", context_name.to_snake_case());
quote! {
self.#function_name(input, TokenKind::#kind)
}

let parse_token = if is_trivia {
format_ident!("parse_token")
} else {
let function_name =
format_ident!("{}_parse_token_with_trivia", context_name.to_snake_case());
quote! {
self.#function_name(input, TokenKind::#kind)
}
format_ident!("parse_token_with_trivia")
};

quote! {
self.#parse_token::<#lex_ctx>(input, TokenKind::#kind)
}
}

Self::TriviaParserDefinition(trivia_parser_definition, _) => {
let function_name = format_ident!(
"{snake_case}",
snake_case = trivia_parser_definition.name().to_snake_case()
);
let function_name =
format_ident!("{}", trivia_parser_definition.name().to_snake_case());

quote! { self.#function_name(input) }
}

Self::ParserDefinition(parser_definition, _) => {
if is_trivia {
unreachable!(
"Trivia productions can only reference trivia or token productions"
)
}
assert!(
!is_trivia,
"Trivia productions can only reference trivia or token productions"
);

if parser_definition.is_inline() {
parser_definition.to_parser_code()
} else {
let function_name = format_ident!(
"{snake_case}",
snake_case = parser_definition.name().to_snake_case()
);
let function_name =
format_ident!("{}", parser_definition.name().to_snake_case());

quote! {
self.#function_name(input)
}
}
}

Self::PrecedenceParserDefinition(precedence_parser_definition, _) => {
if is_trivia {
unreachable!(
"Trivia productions can only reference trivia or token productions"
)
}
let function_name = format_ident!(
"{snake_case}",
snake_case = precedence_parser_definition.name().to_snake_case()
assert!(
!is_trivia,
"Trivia productions can only reference trivia or token productions"
);

let function_name =
format_ident!("{}", precedence_parser_definition.name().to_snake_case());

quote! { self.#function_name(input) }
}

Self::DelimitedBy(open, body, close, _) => {
let [open_token, close_token] = match (open.as_ref(), close.as_ref()) {
let [open_delim, close_delim] = match (open.as_ref(), close.as_ref()) {
(
ParserDefinitionNode::ScannerDefinition(open, ..),
ParserDefinitionNode::ScannerDefinition(close, ..),
) => [open, close].map(|scanner| format_ident!("{}", scanner.name())),
_ => unreachable!("Only tokens are permitted as delimiters"),
};

let parse_token = format_ident!(
"{context_name}_parse_token_with_trivia",
context_name = context_name.to_snake_case()
);

let delimiters = format_ident!(
"{context_name}_delimiters",
context_name = context_name.to_snake_case()
);

let context = format_ident!("{context_name}");

let parser = body.to_parser_code(context_name, is_trivia);
let body_parser = body.applicable_version_quality_ranges().wrap_code(
quote! {
seq.elem(#parser
.recover_until_with_nested_delims(input,
|input| Lexer::next_token::<{ LexicalContext::#context as u8 }>(self, input),
|input| Lexer::leading_trivia(self, input),
TokenKind::#close_token,
Self::#delimiters(),
.recover_until_with_nested_delims::<_, #lex_ctx>(input,
self,
TokenKind::#close_delim,
RecoverFromNoMatch::Yes,
)
)?;
Expand All @@ -196,67 +179,51 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode {

quote! {
SequenceHelper::run(|mut seq| {
let mut delim_guard = input.open_delim(TokenKind::#close_token);
let mut delim_guard = input.open_delim(TokenKind::#close_delim);
let input = delim_guard.ctx();

seq.elem(self.#parse_token(input, TokenKind::#open_token))?;
seq.elem(self.parse_token_with_trivia::<#lex_ctx>(input, TokenKind::#open_delim))?;
#body_parser
seq.elem(self.#parse_token(input, TokenKind::#close_token))?;
seq.elem(self.parse_token_with_trivia::<#lex_ctx>(input, TokenKind::#close_delim))?;
seq.finish()
})
}
}

Self::SeparatedBy(body, separator, _) => {
let separator_scanner = match separator.as_ref() {
ParserDefinitionNode::ScannerDefinition(scanner, ..) => scanner,
let separator = match separator.as_ref() {
ParserDefinitionNode::ScannerDefinition(scanner, ..) => {
format_ident!("{name}", name = scanner.name())
}
_ => unreachable!("Only tokens are permitted as separators"),
};

let separator_token_kind = format_ident!("{name}", name = separator_scanner.name());
let context = format_ident!("{context_name}");

let parser = body.to_parser_code(context_name, is_trivia);

quote! {
SeparatedHelper::run::<{ LexicalContext::#context as u8}, Self>(
SeparatedHelper::run::<_, #lex_ctx>(
input,
|input| #parser,
TokenKind::#separator_token_kind,
self,
|input| #parser,
TokenKind::#separator,
)
}
}
Self::TerminatedBy(body, terminator, _) => {
let terminator_scanner = match terminator.as_ref() {
ParserDefinitionNode::ScannerDefinition(scanner, ..) => scanner,
let terminator = match terminator.as_ref() {
ParserDefinitionNode::ScannerDefinition(scanner, ..) => {
format_ident!("{name}", name = scanner.name())
}
_ => unreachable!("Only tokens are permitted as terminators"),
};

let terminator_token_kind =
format_ident!("{name}", name = terminator_scanner.name());

let context = format_ident!("{context_name}");

let delimiters = format_ident!(
"{context_name}_delimiters",
context_name = context_name.to_snake_case()
);

let parse_token = format_ident!(
"{context_name}_parse_token_with_trivia",
context_name = context_name.to_snake_case()
);

let parser = body.to_parser_code(context_name, is_trivia);
let body_parser = body.applicable_version_quality_ranges().wrap_code(
quote! {
seq.elem(#parser
.recover_until_with_nested_delims(input,
|input| Lexer::next_token::<{ LexicalContext::#context as u8 }>(self, input),
|input| Lexer::leading_trivia(self, input),
TokenKind::#terminator_token_kind,
Self::#delimiters(),
.recover_until_with_nested_delims::<_, #lex_ctx>(input,
self,
TokenKind::#terminator,
RecoverFromNoMatch::No,
)
)?;
Expand All @@ -267,7 +234,7 @@ impl ParserDefinitionNodeExtensions for ParserDefinitionNode {
quote! {
SequenceHelper::run(|mut seq| {
#body_parser
seq.elem(self.#parse_token(input, TokenKind::#terminator_token_kind))?;
seq.elem(self.parse_token_with_trivia::<#lex_ctx>(input, TokenKind::#terminator))?;
seq.finish()
})
}
Expand Down
10 changes: 9 additions & 1 deletion crates/codegen/parser/runtime/src/kinds.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,15 @@ pub enum ProductionKind {
#[derive(strum_macros::FromRepr)]
#[cfg_attr(feature = "slang_napi_interfaces", /* derives `Clone` and `Copy` */ napi(string_enum, namespace = "language"))]
#[cfg_attr(not(feature = "slang_napi_interfaces"), derive(Clone, Copy))]
#[repr(u8)] // This is used as a const fn argument, which only supports primitive types
pub enum LexicalContext {
XXX,
}

/// Marker trait for type-level [`LexicalContext`] variants.
pub trait IsLexicalContext {
/// Returns a run-time [`LexicalContext`] value.
fn value() -> LexicalContext;
}

#[allow(non_snake_case)]
pub mod LexicalContextType {}
43 changes: 28 additions & 15 deletions crates/codegen/parser/runtime/src/lexer.rs
Original file line number Diff line number Diff line change
@@ -1,41 +1,52 @@
use crate::{
cst,
kinds::{LexicalContext, TokenKind},
kinds::{IsLexicalContext, TokenKind},
support::{ParserContext, ParserResult},
};

// Ensure that the `LexicalContext` enum is `repr(u8)`.
// Workaround until repr(u8) enums can be used as const params.
const _ASSERT_CONTEXT_IS_REPR_U8: fn() = || {
let _ = core::mem::transmute::<u8, LexicalContext>;
};

pub trait Lexer {
// Generated by the templating engine
#[doc(hidden)]
fn next_token<const LEX_CTX: u8>(&self, input: &mut ParserContext) -> Option<TokenKind>;
fn next_token<LexCtx: IsLexicalContext>(&self, input: &mut ParserContext) -> Option<TokenKind>;
// NOTE: These are context-insensitive
#[doc(hidden)]
fn leading_trivia(&self, input: &mut ParserContext) -> ParserResult;
#[doc(hidden)]
fn trailing_trivia(&self, input: &mut ParserContext) -> ParserResult;
#[doc(hidden)]
fn delimiters<const LEX_CTX: u8>() -> &'static [(TokenKind, TokenKind)];
/// Returns valid grouping delimiters in the given lexical context.
fn delimiters<LexCtx: IsLexicalContext>() -> &'static [(TokenKind, TokenKind)];

fn peek_token<const LEX_CTX: u8>(&self, input: &mut ParserContext) -> Option<TokenKind> {
/// Peeks the next token, including trivia. Does not advance the input.
fn peek_token<LexCtx: IsLexicalContext>(&self, input: &mut ParserContext) -> Option<TokenKind> {
let start = input.position();
let token = self.next_token::<LEX_CTX>(input);
let token = self.next_token::<LexCtx>(input);
input.set_position(start);
token
}

/// Peeks the next significant (i.e. non-trivia) token. Does not advance the input.
fn peek_token_with_trivia<LexCtx: IsLexicalContext>(
&self,
input: &mut ParserContext,
) -> Option<TokenKind> {
let start = input.position();

let _ = self.leading_trivia(input);
let token = self.next_token::<LexCtx>(input);

input.set_position(start);
token
}

fn parse_token<const LEX_CTX: u8>(
/// Attempts to consume the next expected token. Advances the input only if the token matches.
fn parse_token<LexCtx: IsLexicalContext>(
&self,
input: &mut ParserContext,
kind: TokenKind,
) -> ParserResult {
let start = input.position();
if self.next_token::<LEX_CTX>(input) != Some(kind) {
if self.next_token::<LexCtx>(input) != Some(kind) {
input.set_position(start);
return ParserResult::no_match(vec![kind]);
}
Expand All @@ -47,7 +58,9 @@ pub trait Lexer {
)
}

fn parse_token_with_trivia<const LEX_CTX: u8>(
/// Attempts to consume the next significant token including both leading and trailing trivia.
/// Advances the input only if the token matches.
fn parse_token_with_trivia<LexCtx: IsLexicalContext>(
&self,
input: &mut ParserContext,
kind: TokenKind,
Expand All @@ -62,7 +75,7 @@ pub trait Lexer {
}

let start = input.position();
if self.next_token::<LEX_CTX>(input) != Some(kind) {
if self.next_token::<LexCtx>(input) != Some(kind) {
input.set_position(restore);
return ParserResult::no_match(vec![kind]);
}
Expand Down
Loading

0 comments on commit 18f590f

Please sign in to comment.