diff --git a/.github/.generated_ast_watch_list.yml b/.github/.generated_ast_watch_list.yml index 8246bb39c34cdd..1968538da11d47 100644 --- a/.github/.generated_ast_watch_list.yml +++ b/.github/.generated_ast_watch_list.yml @@ -18,6 +18,7 @@ src: - 'crates/oxc_ast/src/generated/derive_get_span.rs' - 'crates/oxc_ast/src/generated/derive_get_span_mut.rs' - 'crates/oxc_ast/src/generated/get_id.rs' + - 'crates/oxc_ast/src/generated/utf8_to_utf16_converter.rs' - 'crates/oxc_ast/src/generated/visit.rs' - 'crates/oxc_ast/src/generated/visit_mut.rs' - 'crates/oxc_ast/src/serialize.rs' diff --git a/crates/oxc_ast/src/generated/utf8_to_utf16_converter.rs b/crates/oxc_ast/src/generated/utf8_to_utf16_converter.rs new file mode 100644 index 00000000000000..1dff3c2d102e26 --- /dev/null +++ b/crates/oxc_ast/src/generated/utf8_to_utf16_converter.rs @@ -0,0 +1,1198 @@ +// Auto-generated code, DO NOT EDIT DIRECTLY! +// To edit this generated file you have to edit `tasks/ast_tools/src/generators/utf8_to_utf16.rs` + +use oxc_span::GetSpan; +use oxc_syntax::scope::ScopeFlags; + +use crate::{ + ast::*, + utf8_to_utf16::Utf8ToUtf16Converter, + visit::{VisitMut, walk_mut}, +}; + +impl<'a> VisitMut<'a> for Utf8ToUtf16Converter<'_> { + fn visit_program(&mut self, it: &mut Program<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_program(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_identifier_name(&mut self, it: &mut IdentifierName<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_identifier_name(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_identifier_reference(&mut self, it: &mut IdentifierReference<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_identifier_reference(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_binding_identifier(&mut self, it: &mut BindingIdentifier<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_binding_identifier(self, it); + 
self.convert_offset(&mut it.span.end); + } + + fn visit_label_identifier(&mut self, it: &mut LabelIdentifier<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_label_identifier(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_this_expression(&mut self, it: &mut ThisExpression) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_this_expression(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_array_expression(&mut self, it: &mut ArrayExpression<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_array_expression(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_elision(&mut self, it: &mut Elision) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_elision(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_object_expression(&mut self, it: &mut ObjectExpression<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_object_expression(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_tagged_template_expression(&mut self, it: &mut TaggedTemplateExpression<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_tagged_template_expression(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_template_element(&mut self, it: &mut TemplateElement<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_template_element(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_computed_member_expression(&mut self, it: &mut ComputedMemberExpression<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_computed_member_expression(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_static_member_expression(&mut self, it: &mut StaticMemberExpression<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_static_member_expression(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_private_field_expression(&mut self, it: 
&mut PrivateFieldExpression<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_private_field_expression(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_call_expression(&mut self, it: &mut CallExpression<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_call_expression(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_new_expression(&mut self, it: &mut NewExpression<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_new_expression(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_meta_property(&mut self, it: &mut MetaProperty<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_meta_property(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_spread_element(&mut self, it: &mut SpreadElement<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_spread_element(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_update_expression(&mut self, it: &mut UpdateExpression<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_update_expression(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_unary_expression(&mut self, it: &mut UnaryExpression<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_unary_expression(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_binary_expression(&mut self, it: &mut BinaryExpression<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_binary_expression(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_private_in_expression(&mut self, it: &mut PrivateInExpression<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_private_in_expression(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_logical_expression(&mut self, it: &mut LogicalExpression<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_logical_expression(self, it); + 
self.convert_offset(&mut it.span.end); + } + + fn visit_conditional_expression(&mut self, it: &mut ConditionalExpression<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_conditional_expression(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_assignment_expression(&mut self, it: &mut AssignmentExpression<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_assignment_expression(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_array_assignment_target(&mut self, it: &mut ArrayAssignmentTarget<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_array_assignment_target(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_object_assignment_target(&mut self, it: &mut ObjectAssignmentTarget<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_object_assignment_target(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_assignment_target_rest(&mut self, it: &mut AssignmentTargetRest<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_assignment_target_rest(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_assignment_target_with_default(&mut self, it: &mut AssignmentTargetWithDefault<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_assignment_target_with_default(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_assignment_target_property_identifier( + &mut self, + it: &mut AssignmentTargetPropertyIdentifier<'a>, + ) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_assignment_target_property_identifier(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_assignment_target_property_property( + &mut self, + it: &mut AssignmentTargetPropertyProperty<'a>, + ) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_assignment_target_property_property(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_sequence_expression(&mut 
self, it: &mut SequenceExpression<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_sequence_expression(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_super(&mut self, it: &mut Super) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_super(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_await_expression(&mut self, it: &mut AwaitExpression<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_await_expression(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_chain_expression(&mut self, it: &mut ChainExpression<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_chain_expression(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_parenthesized_expression(&mut self, it: &mut ParenthesizedExpression<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_parenthesized_expression(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_directive(&mut self, it: &mut Directive<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_directive(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_hashbang(&mut self, it: &mut Hashbang<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_hashbang(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_block_statement(&mut self, it: &mut BlockStatement<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_block_statement(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_variable_declaration(&mut self, it: &mut VariableDeclaration<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_variable_declaration(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_variable_declarator(&mut self, it: &mut VariableDeclarator<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_variable_declarator(self, it); + self.convert_offset(&mut it.span.end); + } + + 
fn visit_empty_statement(&mut self, it: &mut EmptyStatement) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_empty_statement(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_expression_statement(&mut self, it: &mut ExpressionStatement<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_expression_statement(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_if_statement(&mut self, it: &mut IfStatement<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_if_statement(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_do_while_statement(&mut self, it: &mut DoWhileStatement<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_do_while_statement(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_while_statement(&mut self, it: &mut WhileStatement<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_while_statement(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_for_statement(&mut self, it: &mut ForStatement<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_for_statement(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_for_in_statement(&mut self, it: &mut ForInStatement<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_for_in_statement(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_for_of_statement(&mut self, it: &mut ForOfStatement<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_for_of_statement(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_continue_statement(&mut self, it: &mut ContinueStatement<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_continue_statement(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_break_statement(&mut self, it: &mut BreakStatement<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_break_statement(self, it); + 
self.convert_offset(&mut it.span.end); + } + + fn visit_return_statement(&mut self, it: &mut ReturnStatement<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_return_statement(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_with_statement(&mut self, it: &mut WithStatement<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_with_statement(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_switch_statement(&mut self, it: &mut SwitchStatement<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_switch_statement(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_switch_case(&mut self, it: &mut SwitchCase<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_switch_case(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_labeled_statement(&mut self, it: &mut LabeledStatement<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_labeled_statement(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_throw_statement(&mut self, it: &mut ThrowStatement<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_throw_statement(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_try_statement(&mut self, it: &mut TryStatement<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_try_statement(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_catch_clause(&mut self, it: &mut CatchClause<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_catch_clause(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_catch_parameter(&mut self, it: &mut CatchParameter<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_catch_parameter(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_debugger_statement(&mut self, it: &mut DebuggerStatement) { + self.convert_offset(&mut it.span.start); + 
walk_mut::walk_debugger_statement(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_assignment_pattern(&mut self, it: &mut AssignmentPattern<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_assignment_pattern(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_object_pattern(&mut self, it: &mut ObjectPattern<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_object_pattern(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_array_pattern(&mut self, it: &mut ArrayPattern<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_array_pattern(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_binding_rest_element(&mut self, it: &mut BindingRestElement<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_binding_rest_element(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_function(&mut self, it: &mut Function<'a>, flags: ScopeFlags) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_function(self, it, flags); + self.convert_offset(&mut it.span.end); + } + + fn visit_formal_parameters(&mut self, it: &mut FormalParameters<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_formal_parameters(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_formal_parameter(&mut self, it: &mut FormalParameter<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_formal_parameter(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_function_body(&mut self, it: &mut FunctionBody<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_function_body(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_arrow_function_expression(&mut self, it: &mut ArrowFunctionExpression<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_arrow_function_expression(self, it); + self.convert_offset(&mut it.span.end); + } + + fn 
visit_yield_expression(&mut self, it: &mut YieldExpression<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_yield_expression(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_class(&mut self, it: &mut Class<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_class(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_class_body(&mut self, it: &mut ClassBody<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_class_body(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_method_definition(&mut self, it: &mut MethodDefinition<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_method_definition(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_property_definition(&mut self, it: &mut PropertyDefinition<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_property_definition(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_private_identifier(&mut self, it: &mut PrivateIdentifier<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_private_identifier(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_static_block(&mut self, it: &mut StaticBlock<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_static_block(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_accessor_property(&mut self, it: &mut AccessorProperty<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_accessor_property(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_import_expression(&mut self, it: &mut ImportExpression<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_import_expression(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_import_declaration(&mut self, it: &mut ImportDeclaration<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_import_declaration(self, it); + 
self.convert_offset(&mut it.span.end); + } + + fn visit_import_default_specifier(&mut self, it: &mut ImportDefaultSpecifier<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_import_default_specifier(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_import_namespace_specifier(&mut self, it: &mut ImportNamespaceSpecifier<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_import_namespace_specifier(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_import_attribute(&mut self, it: &mut ImportAttribute<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_import_attribute(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_export_named_declaration(&mut self, it: &mut ExportNamedDeclaration<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_export_named_declaration(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_export_default_declaration(&mut self, it: &mut ExportDefaultDeclaration<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_export_default_declaration(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_export_all_declaration(&mut self, it: &mut ExportAllDeclaration<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_export_all_declaration(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_boolean_literal(&mut self, it: &mut BooleanLiteral) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_boolean_literal(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_null_literal(&mut self, it: &mut NullLiteral) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_null_literal(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_numeric_literal(&mut self, it: &mut NumericLiteral<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_numeric_literal(self, it); + self.convert_offset(&mut it.span.end); + } + + fn 
visit_string_literal(&mut self, it: &mut StringLiteral<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_string_literal(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_big_int_literal(&mut self, it: &mut BigIntLiteral<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_big_int_literal(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_reg_exp_literal(&mut self, it: &mut RegExpLiteral<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_reg_exp_literal(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_jsx_element(&mut self, it: &mut JSXElement<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_jsx_element(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_jsx_opening_element(&mut self, it: &mut JSXOpeningElement<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_jsx_opening_element(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_jsx_closing_element(&mut self, it: &mut JSXClosingElement<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_jsx_closing_element(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_jsx_fragment(&mut self, it: &mut JSXFragment<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_jsx_fragment(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_jsx_opening_fragment(&mut self, it: &mut JSXOpeningFragment) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_jsx_opening_fragment(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_jsx_closing_fragment(&mut self, it: &mut JSXClosingFragment) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_jsx_closing_fragment(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_jsx_namespaced_name(&mut self, it: &mut JSXNamespacedName<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_jsx_namespaced_name(self, 
it); + self.convert_offset(&mut it.span.end); + } + + fn visit_jsx_member_expression(&mut self, it: &mut JSXMemberExpression<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_jsx_member_expression(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_jsx_expression_container(&mut self, it: &mut JSXExpressionContainer<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_jsx_expression_container(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_jsx_empty_expression(&mut self, it: &mut JSXEmptyExpression) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_jsx_empty_expression(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_jsx_attribute(&mut self, it: &mut JSXAttribute<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_jsx_attribute(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_jsx_spread_attribute(&mut self, it: &mut JSXSpreadAttribute<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_jsx_spread_attribute(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_jsx_identifier(&mut self, it: &mut JSXIdentifier<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_jsx_identifier(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_jsx_spread_child(&mut self, it: &mut JSXSpreadChild<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_jsx_spread_child(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_jsx_text(&mut self, it: &mut JSXText<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_jsx_text(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_this_parameter(&mut self, it: &mut TSThisParameter<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_this_parameter(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_enum_declaration(&mut self, it: &mut TSEnumDeclaration<'a>) { 
+ self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_enum_declaration(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_enum_member(&mut self, it: &mut TSEnumMember<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_enum_member(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_type_annotation(&mut self, it: &mut TSTypeAnnotation<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_type_annotation(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_literal_type(&mut self, it: &mut TSLiteralType<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_literal_type(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_conditional_type(&mut self, it: &mut TSConditionalType<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_conditional_type(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_union_type(&mut self, it: &mut TSUnionType<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_union_type(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_intersection_type(&mut self, it: &mut TSIntersectionType<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_intersection_type(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_parenthesized_type(&mut self, it: &mut TSParenthesizedType<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_parenthesized_type(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_type_operator(&mut self, it: &mut TSTypeOperator<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_type_operator(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_array_type(&mut self, it: &mut TSArrayType<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_array_type(self, it); + self.convert_offset(&mut it.span.end); + } + + 
fn visit_ts_indexed_access_type(&mut self, it: &mut TSIndexedAccessType<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_indexed_access_type(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_tuple_type(&mut self, it: &mut TSTupleType<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_tuple_type(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_named_tuple_member(&mut self, it: &mut TSNamedTupleMember<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_named_tuple_member(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_optional_type(&mut self, it: &mut TSOptionalType<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_optional_type(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_rest_type(&mut self, it: &mut TSRestType<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_rest_type(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_any_keyword(&mut self, it: &mut TSAnyKeyword) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_any_keyword(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_string_keyword(&mut self, it: &mut TSStringKeyword) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_string_keyword(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_boolean_keyword(&mut self, it: &mut TSBooleanKeyword) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_boolean_keyword(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_number_keyword(&mut self, it: &mut TSNumberKeyword) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_number_keyword(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_never_keyword(&mut self, it: &mut TSNeverKeyword) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_never_keyword(self, it); + 
self.convert_offset(&mut it.span.end); + } + + fn visit_ts_intrinsic_keyword(&mut self, it: &mut TSIntrinsicKeyword) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_intrinsic_keyword(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_unknown_keyword(&mut self, it: &mut TSUnknownKeyword) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_unknown_keyword(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_null_keyword(&mut self, it: &mut TSNullKeyword) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_null_keyword(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_undefined_keyword(&mut self, it: &mut TSUndefinedKeyword) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_undefined_keyword(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_void_keyword(&mut self, it: &mut TSVoidKeyword) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_void_keyword(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_symbol_keyword(&mut self, it: &mut TSSymbolKeyword) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_symbol_keyword(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_this_type(&mut self, it: &mut TSThisType) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_this_type(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_object_keyword(&mut self, it: &mut TSObjectKeyword) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_object_keyword(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_big_int_keyword(&mut self, it: &mut TSBigIntKeyword) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_big_int_keyword(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_type_reference(&mut self, it: &mut TSTypeReference<'a>) { + self.convert_offset(&mut it.span.start); + 
walk_mut::walk_ts_type_reference(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_qualified_name(&mut self, it: &mut TSQualifiedName<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_qualified_name(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_type_parameter_instantiation(&mut self, it: &mut TSTypeParameterInstantiation<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_type_parameter_instantiation(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_type_parameter(&mut self, it: &mut TSTypeParameter<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_type_parameter(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_type_parameter_declaration(&mut self, it: &mut TSTypeParameterDeclaration<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_type_parameter_declaration(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_type_alias_declaration(&mut self, it: &mut TSTypeAliasDeclaration<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_type_alias_declaration(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_class_implements(&mut self, it: &mut TSClassImplements<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_class_implements(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_interface_declaration(&mut self, it: &mut TSInterfaceDeclaration<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_interface_declaration(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_interface_body(&mut self, it: &mut TSInterfaceBody<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_interface_body(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_property_signature(&mut self, it: &mut TSPropertySignature<'a>) { + self.convert_offset(&mut 
it.span.start); + walk_mut::walk_ts_property_signature(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_index_signature(&mut self, it: &mut TSIndexSignature<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_index_signature(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_call_signature_declaration(&mut self, it: &mut TSCallSignatureDeclaration<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_call_signature_declaration(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_method_signature(&mut self, it: &mut TSMethodSignature<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_method_signature(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_construct_signature_declaration( + &mut self, + it: &mut TSConstructSignatureDeclaration<'a>, + ) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_construct_signature_declaration(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_index_signature_name(&mut self, it: &mut TSIndexSignatureName<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_index_signature_name(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_interface_heritage(&mut self, it: &mut TSInterfaceHeritage<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_interface_heritage(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_type_predicate(&mut self, it: &mut TSTypePredicate<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_type_predicate(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_module_declaration(&mut self, it: &mut TSModuleDeclaration<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_module_declaration(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_module_block(&mut self, it: &mut TSModuleBlock<'a>) { + 
self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_module_block(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_type_literal(&mut self, it: &mut TSTypeLiteral<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_type_literal(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_infer_type(&mut self, it: &mut TSInferType<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_infer_type(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_type_query(&mut self, it: &mut TSTypeQuery<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_type_query(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_import_type(&mut self, it: &mut TSImportType<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_import_type(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_import_attributes(&mut self, it: &mut TSImportAttributes<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_import_attributes(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_import_attribute(&mut self, it: &mut TSImportAttribute<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_import_attribute(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_function_type(&mut self, it: &mut TSFunctionType<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_function_type(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_constructor_type(&mut self, it: &mut TSConstructorType<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_constructor_type(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_mapped_type(&mut self, it: &mut TSMappedType<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_mapped_type(self, it); + self.convert_offset(&mut it.span.end); + } + + fn 
visit_ts_template_literal_type(&mut self, it: &mut TSTemplateLiteralType<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_template_literal_type(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_as_expression(&mut self, it: &mut TSAsExpression<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_as_expression(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_satisfies_expression(&mut self, it: &mut TSSatisfiesExpression<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_satisfies_expression(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_type_assertion(&mut self, it: &mut TSTypeAssertion<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_type_assertion(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_import_equals_declaration(&mut self, it: &mut TSImportEqualsDeclaration<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_import_equals_declaration(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_external_module_reference(&mut self, it: &mut TSExternalModuleReference<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_external_module_reference(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_non_null_expression(&mut self, it: &mut TSNonNullExpression<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_non_null_expression(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_decorator(&mut self, it: &mut Decorator<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_decorator(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_export_assignment(&mut self, it: &mut TSExportAssignment<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_export_assignment(self, it); + self.convert_offset(&mut it.span.end); + } + + fn 
visit_ts_namespace_export_declaration(&mut self, it: &mut TSNamespaceExportDeclaration<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_namespace_export_declaration(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_ts_instantiation_expression(&mut self, it: &mut TSInstantiationExpression<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_ts_instantiation_expression(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_js_doc_nullable_type(&mut self, it: &mut JSDocNullableType<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_js_doc_nullable_type(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_js_doc_non_nullable_type(&mut self, it: &mut JSDocNonNullableType<'a>) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_js_doc_non_nullable_type(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_js_doc_unknown_type(&mut self, it: &mut JSDocUnknownType) { + self.convert_offset(&mut it.span.start); + walk_mut::walk_js_doc_unknown_type(self, it); + self.convert_offset(&mut it.span.end); + } + + fn visit_object_property(&mut self, it: &mut ObjectProperty<'a>) { + self.convert_offset(&mut it.span.start); + match (it.shorthand, &mut it.key, &mut it.value) { + (true, PropertyKey::StaticIdentifier(key), Expression::Identifier(value)) => { + self.visit_identifier_name(key); + value.span = key.span; + } + (_, key, value) => { + self.visit_property_key(key); + self.visit_expression(value); + } + } + self.convert_offset(&mut it.span.end); + } + + fn visit_binding_property(&mut self, it: &mut BindingProperty<'a>) { + self.convert_offset(&mut it.span.start); + match (it.shorthand, &mut it.key, &mut it.value) { + ( + true, + PropertyKey::StaticIdentifier(key), + BindingPattern { kind: BindingPatternKind::BindingIdentifier(value), .. 
}, + ) => { + self.visit_identifier_name(key); + value.span = key.span; + } + ( + true, + PropertyKey::StaticIdentifier(key), + BindingPattern { kind: BindingPatternKind::AssignmentPattern(pattern), .. }, + ) => { + self.visit_assignment_pattern(pattern); + key.span = pattern.left.span(); + } + (_, key, value) => { + self.visit_property_key(key); + self.visit_binding_pattern(value); + } + } + self.convert_offset(&mut it.span.end); + } + + fn visit_export_specifier(&mut self, it: &mut ExportSpecifier<'a>) { + self.convert_offset(&mut it.span.start); + match (&mut it.local, &mut it.exported) { + ( + ModuleExportName::IdentifierReference(local), + ModuleExportName::IdentifierName(exported), + ) if local.span == exported.span => { + self.visit_identifier_reference(local); + exported.span = local.span; + } + ( + ModuleExportName::IdentifierName(local), + ModuleExportName::IdentifierName(exported), + ) if local.span == exported.span => { + self.visit_identifier_name(local); + exported.span = local.span; + } + (ModuleExportName::StringLiteral(local), ModuleExportName::StringLiteral(exported)) + if local.span == exported.span => + { + self.visit_string_literal(local); + exported.span = local.span; + } + (local, exported) => { + self.visit_module_export_name(local); + self.visit_module_export_name(exported); + } + } + self.convert_offset(&mut it.span.end); + } + + fn visit_import_specifier(&mut self, it: &mut ImportSpecifier<'a>) { + self.convert_offset(&mut it.span.start); + match &mut it.imported { + ModuleExportName::IdentifierName(imported) if imported.span == it.local.span => { + self.visit_identifier_name(imported); + it.local.span = imported.span; + } + imported => { + self.visit_module_export_name(imported); + self.visit_binding_identifier(&mut it.local); + } + } + self.convert_offset(&mut it.span.end); + } + + fn visit_with_clause(&mut self, it: &mut WithClause<'a>) { + self.visit_import_attributes(&mut it.with_entries); + } + + fn visit_template_literal(&mut self, 
it: &mut TemplateLiteral<'a>) { + self.convert_offset(&mut it.span.start); + for (quasi, expression) in it.quasis.iter_mut().zip(&mut it.expressions) { + self.visit_template_element(quasi); + self.visit_expression(expression); + } + self.visit_template_element(it.quasis.last_mut().unwrap()); + self.convert_offset(&mut it.span.end); + } +} diff --git a/crates/oxc_ast/src/lib.rs b/crates/oxc_ast/src/lib.rs index b512bb7a239c32..1ec1ab8957fa9b 100644 --- a/crates/oxc_ast/src/lib.rs +++ b/crates/oxc_ast/src/lib.rs @@ -67,6 +67,8 @@ mod generated { pub mod derive_get_span; pub mod derive_get_span_mut; pub mod get_id; + #[cfg(feature = "serialize")] + mod utf8_to_utf16_converter; pub mod visit; pub mod visit_mut; } diff --git a/crates/oxc_ast/src/utf8_to_utf16.rs b/crates/oxc_ast/src/utf8_to_utf16.rs index 0b449f8f85607d..19764c776326b8 100644 --- a/crates/oxc_ast/src/utf8_to_utf16.rs +++ b/crates/oxc_ast/src/utf8_to_utf16.rs @@ -1,54 +1,40 @@ //! Convert UTF-8 span offsets to UTF-16. +use std::cmp::min; + use oxc_span::Span; use oxc_syntax::module_record::{ModuleRecord, VisitMutModuleRecord}; -use crate::{ast::Program, visit::VisitMut}; +use crate::{ + ast::{Comment, Program}, + visit::VisitMut, +}; /// Convert UTF-8 span offsets to UTF-16. pub struct Utf8ToUtf16 { translations: Vec, } +/// A translation from UTF-8 offset to UTF-16 offset. #[derive(Clone, Copy)] #[repr(align(8))] struct Translation { - // UTF-8 byte offset + /// UTF-8 byte offset. + /// This is the UTF-8 offset of start of a Unicode character PLUS 1. + /// So this offset sits in the middle of the Unicode character. + /// Exception is the dummy first entry in table, where it's 0. 
utf8_offset: u32, - // Number to subtract from UTF-8 byte offset to get UTF-16 char offset - // for offsets *after* `utf8_offset` + /// Number to subtract from UTF-8 byte offset to get UTF-16 char offset + /// for UTF-8 offsets after `utf8_offset` utf16_difference: u32, } impl Utf8ToUtf16 { - /// Create new `Utf8ToUtf16` converter. - #[expect(clippy::new_without_default)] - pub fn new() -> Self { + /// Create new [`Utf8ToUtf16`] conversion table from source text. + pub fn new(source_text: &str) -> Self { let mut translations = Vec::with_capacity(16); translations.push(Translation { utf8_offset: 0, utf16_difference: 0 }); - Self { translations } - } - - /// Convert all spans in the AST to UTF-16. - pub fn convert(&mut self, program: &mut Program<'_>) { - self.build_table(program.source_text); - // Skip if source is entirely ASCII - if self.translations.len() == 1 { - return; - } - self.visit_program(program); - for comment in &mut program.comments { - self.convert_span(&mut comment.span); - } - } - - /// Convert spans in ModuleRecord to UTF-16 - pub fn convert_module_record(&mut self, module_record: &mut ModuleRecord<'_>) { - self.visit_module_record(module_record); - } - #[expect(clippy::cast_possible_truncation)] - fn build_table(&mut self, source_text: &str) { // Translation from UTF-8 byte offset to UTF-16 char offset: // // * 1-byte UTF-8 sequence @@ -70,45 +56,322 @@ impl Utf8ToUtf16 { // // So UTF-16 offset = UTF-8 offset - count of bytes `>= 0xC0` - count of bytes `>= 0xE0` let mut utf16_difference = 0; + #[expect(clippy::cast_possible_truncation)] for (utf8_offset, &byte) in source_text.as_bytes().iter().enumerate() { if byte >= 0xC0 { let difference_for_this_byte = u32::from(byte >= 0xE0) + 1; utf16_difference += difference_for_this_byte; // Record `utf8_offset + 1` not `utf8_offset`, because it's only offsets *after* this // Unicode character that need to be shifted - self.translations + translations .push(Translation { utf8_offset: utf8_offset as u32 + 
1, utf16_difference }); } } + + // If no translations have been added after the first `0, 0` dummy, then source is entirely ASCII. + // Remove the dummy entry. + // Therefore, `translations` always has at least 2 entries, if it has any. + if translations.len() == 1 { + translations.clear(); + } + + Self { translations } + } + + /// Create a [`Utf8ToUtf16Converter`] converter, to convert offsets from UTF-8 to UTF-16. + /// + /// The converter is optimized for converting a sequence of offsets in ascending order. + /// It will also correctly handle offsets in any order, but at a performance cost. + /// + /// Returns `None` if the source text is entirely ASCII, and so requires no conversion. + pub fn converter(&self) -> Option { + if self.translations.is_empty() { + None + } else { + // SAFETY: `translations` contains at least 2 entries if it's not empty. + // We just checked it's not empty. + Some(unsafe { Utf8ToUtf16Converter::new(&self.translations) }) + } + } + + /// Convert all spans in AST to UTF-16. + pub fn convert_program(&self, program: &mut Program<'_>) { + if let Some(mut converter) = self.converter() { + converter.visit_program(program); + } + } + + /// Convert all spans in comments to UTF-16. + pub fn convert_comments(&self, comments: &mut [Comment]) { + if let Some(mut converter) = self.converter() { + for comment in comments { + converter.convert_span(&mut comment.span); + } + } + } + + /// Convert all spans in `ModuleRecord` to UTF-16. + pub fn convert_module_record(&self, module_record: &mut ModuleRecord<'_>) { + if let Some(mut converter) = self.converter() { + converter.visit_module_record(module_record); + } } +} + +/// Offset converter, optimized for converting a sequence of offsets in ascending order. +/// +/// ## Implementation details +/// +/// At any time, one range of the source text is active. +/// This range starts at byte `range_start`, and is `range_len` bytes long. 
+/// The range describes a stretch of source text which contains only ASCII characters. +/// A UTF-8 offset within this range can be converted to UTF-16 offset with the formula +/// `utf16_offset = (utf8_offset - range_start_utf8).wrapping_add(range_start_utf16)`. +/// +/// [`convert_offset`] has a very fast path for converting offsets in the current range. +/// +/// If the offset is outside current range (either before it, or after it), the range containing that +/// offset is identified, and becomes the new current range. +/// +/// Therefore, when converting a sequence of offsets in ascending order, the vast majority of +/// conversions will hit the fast path, as they'll be within the same range as the last offset. +/// When an offset is outside current range, there's a cost (`convert_offset_slow`), +/// but then the stretch of source text containing that offset becomes the current range, +/// and the next run of offsets which are before the end of that range will all hit the fast path again. +/// +/// [`convert_offset`]: Self::convert_offset +pub struct Utf8ToUtf16Converter<'t> { + /// Translation table + translations: &'t [Translation], + /// UTF-8 offset of start of current range + range_start_utf8: u32, + /// Length of current range in UTF-8 bytes + range_len_utf8: u32, + /// UTF-16 offset of start of range. + /// To convert offset within this range: + /// `utf16_offset = (utf8_offset - range_start_utf8).wrapping_add(range_start_utf16)`. + /// Note: `range_start_utf16` is calculated and used with wrapping addition/subtraction, + /// because it can wrap around when a Unicode character is very close to start of source. + /// We store UTF-16 range start, rather than `utf16_difference`, because it makes + /// [`Self::convert_offset`] more efficient - 1 less instruction, and 1 less register. + /// + range_start_utf16: u32, + /// Index of current `Translation` + index: u32, +} + +impl<'t> Utf8ToUtf16Converter<'t> { + /// Create new [`Utf8ToUtf16Converter`]. 
+ /// + /// # SAFETY + /// `translations` must contain at least 2 entries. + unsafe fn new(translations: &'t [Translation]) -> Self { + debug_assert!(translations.len() >= 2); - fn convert_span(&self, span: &mut Span) { - span.start = self.convert_offset(span.start); - span.end = self.convert_offset(span.end); + // SAFETY: Caller guarantees `translations` contains at least 2 entries + let range_len_utf8 = unsafe { translations.get_unchecked(1) }.utf8_offset; + + Self { translations, range_start_utf8: 0, range_start_utf16: 0, range_len_utf8, index: 0 } + } + + /// Reset this [`Utf8ToUtf16Converter`] to starting position. + /// + /// After this call, it's ready to convert an ascending sequence of offsets in most efficient way. + pub fn reset(&mut self) { + self.range_start_utf8 = 0; + self.range_start_utf16 = 0; + // SAFETY: Caller guaranteed `translations` contains at least 2 entries in `new` + self.range_len_utf8 = unsafe { self.translations.get_unchecked(1) }.utf8_offset; + self.index = 0; } /// Convert UTF-8 offset to UTF-16. - pub fn convert_offset(&self, utf8_offset: u32) -> u32 { - // Find the first entry in table *after* the UTF-8 offset. - // The difference we need to subtract is recorded in the entry prior to it. - let index = - self.translations.partition_point(|translation| translation.utf8_offset <= utf8_offset); + /// + /// Conversion is faster if `convert_offset` is called with offsets in ascending order. + // + // This method is optimized for the offset being within the current range. + // This will be the case if `convert_offset` is called with offsets in ascending order, + // and Unicode characters are fairly rare within the source. + // + // This method is written to reduce this common path to as few instructions as possible. + // It's only 8 instructions on x86_64, with 2 branches, and using only 1 register. + // https://godbolt.org/z/1xnx1v17T + // + // `#[inline(always)]` because this function is small and on a very hot path. 
+ #[expect(clippy::inline_always)] + #[inline(always)] + pub fn convert_offset(&mut self, offset: &mut u32) { + let utf8_offset = *offset; + + // When AST has been modified, it may contain unspanned AST nodes. + // Offset 0 always translates to 0. + // Don't allow this to fall into the slow path, and don't update the current range, + // because nodes following this will likely be within same range as the last non-generated node. + if utf8_offset == 0 { + return; + } + + let bytes_from_start_of_range = utf8_offset.wrapping_sub(self.range_start_utf8); + if bytes_from_start_of_range <= self.range_len_utf8 { + // Offset is within current range. + // `wrapping_add` because `range_start_utf16` can be `u32::MAX`. + *offset = self.range_start_utf16.wrapping_add(bytes_from_start_of_range); + } else { + // Offset is outside current range - slow path + self.convert_offset_slow(offset); + } + } + + /// Convert UTF-8 offset to UTF-16 where offset is outside of current range + /// (either before it, or after it). + /// + /// We have 1 method for both cases, and branch here on before/after to keep `convert_offset` + /// as streamlined as possible. + #[cold] + #[inline(never)] + #[expect(clippy::cast_possible_truncation)] + fn convert_offset_slow(&mut self, offset: &mut u32) { + // Find the range containing this offset + let utf8_offset = *offset; + let (next_index, range_end_utf8) = if utf8_offset < self.range_start_utf8 { + self.find_range_before(utf8_offset) + } else { + self.find_range_after(utf8_offset) + }; + + // `find_range_before` and `find_range_after` always return a `next_index` which is > 0, + // so `next_index - 1` cannot wrap around + let index = next_index - 1; + + // SAFETY: `find_range_before` and `find_range_after` always return a `next_index` which is + // `<= translations.len()`. So `next_index - 1` is in bounds. 
+ let translation = unsafe { *self.translations.get_unchecked(index) }; + let range_start_utf8 = translation.utf8_offset; + let utf16_difference = translation.utf16_difference; + + self.index = index as u32; + self.range_start_utf8 = range_start_utf8; + self.range_len_utf8 = range_end_utf8 - range_start_utf8; + + // `wrapping_sub` because `utf16_difference` can be `> range_start_utf8` where one of + // first few characters of source is Unicode. e.g.: + // + // * 1st char is Unicode: + // * `range_start_utf8 = 1` (offsets in `Translation`s are the offset of the character + 1). + // * `utf16_difference` is the length of the Unicode char, which is `> 1`. + // + // * If 1st 2 chars are ASCII, but 3rd char is a 4-byte Unicode char: + // * `range_start_utf8 = 3`. + // * `utf16_difference = 4`. + self.range_start_utf16 = range_start_utf8.wrapping_sub(utf16_difference); + + *offset = utf8_offset - utf16_difference; + } + + /// Find range containing `utf8_offset` which is before current range. + /// + /// Returns index of range *after* the range containing the offset, + /// and UTF-8 offset of start of that next range. + /// i.e. the range containing the offset has index 1 less than the index that's returned by this method. + /// + /// The index returned is always `> 0` and `<= self.translations.len()`. + fn find_range_before( + &self, + utf8_offset: u32, + ) -> ( + usize, // index of next range + u32, // UTF-8 offset of start of next range + ) { + // TODO: Do linear search here before resorting to binary search. + // I (@overlookmotel) have left that out for now, because when processing an AST straight + // from the parser, it has offsets in ascending order, so this method won't be called anyway + // for AST spans. It may still be called when processing module record, which may be out of order, + // but module record has few entries, so is not critical for performance. + + // Find the first entry in table *after* the UTF-8 offset. This is the end of the new range. 
+ // Only need to search before current range, as we already know current range starts after `utf8_offset`. + // SAFETY: `index` is always in bounds of `translations`. + let search_ranges = unsafe { self.translations.get_unchecked(..self.index as usize) }; + let next_index = + search_ranges.partition_point(|translation| utf8_offset >= translation.utf8_offset); + + // SAFETY: We only searched up to `self.index`, which is less than `translations.len()`. + // So `next_index` is guaranteed to be in bounds. + let range_end_utf8 = unsafe { self.translations.get_unchecked(next_index) }.utf8_offset; + // First entry in table is `0, 0`. `partition_point` finds the first entry where - // `utf8_offset < translation.utf8_offset` (or `translations.len()` if none exists). - // So guaranteed `index > 0`, and `index <= translations.len()`. - // Therefore `index - 1` cannot wrap around, and cannot be out of bounds. - let translation = self.translations[index - 1]; - utf8_offset - translation.utf16_difference + // `utf8_offset < translation.utf8_offset` (or `translations.len()` if none exists). + // So guaranteed `next_index > 0`, and `next_index <= translations.len()`. + (next_index, range_end_utf8) } -} -impl VisitMut<'_> for Utf8ToUtf16 { - fn visit_span(&mut self, span: &mut Span) { - self.convert_span(span); + /// Find range containing `utf8_offset` which is after current range. + /// + /// Returns index of range *after* the range containing the offset, + /// and UTF-8 offset of start of that next range. + /// i.e. the range containing the offset has index 1 less than the index that's returned by this method. + /// + /// The index returned is always `> 0` and `<= self.translations.len()`. + fn find_range_after( + &self, + utf8_offset: u32, + ) -> ( + usize, // index of next range + u32, // UTF-8 offset of start of next range + ) { + // Find the first entry in table *after* the UTF-8 offset. This is the end of the new range. + + // Try linear search first. 
+ const LINEAR_SEARCH_ITERATIONS: usize = 8; + + // `utf8_offset` is after current range, so there must be another range after this one. + // We don't need to include next range in search because we know it starts before `utf8_offset`, + // and we're looking for a range which starts *after* `utf8_offset`. + let mut next_index = self.index as usize + 2; + let linear_search_end_index = + min(next_index + LINEAR_SEARCH_ITERATIONS, self.translations.len()); + while next_index < linear_search_end_index { + // SAFETY: `linear_search_end_index` is capped at `translations.len()`, + // so `next_index` is in bounds + let translation = unsafe { self.translations.get_unchecked(next_index) }; + if utf8_offset < translation.utf8_offset { + return (next_index, translation.utf8_offset); + } + next_index += 1; + } + + // If linear search exhausted all ranges, without finding a range which starts after `utf8_offset`, + // then offset is in the last range. Return `u32::MAX` as the range end. + if next_index == self.translations.len() { + return (next_index, u32::MAX); + } + + // Binary search over remaining translations. + // SAFETY: `next_index < self.translations.len()`. + let search_ranges = unsafe { self.translations.get_unchecked(next_index..) }; + next_index += + search_ranges.partition_point(|translation| utf8_offset >= translation.utf8_offset); + + let range_end_utf8 = if next_index < self.translations.len() { + self.translations[next_index].utf8_offset + } else { + // `utf8_offset` is in last range. Return `u32::MAX` as the range end. + u32::MAX + }; + + // We started search at a non-zero index, so `next_index` cannot be 0. + // `next_index <= translations.len()`. + (next_index, range_end_utf8) + } + + /// Convert [`Span`] from UTF-8 offsets to UTF-16 offsets. 
+ pub fn convert_span(&mut self, span: &mut Span) { + self.convert_offset(&mut span.start); + self.convert_offset(&mut span.end); } } -impl VisitMutModuleRecord for Utf8ToUtf16 { +impl VisitMutModuleRecord for Utf8ToUtf16Converter<'_> { fn visit_span(&mut self, span: &mut Span) { self.convert_span(span); } @@ -147,7 +410,10 @@ mod test { ]), ); - Utf8ToUtf16::new().convert(&mut program); + let span_converter = Utf8ToUtf16::new(program.source_text); + span_converter.convert_program(&mut program); + span_converter.convert_comments(&mut program.comments); + assert_eq!(program.span, Span::new(0, 11)); assert_eq!(program.body[1].span(), Span::new(1, 5)); let Statement::ExpressionStatement(expr_stmt) = &program.body[1] else { unreachable!() }; @@ -193,11 +459,28 @@ mod test { ("_🤨_🤨_", &[(0, 0), (1, 1), (5, 3), (6, 4), (10, 6), (11, 7)]), ]; - for (text, translations) in cases { - let mut converter = Utf8ToUtf16::new(); - converter.build_table(text); - for &(utf8_offset, expected_utf16_offset) in *translations { - assert_eq!(converter.convert_offset(utf8_offset), expected_utf16_offset); + for &(text, translations) in cases { + let table = Utf8ToUtf16::new(text); + let converter = table.converter(); + if let Some(mut converter) = converter { + // Iterate in forwards order + for &(utf8_offset, expected_utf16_offset) in translations { + let mut utf16_offset = utf8_offset; + converter.convert_offset(&mut utf16_offset); + assert_eq!(utf16_offset, expected_utf16_offset); + } + + // Iterate in backwards order + for &(utf8_offset, expected_utf16_offset) in translations.iter().rev() { + let mut utf16_offset = utf8_offset; + converter.convert_offset(&mut utf16_offset); + assert_eq!(utf16_offset, expected_utf16_offset); + } + } else { + // No Unicode chars. All offsets should be the same. 
+ for &(utf8_offset, expected_utf16_offset) in translations { + assert_eq!(utf8_offset, expected_utf16_offset); + } } } } diff --git a/crates/oxc_parser/examples/parser.rs b/crates/oxc_parser/examples/parser.rs index 8836ac5a8d4c53..b97d0101fe2fca 100644 --- a/crates/oxc_parser/examples/parser.rs +++ b/crates/oxc_parser/examples/parser.rs @@ -41,7 +41,7 @@ fn main() -> Result<(), String> { if show_ast || show_estree { println!("AST:"); if show_estree { - Utf8ToUtf16::new().convert(&mut program); + Utf8ToUtf16::new(&source_text).convert_program(&mut program); } println!("{}", program.to_pretty_estree_ts_json()); } diff --git a/crates/oxc_wasm/src/lib.rs b/crates/oxc_wasm/src/lib.rs index 0f79f896e4441d..b9b30bdf500803 100644 --- a/crates/oxc_wasm/src/lib.rs +++ b/crates/oxc_wasm/src/lib.rs @@ -491,22 +491,37 @@ impl Oxc { } fn convert_ast(&mut self, program: &mut Program) { - Utf8ToUtf16::new().convert(program); + let span_converter = Utf8ToUtf16::new(program.source_text); + span_converter.convert_program(program); self.ast_json = program.to_pretty_estree_ts_json(); - self.comments = Self::map_comments(program.source_text, &program.comments); + + self.comments = Self::map_comments(program.source_text, &program.comments, &span_converter); } - fn map_comments(source_text: &str, comments: &[OxcComment]) -> Vec { + fn map_comments( + source_text: &str, + comments: &[OxcComment], + span_converter: &Utf8ToUtf16, + ) -> Vec { + let mut offset_converter = span_converter.converter(); + comments .iter() - .map(|comment| Comment { - r#type: match comment.kind { - CommentKind::Line => CommentType::Line, - CommentKind::Block => CommentType::Block, - }, - value: comment.content_span().source_text(source_text).to_string(), - start: comment.span.start, - end: comment.span.end, + .map(|comment| { + let value = comment.content_span().source_text(source_text).to_string(); + let mut span = comment.span; + if let Some(converter) = &mut offset_converter { + converter.convert_span(&mut 
span); + } + Comment { + r#type: match comment.kind { + CommentKind::Line => CommentType::Line, + CommentKind::Block => CommentType::Block, + }, + value, + start: span.start, + end: span.end, + } }) .collect() } diff --git a/napi/parser/src/lib.rs b/napi/parser/src/lib.rs index b3233b7693b835..026380595d7d30 100644 --- a/napi/parser/src/lib.rs +++ b/napi/parser/src/lib.rs @@ -66,45 +66,54 @@ pub fn parse_without_return(filename: String, source_text: String, options: Opti fn parse_with_return(filename: &str, source_text: String, options: &ParserOptions) -> ParseResult { let allocator = Allocator::default(); let source_type = get_source_type(filename, options); - let mut ret = parse(&allocator, source_type, &source_text, options); + let ret = parse(&allocator, source_type, &source_text, options); + let mut program = ret.program; + let mut module_record = ret.module_record; let mut errors = ret.errors.into_iter().map(OxcError::from).collect::>(); - let mut comments = ret - .program + // Convert spans to UTF-16 + let span_converter = Utf8ToUtf16::new(&source_text); + span_converter.convert_program(&mut program); + + // Convert comments + let mut offset_converter = span_converter.converter(); + let comments = program .comments .iter() - .map(|comment| Comment { - r#type: match comment.kind { - CommentKind::Line => String::from("Line"), - CommentKind::Block => String::from("Block"), - }, - value: comment.content_span().source_text(&source_text).to_string(), - start: comment.span.start, - end: comment.span.end, - }) - .collect::>(); - - // Empty `comments` so comment spans don't get converted twice - ret.program.comments.clear(); + .map(|comment| { + let value = comment.content_span().source_text(&source_text).to_string(); + let mut span = comment.span; + if let Some(converter) = offset_converter.as_mut() { + converter.convert_span(&mut span); + } - let mut converter = Utf8ToUtf16::new(); - converter.convert(&mut ret.program); - converter.convert_module_record(&mut 
ret.module_record); + Comment { + r#type: match comment.kind { + CommentKind::Line => String::from("Line"), + CommentKind::Block => String::from("Block"), + }, + value, + start: span.start, + end: span.end, + } + }) + .collect::>(); - for comment in &mut comments { - comment.start = converter.convert_offset(comment.start); - comment.end = converter.convert_offset(comment.end); - } + // Convert spans in module record to UTF-16 + span_converter.convert_module_record(&mut module_record); - for error in &mut errors { - for label in &mut error.labels { - label.start = converter.convert_offset(label.start); - label.end = converter.convert_offset(label.end); + // Convert spans in errors to UTF-16 + if let Some(mut converter) = span_converter.converter() { + for error in &mut errors { + for label in &mut error.labels { + converter.convert_offset(&mut label.start); + converter.convert_offset(&mut label.end); + } } } - let program = ret.program.to_estree_ts_json(); - let module = EcmaScriptModule::from(&ret.module_record); + let program = program.to_estree_ts_json(); + let module = EcmaScriptModule::from(&module_record); ParseResult { program, module, comments, errors } } diff --git a/tasks/ast_tools/src/generators/mod.rs b/tasks/ast_tools/src/generators/mod.rs index 132f93dbbb248e..6adfdebdbc21b9 100644 --- a/tasks/ast_tools/src/generators/mod.rs +++ b/tasks/ast_tools/src/generators/mod.rs @@ -9,6 +9,7 @@ mod ast_builder; mod ast_kind; mod get_id; mod typescript; +mod utf8_to_utf16; mod visit; pub use assert_layouts::AssertLayouts; @@ -16,6 +17,7 @@ pub use ast_builder::AstBuilderGenerator; pub use ast_kind::AstKindGenerator; pub use get_id::GetIdGenerator; pub use typescript::TypescriptGenerator; +pub use utf8_to_utf16::Utf8ToUtf16ConverterGenerator; pub use visit::VisitGenerator; /// Trait to define a generator. 
diff --git a/tasks/ast_tools/src/generators/utf8_to_utf16.rs b/tasks/ast_tools/src/generators/utf8_to_utf16.rs new file mode 100644 index 00000000000000..9d013c63905304 --- /dev/null +++ b/tasks/ast_tools/src/generators/utf8_to_utf16.rs @@ -0,0 +1,279 @@ +//! Generator for visitor to convert spans from UTF-8 offsets to UTF-16 offsets. + +use proc_macro2::TokenStream; +use quote::quote; + +use crate::{ + AST_CRATE_PATH, Codegen, Generator, + output::{Output, output_path}, + schema::{Def, Schema, StructDef, TypeId}, + utils::create_ident, +}; + +use super::define_generator; + +/// Generator for visitor to convert spans from UTF-8 offsets to UTF-16 offsets. +pub struct Utf8ToUtf16ConverterGenerator; + +define_generator!(Utf8ToUtf16ConverterGenerator); + +impl Generator for Utf8ToUtf16ConverterGenerator { + fn generate(&self, schema: &Schema, codegen: &Codegen) -> Output { + let output = generate(schema, codegen); + Output::Rust { + path: output_path(AST_CRATE_PATH, "utf8_to_utf16_converter.rs"), + tokens: output, + } + } +} + +/// Generate `VisitMut` impl for `Utf8ToUtf16Converter`. +/// +/// For each AST node, update `span.start` first, then visit child nodes, then update `span.end`. +/// This ensures offsets are updated in ascending order +/// (assuming AST has not been modified since it was parsed, so nodes are in original order). +/// +/// The only exceptions are: +/// 1. Types where a shorthand syntax means 2 nodes have same span e.g. `const {x} = y;`, `export {x}`. +/// 2. `WithClause`, where `IdentifierName` for `with` keyword has span outside of the `WithClause`. +/// 3. `TemplateLiteral`s, where `quasis` and `expressions` are interleaved. +/// +/// Define custom visitors for these types, which ensure `convert_offset` is always called with offsets +/// in ascending order. 
+fn generate(schema: &Schema, codegen: &Codegen) -> TokenStream { + let estree_derive_id = codegen.get_derive_id_by_name("ESTree"); + let span_type_id = schema.type_names["Span"]; + let skip_type_ids = [ + "ObjectProperty", + "BindingProperty", + "ImportSpecifier", + "ExportSpecifier", + "WithClause", + "TemplateLiteral", + ] + .map(|type_name| schema.type_names[type_name]); + + let methods = schema.types.iter().filter_map(|type_def| { + let struct_def = type_def.as_struct()?; + + if !struct_def.generates_derive(estree_derive_id) { + return None; + } + + if skip_type_ids.contains(&struct_def.id) { + return None; + } + + // Skip `oxc_regular_expression` types. They don't appear in ESTree AST. + if struct_def.file(schema).krate() == "oxc_regular_expression" { + return None; + } + + generate_visitor(struct_def, span_type_id, schema) + }); + + quote! { + use oxc_span::GetSpan; + use oxc_syntax::scope::ScopeFlags; + + ///@@line_break + use crate::{ + ast::*, + utf8_to_utf16::Utf8ToUtf16Converter, + visit::{VisitMut, walk_mut}, + }; + + ///@@line_break + impl<'a> VisitMut<'a> for Utf8ToUtf16Converter<'_> { + #(#methods)* + + ///@@line_break + fn visit_object_property(&mut self, it: &mut ObjectProperty<'a>) { + self.convert_offset(&mut it.span.start); + + // If shorthand, span of `key` and `value` are the same + match (it.shorthand, &mut it.key, &mut it.value) { + (true, PropertyKey::StaticIdentifier(key), Expression::Identifier(value)) => { + self.visit_identifier_name(key); + value.span = key.span; + } + (_, key, value) => { + self.visit_property_key(key); + self.visit_expression(value); + } + } + + self.convert_offset(&mut it.span.end); + } + + ///@@line_break + fn visit_binding_property(&mut self, it: &mut BindingProperty<'a>) { + self.convert_offset(&mut it.span.start); + + // If shorthand, span of `key` and `value` are the same + match (it.shorthand, &mut it.key, &mut it.value) { + ( + true, + PropertyKey::StaticIdentifier(key), + BindingPattern { kind: 
BindingPatternKind::BindingIdentifier(value), .. }, + ) => { + self.visit_identifier_name(key); + value.span = key.span; + } + ( + true, + PropertyKey::StaticIdentifier(key), + BindingPattern { kind: BindingPatternKind::AssignmentPattern(pattern), .. }, + ) => { + self.visit_assignment_pattern(pattern); + key.span = pattern.left.span(); + } + (_, key, value) => { + self.visit_property_key(key); + self.visit_binding_pattern(value); + } + } + + self.convert_offset(&mut it.span.end); + } + + ///@@line_break + fn visit_export_specifier(&mut self, it: &mut ExportSpecifier<'a>) { + self.convert_offset(&mut it.span.start); + + // `local` and `exported` have same span if e.g.: + // * `export {x}` + // * `export {x} from 'foo.js';` + // * `export {"a-b"} from 'foo.js';` + match (&mut it.local, &mut it.exported) { + ( + ModuleExportName::IdentifierReference(local), + ModuleExportName::IdentifierName(exported), + ) if local.span == exported.span => { + self.visit_identifier_reference(local); + exported.span = local.span; + } + ( + ModuleExportName::IdentifierName(local), + ModuleExportName::IdentifierName(exported), + ) if local.span == exported.span => { + self.visit_identifier_name(local); + exported.span = local.span; + } + ( + ModuleExportName::StringLiteral(local), + ModuleExportName::StringLiteral(exported), + ) if local.span == exported.span => { + self.visit_string_literal(local); + exported.span = local.span; + } + (local, exported) => { + self.visit_module_export_name(local); + self.visit_module_export_name(exported); + } + } + + self.convert_offset(&mut it.span.end); + } + + ///@@line_break + fn visit_import_specifier(&mut self, it: &mut ImportSpecifier<'a>) { + self.convert_offset(&mut it.span.start); + + // `imported` and `local` have same span if e.g.
`import {x} from 'foo';` + match &mut it.imported { + ModuleExportName::IdentifierName(imported) if imported.span == it.local.span => { + self.visit_identifier_name(imported); + it.local.span = imported.span; + } + imported => { + self.visit_module_export_name(imported); + self.visit_binding_identifier(&mut it.local); + } + } + + self.convert_offset(&mut it.span.end); + } + + ///@@line_break + fn visit_with_clause(&mut self, it: &mut WithClause<'a>) { + // `WithClause::attributes_keyword` has a span before start of the `WithClause`. + // ESTree does not include that node, nor the span of the `WithClause` itself, + // so skip processing those spans. + self.visit_import_attributes(&mut it.with_entries); + } + + ///@@line_break + fn visit_template_literal(&mut self, it: &mut TemplateLiteral<'a>) { + self.convert_offset(&mut it.span.start); + + // Visit `quasis` and `expressions` in source order. The two `Vec`s are interleaved. + for (quasi, expression) in it.quasis.iter_mut().zip(&mut it.expressions) { + self.visit_template_element(quasi); + self.visit_expression(expression); + } + self.visit_template_element(it.quasis.last_mut().unwrap()); + + self.convert_offset(&mut it.span.end); + } + } + } +} + +/// Generate visitor method. +fn generate_visitor( + struct_def: &StructDef, + span_type_id: TypeId, + schema: &Schema, +) -> Option<TokenStream> { + // Find `Span` field. + // Panic if `Span` appears in any other field which is included in ESTree AST. + // We could handle that case, but it's a bit complicated, so not implementing that until we need it.
+ let mut has_span_field = false; + for field in &struct_def.fields { + if field.type_id == span_type_id && field.name() == "span" { + has_span_field = true; + } else { + assert!( + field.estree.skip + || field.type_def(schema).innermost_type(schema).id() != span_type_id, + "Cannot handle `Span` field: `{}::{}` in `Utf8ToUtf16Converter` generator", + struct_def.name(), + field.name(), + ); + } + } + + if !has_span_field { + return None; + } + + // Generate visitor method + let ty = struct_def.ty(schema); + + let visitor_names = struct_def.visit.visitor_names.as_ref().unwrap(); + let visit_method_ident = visitor_names.visitor_ident(); + let walk_fn_ident = visitor_names.walk_ident(); + + let (extra_params, extra_args): (TokenStream, TokenStream) = struct_def + .visit + .visit_args + .iter() + .map(|(arg_name, arg_type_name)| { + let param_ident = create_ident(arg_name); + let arg_type_ident = create_ident(arg_type_name); + (quote!( , #param_ident: #arg_type_ident ), quote!( , #param_ident )) + }) + .unzip(); + + let visitor = quote! 
{ + ///@@line_break + fn #visit_method_ident(&mut self, it: &mut #ty #extra_params) { + self.convert_offset(&mut it.span.start); + walk_mut::#walk_fn_ident(self, it #extra_args); + self.convert_offset(&mut it.span.end); + } + }; + + Some(visitor) +} diff --git a/tasks/ast_tools/src/main.rs b/tasks/ast_tools/src/main.rs index 3b43f6728ce6c5..d0133b6838aa40 100644 --- a/tasks/ast_tools/src/main.rs +++ b/tasks/ast_tools/src/main.rs @@ -253,6 +253,7 @@ const GENERATORS: &[&(dyn Generator + Sync)] = &[ &generators::AstBuilderGenerator, &generators::GetIdGenerator, &generators::VisitGenerator, + &generators::Utf8ToUtf16ConverterGenerator, &generators::TypescriptGenerator, ]; diff --git a/tasks/benchmark/benches/parser.rs b/tasks/benchmark/benches/parser.rs index 2d31c21662f607..cdbbd90eaf985a 100644 --- a/tasks/benchmark/benches/parser.rs +++ b/tasks/benchmark/benches/parser.rs @@ -48,7 +48,10 @@ fn bench_estree(criterion: &mut Criterion) { .parse() .program; runner.run(|| { - Utf8ToUtf16::new().convert(&mut program); + let span_converter = Utf8ToUtf16::new(program.source_text); + span_converter.convert_program(&mut program); + span_converter.convert_comments(&mut program.comments); + program.to_estree_ts_json(); program }); diff --git a/tasks/coverage/src/tools/estree.rs b/tasks/coverage/src/tools/estree.rs index 36e60432abc157..1753fbbabe3eaf 100644 --- a/tasks/coverage/src/tools/estree.rs +++ b/tasks/coverage/src/tools/estree.rs @@ -136,9 +136,8 @@ impl Case for EstreeTest262Case { } // Convert spans to UTF16 - Utf8ToUtf16::new().convert(&mut program); + Utf8ToUtf16::new(source_text).convert_program(&mut program); - // Remove extra properties from Oxc AST where there is no corresponding property in Acorn AST let acorn_json = match fs::read_to_string(&self.acorn_json_path) { Ok(acorn_json) => acorn_json, Err(e) => {