From 450b1e6da9105043d403580c5fc7e5dbd7bfcab7 Mon Sep 17 00:00:00 2001 From: overlookmotel Date: Fri, 24 Jan 2025 10:44:22 +0000 Subject: [PATCH] Fix translation --- crates/oxc_ast/src/utf8_to_utf16.rs | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/crates/oxc_ast/src/utf8_to_utf16.rs b/crates/oxc_ast/src/utf8_to_utf16.rs index 79ce7881f38a98..4a5911322e0621 100644 --- a/crates/oxc_ast/src/utf8_to_utf16.rs +++ b/crates/oxc_ast/src/utf8_to_utf16.rs @@ -5,7 +5,6 @@ use oxc_span::Span; use crate::{ast::Program, visit::VisitMut}; /// Convert UTF8 span offsets to UTF16. -#[derive(Default)] pub struct Utf8ToUtf16 { translations: Vec<Translation>, } @@ -22,15 +21,18 @@ struct Translation { impl Utf8ToUtf16 { /// Create new `Utf8ToUtf16` converter. + #[expect(clippy::new_without_default)] pub fn new() -> Self { - Self::default() + let mut translations = Vec::with_capacity(16); + translations.push(Translation { utf8_offset: 0, utf16_difference: 0 }); + Self { translations } } /// Convert all the spans in the AST to UTF16. pub fn convert(mut self, program: &mut Program<'_>) { self.build_table(program.source_text); // Skip if source is entirely ASCII - if self.translations.is_empty() { + if self.translations.len() == 1 { return; } self.visit_program(program); @@ -46,8 +48,10 @@ impl Utf8ToUtf16 { if byte >= 0xC0 { let difference_for_this_byte = u32::from(byte >= 0xE0) + 1; utf16_difference += difference_for_this_byte; + // Record `utf8_offset + 1` not `utf8_offset`, because it's only offsets *after* this + // Unicode character that need to be shifted self.translations - .push(Translation { utf8_offset: utf8_offset as u32, utf16_difference }); + .push(Translation { utf8_offset: utf8_offset as u32 + 1, utf16_difference }); } } } @@ -58,14 +62,17 @@ impl Utf8ToUtf16 { } fn convert_offset(&self, utf8_offset: u32) -> u32 { - // FIXME: - let mut utf16_offset = utf8_offset; + // Find the first entry in table *after* the UTF8 offset. 
+ // The difference we need to subtract is recorded in the entry prior to it. let index = - self.translations.partition_point(|&translation| translation.utf8_offset < utf8_offset); - if let Some(&translation) = self.translations.get(index) { - utf16_offset -= translation.utf16_difference; - } - utf16_offset + self.translations.partition_point(|translation| translation.utf8_offset <= utf8_offset); + // SAFETY: + // First entry in table is `0, 0`. `partition_point` finds the first entry where + // `utf8_offset < translation.utf8_offset` (or `translations.len()` if none exists). + // So guaranteed `index > 0`, and `index <= translations.len()`. + // Therefore `index - 1` cannot wrap around, and cannot be out of bounds. + let translation = unsafe { self.translations.get_unchecked(index - 1) }; + utf8_offset - translation.utf16_difference } }