Skip to content

Commit

Permalink
Fix translation
Browse files (browse the repository at this point in the history)
  • Loading branch information
overlookmotel committed Jan 24, 2025
1 parent 89a0aa8 commit 450b1e6
Showing 1 changed file with 18 additions and 11 deletions.
29 changes: 18 additions & 11 deletions crates/oxc_ast/src/utf8_to_utf16.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,7 +5,6 @@ use oxc_span::Span;
use crate::{ast::Program, visit::VisitMut};

/// Convert UTF8 span offsets to UTF16.
#[derive(Default)]
pub struct Utf8ToUtf16 {
translations: Vec<Translation>,
}
Expand All @@ -22,15 +21,18 @@ struct Translation {

impl Utf8ToUtf16 {
/// Create new `Utf8ToUtf16` converter.
#[expect(clippy::new_without_default)]
pub fn new() -> Self {
Self::default()
let mut translations = Vec::with_capacity(16);
translations.push(Translation { utf8_offset: 0, utf16_difference: 0 });
Self { translations }
}

/// Convert all the spans in the AST to UTF16.
pub fn convert(mut self, program: &mut Program<'_>) {
self.build_table(program.source_text);
// Skip if source is entirely ASCII
if self.translations.is_empty() {
if self.translations.len() == 1 {
return;
}
self.visit_program(program);
Expand All @@ -46,8 +48,10 @@ impl Utf8ToUtf16 {
if byte >= 0xC0 {
let difference_for_this_byte = u32::from(byte >= 0xE0) + 1;
utf16_difference += difference_for_this_byte;
// Record `utf8_offset + 1` not `utf8_offset`, because it's only offsets *after* this
// Unicode character that need to be shifted
self.translations
.push(Translation { utf8_offset: utf8_offset as u32, utf16_difference });
.push(Translation { utf8_offset: utf8_offset as u32 + 1, utf16_difference });
}
}
}
Expand All @@ -58,14 +62,17 @@ impl Utf8ToUtf16 {
}

fn convert_offset(&self, utf8_offset: u32) -> u32 {
// FIXME:
let mut utf16_offset = utf8_offset;
// Find the first entry in table *after* the UTF8 offset.
// The difference we need to subtract is recorded in the entry prior to it.
let index =
self.translations.partition_point(|&translation| translation.utf8_offset < utf8_offset);
if let Some(&translation) = self.translations.get(index) {
utf16_offset -= translation.utf16_difference;
}
utf16_offset
self.translations.partition_point(|translation| translation.utf8_offset <= utf8_offset);
// SAFETY:
// First entry in table is `0, 0`. `partition_point` finds the first entry where
// `utf8_offset < translation.utf8_offset` (or `translations.len()` if none exists).
// So guaranteed `index > 0`, and `index <= translations.len()`.
// Therefore `index - 1` cannot wrap around, and cannot be out of bounds.
let translation = unsafe { self.translations.get_unchecked(index - 1) };
utf8_offset - translation.utf16_difference
}
}

Expand Down

0 comments on commit 450b1e6

Please sign in to comment.