Skip to content

Commit

Permalink
Convert translation table to a single Vec
Browse files Browse the repository at this point in the history
  • Loading branch information
overlookmotel committed Jan 24, 2025
1 parent 99be31f commit 3ff9196
Showing 1 changed file with 15 additions and 8 deletions.
23 changes: 15 additions & 8 deletions crates/oxc_ast/src/utf8_to_utf16.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,17 @@ use crate::{ast::Program, visit::VisitMut};
/// Convert UTF8 span offsets to UTF16.
///
/// Owns the translation table that `build_table` fills from the program's source text
/// and that `convert_offset` searches. `convert` takes `self` by value, so an instance
/// is used for a single conversion.
#[derive(Default)]
pub struct Utf8ToUtf16 {
// Translation table. `build_table` pushes one entry for each byte `>= 0xC0` it meets
// (in UTF8, the lead byte of a multi-byte character). When the table is still empty
// after `build_table`, `convert` returns early without visiting the AST.
translations: Vec<Translation>,
}

/// One entry of the UTF8 -> UTF16 translation table.
///
/// `build_table` pushes an entry whenever it meets a byte `>= 0xC0`, storing that byte's
/// offset together with the running total of the UTF8/UTF16 difference at that point.
// NOTE(review): the `Vec<u32>` field lines below conflict with `derive(Copy)` and look like
// the pre-change side of this diff shown next to their replacements - confirm against the
// committed file.
// NOTE(review): the two `u32` fields occupy 8 bytes; `align(8)` presumably lets an entry be
// handled as one 8-byte unit - confirm the intent.
#[derive(Clone, Copy)]
#[repr(align(8))]
struct Translation {
// UTF8 byte offset
utf8_offsets: Vec<u32>,
utf8_offset: u32,
// Number to subtract from UTF8 byte offset to get UTF16 char offset
// for offsets *after* `utf8_offset`
utf16_differences: Vec<u32>,
utf16_difference: u32,
}

impl Utf8ToUtf16 {
Expand All @@ -24,7 +30,7 @@ impl Utf8ToUtf16 {
pub fn convert(mut self, program: &mut Program<'_>) {
self.build_table(program.source_text);
// Skip if unicode is absent.
if self.utf8_offsets.is_empty() {
if self.translations.is_empty() {
return;
}
self.visit_program(program);
Expand All @@ -40,8 +46,8 @@ impl Utf8ToUtf16 {
if byte >= 0xC0 {
let difference_for_this_byte = u32::from(byte >= 0xE0) + 1;
utf16_difference += difference_for_this_byte;
self.utf8_offsets.push(utf8_offset as u32);
self.utf16_differences.push(utf16_difference);
self.translations
.push(Translation { utf8_offset: utf8_offset as u32, utf16_difference });
}
}
}
Expand All @@ -54,9 +60,10 @@ impl Utf8ToUtf16 {
/// Translate a UTF8 byte offset into a UTF16 offset.
///
/// Looks up an entry in the translation table and subtracts its difference from
/// `utf8_offset`. When the lookup finds no entry, the offset is returned unchanged.
// NOTE(review): both the `utf8_offsets` / `utf16_differences` lookup and the `translations`
// lookup appear in the body; the former looks like the pre-change side of this diff -
// confirm against the committed file.
fn convert_offset(&self, utf8_offset: u32) -> u32 {
// FIXME: (the author left this marker without detail)
// NOTE(review): `partition_point` returns the index of the first entry whose `utf8_offset`
// is NOT less than the argument, i.e. an entry at or after the queried offset. The field
// comment on `Translation` says a difference applies to offsets *after* `utf8_offset`,
// so the entry wanted here is presumably the one just before `index`. As written, an
// offset past the last entry gets no adjustment at all - confirm and fix.
let mut utf16_offset = utf8_offset;
let index = self.utf8_offsets.partition_point(|&offset| offset < utf8_offset);
if let Some(&utf16_difference) = self.utf16_differences.get(index) {
utf16_offset -= utf16_difference;
let index =
self.translations.partition_point(|&translation| translation.utf8_offset < utf8_offset);
if let Some(&translation) = self.translations.get(index) {
utf16_offset -= translation.utf16_difference;
}
utf16_offset
}
Expand Down

0 comments on commit 3ff9196

Please sign in to comment.