Skip to content

Commit

Permalink
feat(ast/estree): raw transfer (experimental)
Browse files Browse the repository at this point in the history
  • Loading branch information
overlookmotel committed Mar 3, 2025
1 parent 9c6ae9f commit 1c49226
Show file tree
Hide file tree
Showing 41 changed files with 8,370 additions and 51 deletions.
6 changes: 6 additions & 0 deletions .github/.generated_ast_watch_list.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,16 @@ src:
- 'crates/oxc_syntax/src/generated/derive_content_eq.rs'
- 'crates/oxc_syntax/src/generated/derive_estree.rs'
- 'crates/oxc_syntax/src/lib.rs'
- 'crates/oxc_syntax/src/module_record.rs'
- 'crates/oxc_syntax/src/number.rs'
- 'crates/oxc_syntax/src/operator.rs'
- 'crates/oxc_syntax/src/reference.rs'
- 'crates/oxc_syntax/src/scope.rs'
- 'crates/oxc_syntax/src/serialize.rs'
- 'crates/oxc_syntax/src/symbol.rs'
- 'napi/parser/deserialize.js'
- 'napi/parser/src/generated/assert_layouts.rs'
- 'napi/parser/src/generated/derive_estree.rs'
- 'napi/parser/src/raw_transfer_types.rs'
- 'npm/oxc-types/types.d.ts'
- 'tasks/ast_tools/src/**'
4 changes: 3 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,8 @@ jobs:
filters: |
src:
- '!crates/oxc_linter/**'
- uses: ./.github/actions/clone-submodules
if: steps.filter.outputs.src == 'true'
- uses: oxc-project/setup-rust@cd82e1efec7fef815e2c23d296756f31c7cdc03d # v1.0.0
if: steps.filter.outputs.src == 'true'
with:
Expand All @@ -311,7 +313,7 @@ jobs:
if: steps.filter.outputs.src == 'true'
- if: steps.filter.outputs.src == 'true'
run: |
pnpm run build-dev
pnpm run build
pnpm run test
pnpm --filter e2e run test
git diff --exit-code # Must commit everything
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,11 @@ assert-unchecked = "0.1.2"
base64 = "0.22.1"
bitflags = "2.8.0"
bpaf = "0.9.16"
bumpalo = "3.17.0"
# `bumpalo` must be pinned to exactly this version.
# `Allocator::from_raw_parts` (used in raw transfer) depends on internal implementation details
# of `bumpalo` which may change in a future version.
# This is a temporary situation - we'll replace `bumpalo` with our own allocator.
bumpalo = "=3.17.0"
compact_str = "0.8.1"
console = "0.15.10"
console_error_panic_hook = "0.1.7"
Expand Down
1 change: 1 addition & 0 deletions crates/oxc/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ isolated_declarations = ["oxc_isolated_declarations"]
ast_visit = ["oxc_ast_visit"]

serialize = [
"oxc_allocator/from_raw_parts",
"oxc_ast/serialize",
"oxc_ast_visit/serialize",
"oxc_semantic?/serialize",
Expand Down
1 change: 1 addition & 0 deletions crates/oxc_allocator/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,5 @@ serde = { workspace = true }
serde_json = { workspace = true }

[features]
from_raw_parts = []
serialize = ["dep:serde", "oxc_estree/serialize"]
12 changes: 12 additions & 0 deletions crates/oxc_allocator/src/allocator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,18 @@ impl Allocator {
pub(crate) fn bump(&self) -> &Bump {
&self.bump
}

/// Create [`Allocator`] from a [`bumpalo::Bump`].
///
/// The `Bump` is moved into the new `Allocator` unchanged; nothing is allocated or copied
/// beyond the move itself.
///
/// This method is not public (`pub(crate)`), and is only compiled when the `from_raw_parts`
/// Cargo feature is enabled. Only used by [`Allocator::from_raw_parts`].
//
// `#[inline(always)]` because it's a no-op (it only wraps `bump` in the `Allocator` struct).
// `#[expect(clippy::inline_always)]` acknowledges the lint that would otherwise fire for it.
#[cfg(feature = "from_raw_parts")]
#[expect(clippy::inline_always)]
#[inline(always)]
pub(crate) fn from_bump(bump: Bump) -> Self {
Self { bump }
}
}

/// SAFETY: Not actually safe, but for enabling `Send` for downstream crates.
Expand Down
206 changes: 206 additions & 0 deletions crates/oxc_allocator/src/from_raw_parts.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
//! Define additional [`Allocator::from_raw_parts`] method, used only by raw transfer.
use std::{
alloc::Layout,
cell::Cell,
ptr::{self, NonNull},
};

use bumpalo::Bump;

use crate::Allocator;

/// Minimum alignment for allocator chunks. This is hard-coded in `bumpalo`.
const MIN_ALIGN: usize = 16;

/// Size in bytes of our duplicate of `bumpalo`'s `ChunkFooter`.
///
/// `Allocator::from_raw_parts` writes a `ChunkFooter` into the last `CHUNK_FOOTER_SIZE` bytes
/// of the memory it is given, so this is also the smallest chunk size it can accept.
const CHUNK_FOOTER_SIZE: usize = size_of::<ChunkFooter>();

// Compile-time checks on the layout of `ChunkFooter`. Compilation fails if either is violated.
const _: () = {
// The footer must be at least as large as the minimum chunk alignment.
assert!(CHUNK_FOOTER_SIZE >= MIN_ALIGN);
// An address aligned on `MIN_ALIGN` must be sufficiently aligned to hold a `ChunkFooter`.
// `Allocator::from_raw_parts` relies on this when writing the footer.
assert!(align_of::<ChunkFooter>() <= MIN_ALIGN);
};

impl Allocator {
    /// Minimum size for memory chunk passed to [`Allocator::from_raw_parts`].
    pub const RAW_MIN_SIZE: usize = CHUNK_FOOTER_SIZE;

    /// Minimum alignment for memory chunk passed to [`Allocator::from_raw_parts`].
    pub const RAW_MIN_ALIGN: usize = MIN_ALIGN;

    /// Construct a static-sized [`Allocator`] from an existing memory allocation.
    ///
    /// **IMPORTANT: WE MUST NOT CHANGE THE VERSION OF BUMPALO DEPENDENCY**.
    ///
    /// This code only remains sound as long as the code in version of `bumpalo` we're using matches
    /// the duplicate of `bumpalo`'s internals contained in this file.
    ///
    /// `bumpalo` is pinned to version `=3.17.0` in `Cargo.toml`.
    ///
    /// The [`Allocator`] which is returned takes ownership of the memory allocation,
    /// and the allocation will be freed if the `Allocator` is dropped.
    /// If caller wishes to prevent that happening, they must wrap the `Allocator` in `ManuallyDrop`.
    ///
    /// The [`Allocator`] returned by this function cannot grow.
    ///
    /// The last [`RAW_MIN_SIZE`] bytes of the memory region are overwritten with the allocator's
    /// chunk footer, so they are not available for allocations.
    ///
    /// This hack is all very inadvisable!
    /// Only implemented as a temporary stopgap until we replace `bumpalo` with our own allocator.
    ///
    /// # Safety
    ///
    /// * `ptr` must be aligned on [`RAW_MIN_ALIGN`].
    /// * `size` must be a multiple of [`RAW_MIN_ALIGN`].
    /// * `size` must be at least [`RAW_MIN_SIZE`].
    /// * The memory region starting at `ptr` and encompassing `size` bytes must be within
    ///   a single allocation, and must be valid for writes (this function writes to it).
    /// * Because the allocation is freed if the returned `Allocator` is dropped, either the
    ///   `Allocator` must never be dropped (e.g. wrap it in `ManuallyDrop`), or it must be valid
    ///   to free the memory with the layout recorded here (`size` bytes, aligned on
    ///   [`RAW_MIN_ALIGN`]).
    ///   NOTE(review): presumably the free goes through the global allocator, as that is what
    ///   `bumpalo` uses - confirm against `bumpalo` v3.17.0.
    ///
    /// # Panics
    ///
    /// Panics if cannot determine layout of Bumpalo's `Bump` type.
    ///
    /// [`RAW_MIN_ALIGN`]: Self::RAW_MIN_ALIGN
    /// [`RAW_MIN_SIZE`]: Self::RAW_MIN_SIZE
    pub unsafe fn from_raw_parts(ptr: NonNull<u8>, size: usize) -> Self {
        // Only support little-endian systems.
        // Calculating offset of `current_chunk_footer` on big-endian systems would be difficult.
        #[cfg(target_endian = "big")]
        const {
            panic!("`Allocator::from_raw_parts` is not supported on big-endian systems.");
        }

        // Debug assert that `ptr` and `size` fulfill size and alignment requirements
        debug_assert!(is_multiple_of(ptr.as_ptr() as usize, MIN_ALIGN));
        debug_assert!(is_multiple_of(size, MIN_ALIGN));
        debug_assert!(size >= CHUNK_FOOTER_SIZE);

        // `Bump` is defined as:
        //
        // ```
        // pub struct Bump {
        //     current_chunk_footer: Cell<NonNull<ChunkFooter>>,
        //     allocation_limit: Cell<Option<usize>>,
        // }
        // ```
        //
        // `Bump` is not `#[repr(C)]`, so which order the fields are in is unpredictable.
        // Deduce the offset of `current_chunk_footer` field by creating a dummy `Bump` where the value
        // of the `allocation_limit` field is known.
        // The offset is measured in units of `usize` (not bytes).
        //
        // This should all be const-folded down by compiler.
        let current_chunk_footer_field_offset: usize = {
            const {
                assert!(size_of::<Bump>() == size_of::<[usize; 3]>());
                assert!(align_of::<Bump>() == align_of::<[usize; 3]>());
                assert!(size_of::<Cell<NonNull<ChunkFooter>>>() == size_of::<usize>());
                assert!(align_of::<Cell<NonNull<ChunkFooter>>>() == align_of::<usize>());
                assert!(size_of::<Cell<Option<usize>>>() == size_of::<[usize; 2]>());
                assert!(align_of::<Cell<Option<usize>>>() == align_of::<usize>());
            }

            let bump = Bump::new();
            bump.set_allocation_limit(Some(123));

            // SAFETY:
            // `Bump` has same layout as `[usize; 3]` (checked by const assertions above).
            // Strictly speaking, reading the fields as `usize`s is UB, as the layout of `Option`
            // is not specified. But in practice, `Option` stores its discriminant before its payload,
            // so either field order means 3rd `usize` is fully initialized
            // (it's either `NonNull<ChunkFooter>>` or the `usize` in `Option<usize>`).
            // Once we've figured out the field order, should be safe to check the `Option`
            // discriminant as a `u8`.
            // Const assertion at top of this function ensures this is a little-endian system,
            // so first byte of the 8 bytes containing the discriminant will be initialized, regardless
            // of whether compiler chooses to represent the discriminant as `u8`, `u16`, `u32` or `u64`.
            unsafe {
                // View the `Bump` as 3 x `usize`.
                // Named `bump_words` so it does not shadow the `ptr` parameter.
                let bump_words = ptr::from_ref(&bump).cast::<usize>();
                if *bump_words.add(2) == 123 {
                    // `allocation_limit` is 2nd field. So `current_chunk_footer` is 1st.
                    assert_eq!(*bump_words.add(1).cast::<u8>(), 1);
                    0
                } else {
                    // `allocation_limit` is 1st field. So `current_chunk_footer` is 2nd.
                    assert_eq!(*bump_words.add(1), 123);
                    assert_eq!(*bump_words.cast::<u8>(), 1);
                    2
                }
            }
        };

        // Create empty bump with allocation limit of 0 - i.e. it cannot grow.
        // This means that the memory chunk we're about to add to the `Bump` will remain its only chunk.
        // Therefore it can never be deallocated, until the `Allocator` is dropped.
        // `Allocator::reset` would only reset the "cursor" pointer, not deallocate the memory.
        let bump = Bump::new();
        bump.set_allocation_limit(Some(0));

        // Get pointer to `EmptyChunkFooter`.
        // SAFETY: We've established the offset of the `current_chunk_footer` field above.
        let current_chunk_footer_field = unsafe {
            let field_ptr = ptr::addr_of!(bump)
                .cast::<Cell<NonNull<ChunkFooter>>>()
                .add(current_chunk_footer_field_offset);
            &*field_ptr
        };
        let empty_chunk_footer_ptr = current_chunk_footer_field.get();

        // Construct `ChunkFooter` and write into end of allocation.
        // SAFETY: Caller guarantees:
        // 1. The `size` bytes starting at `ptr` are within a single allocation.
        // 2. `size` is `>= CHUNK_FOOTER_SIZE` - so `size - CHUNK_FOOTER_SIZE` cannot wrap around.
        let chunk_footer_ptr = unsafe { ptr.add(size - CHUNK_FOOTER_SIZE) };
        // SAFETY: `MIN_ALIGN` is a non-zero power of 2. Caller guarantees `size` is a multiple of
        // `MIN_ALIGN` (so no rounding up is required), and that `size` bytes fit within a single
        // allocation (so `size` cannot exceed `isize::MAX`).
        let layout = unsafe { Layout::from_size_align_unchecked(size, MIN_ALIGN) };
        let chunk_footer = ChunkFooter {
            data: ptr,
            layout,
            prev: Cell::new(empty_chunk_footer_ptr),
            // Bump "cursor" starts at the footer - i.e. nothing has been allocated yet
            ptr: Cell::new(chunk_footer_ptr),
            allocated_bytes: 0,
        };
        let chunk_footer_ptr = chunk_footer_ptr.cast::<ChunkFooter>();
        // SAFETY: If caller has upheld safety requirements, `chunk_footer_ptr` is `CHUNK_FOOTER_SIZE`
        // bytes from the end of the memory region, so the footer fits within it.
        // `ptr` and `size` are both multiples of `MIN_ALIGN`, and const assertions at top of this file
        // ensure `MIN_ALIGN >= align_of::<ChunkFooter>()`. `CHUNK_FOOTER_SIZE` is a multiple of
        // `align_of::<ChunkFooter>()` (the size of any type is a multiple of its alignment).
        // So `chunk_footer_ptr` is sufficiently aligned for `ChunkFooter`.
        // Therefore `chunk_footer_ptr` is valid for writing a `ChunkFooter`.
        unsafe { chunk_footer_ptr.write(chunk_footer) };

        // Write pointer to the new chunk footer into bump's `current_chunk_footer` field
        current_chunk_footer_field.set(chunk_footer_ptr);

        Self::from_bump(bump)
    }
}

/// Allocator chunk footer.
///
/// Copied exactly from `bumpalo` v3.17.0.
///
/// This type is not exposed by `bumpalo` crate, but the type is `#[repr(C)]`, so we can rely on our
/// duplicate here having the same layout, as long as we don't change the version of `bumpalo` we use.
///
/// Do not reorder, add, remove, or change the type of any field. `Allocator::from_raw_parts` writes
/// a value of this type into the end of the memory chunk, and stores a pointer to it in the `Bump`,
/// so the bytes written must be what `bumpalo` expects to find there.
#[repr(C)]
#[derive(Debug)]
struct ChunkFooter {
/// Pointer to the start of this chunk allocation.
/// This footer is always at the end of the chunk.
data: NonNull<u8>,

/// The layout of this chunk's allocation.
///
/// `Allocator::from_raw_parts` records the full size of the memory chunk it was given here
/// (including the bytes occupied by this footer).
layout: Layout,

/// Link to the previous chunk.
///
/// Note that the last node in the `prev` linked list is the canonical empty
/// chunk, whose `prev` link points to itself.
///
/// `Allocator::from_raw_parts` sets this to the footer pointer taken from a freshly created,
/// empty `Bump`.
prev: Cell<NonNull<ChunkFooter>>,

/// Bump allocation finger that is always in the range `self.data..=self`.
///
/// `Allocator::from_raw_parts` initializes this to the address of the footer itself.
ptr: Cell<NonNull<u8>>,

/// The bytes allocated in all chunks so far.
/// The canonical empty chunk has a size of 0 and for all other chunks, `allocated_bytes` will be
/// the allocated_bytes of the current chunk plus the allocated bytes of the `prev` chunk.
///
/// `Allocator::from_raw_parts` initializes this to 0.
allocated_bytes: usize,
}

/// Check whether `n` divides evenly by `divisor`.
///
/// Returns `true` when dividing `n` by `divisor` leaves no remainder (so `0` is a multiple of
/// every non-zero divisor), and `false` otherwise.
///
/// # Panics
///
/// Panics if `divisor` is 0.
const fn is_multiple_of(n: usize, divisor: usize) -> bool {
    let remainder = n % divisor;
    remainder == 0
}
2 changes: 2 additions & 0 deletions crates/oxc_allocator/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ mod allocator_api2;
mod boxed;
mod clone_in;
mod convert;
#[cfg(feature = "from_raw_parts")]
mod from_raw_parts;
pub mod hash_map;
pub mod string;
mod vec;
Expand Down
13 changes: 11 additions & 2 deletions crates/oxc_ast/src/ast/comment.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
#![warn(missing_docs)]
use oxc_allocator::CloneIn;
use oxc_ast_macros::ast;
use oxc_estree::ESTree;
use oxc_span::{ContentEq, Span};

/// Indicates a line or block comment.
#[ast]
#[generate_derive(CloneIn, ContentEq)]
#[generate_derive(CloneIn, ContentEq, ESTree)]
#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)]
#[estree(no_rename_variants, no_ts_def)]
pub enum CommentKind {
/// Line comment
#[default]
Expand Down Expand Up @@ -77,8 +79,9 @@ pub enum CommentAnnotation {

/// A comment in source code.
#[ast]
#[generate_derive(CloneIn, ContentEq)]
#[generate_derive(CloneIn, ContentEq, ESTree)]
#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)]
#[estree(add_fields(value = CommentValue), field_order(kind, value, span), no_ts_def)]
pub struct Comment {
/// The span of the comment text, with leading and trailing delimiters.
pub span: Span,
Expand All @@ -87,22 +90,28 @@ pub struct Comment {
/// `/* Leading */ token`
/// ^ This start
/// NOTE: Trailing comment attachment is not computed yet.
#[estree(skip)]
pub attached_to: u32,

/// Line or block comment
#[estree(rename = "type")]
pub kind: CommentKind,

/// Leading or trailing comment
#[estree(skip)]
pub position: CommentPosition,

/// Whether this comment has a preceding newline.
/// Used to avoid becoming a trailing comment in codegen.
#[estree(skip)]
pub preceded_by_newline: bool,

/// Whether this comment has a trailing newline.
#[estree(skip)]
pub followed_by_newline: bool,

/// Comment Annotation
#[estree(skip)]
pub annotation: CommentAnnotation,
}

Expand Down
21 changes: 21 additions & 0 deletions crates/oxc_ast/src/generated/derive_estree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use oxc_estree::{
ser::{AppendTo, AppendToConcat},
};

use crate::ast::comment::*;
use crate::ast::js::*;
use crate::ast::jsx::*;
use crate::ast::literal::*;
Expand Down Expand Up @@ -3338,3 +3339,23 @@ impl ESTree for JSDocUnknownType {
state.end();
}
}

impl ESTree for CommentKind {
    /// Serialize the comment kind as a string: `"Line"` or `"Block"`.
    fn serialize<S: Serializer>(&self, serializer: S) {
        // Pick the variant's name first, then serialize it in one place.
        let name = match self {
            Self::Line => "Line",
            Self::Block => "Block",
        };
        JsonSafeString(name).serialize(serializer);
    }
}

impl ESTree for Comment {
    /// Serialize the comment as a struct with fields `type`, `value`, `start` and `end`,
    /// written in that order.
    fn serialize<S: Serializer>(&self, serializer: S) {
        // `value` is not a field of `Comment`; it is produced by the `CommentValue` wrapper.
        let value = crate::serialize::CommentValue(self);
        let span = &self.span;

        let mut object = serializer.serialize_struct();
        object.serialize_field("type", &self.kind);
        object.serialize_field("value", &value);
        object.serialize_field("start", &span.start);
        object.serialize_field("end", &span.end);
        object.end();
    }
}
Loading

0 comments on commit 1c49226

Please sign in to comment.