Skip to content

Commit

Permalink
rt: Emit direct calls for bootstrapped boots
Browse files Browse the repository at this point in the history
  • Loading branch information
dinfuehr committed Jan 4, 2025
1 parent f6ad682 commit da8df9b
Show file tree
Hide file tree
Showing 13 changed files with 140 additions and 34 deletions.
2 changes: 1 addition & 1 deletion dora-runtime/src/boots/serializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ fn encode_compilation_info(
encode_type_params(vm, &compilation_data.type_params, buffer);
encode_bytecode_type(vm, &compilation_data.return_type, buffer);
encode_location(&compilation_data.loc, buffer);
buffer.emit_bool(mode.is_aot());
buffer.emit_u8(mode as u8);
buffer.emit_bool(compilation_data.emit_debug);
buffer.emit_bool(compilation_data.emit_graph);
buffer.emit_bool(compilation_data.emit_html);
Expand Down
11 changes: 7 additions & 4 deletions dora-runtime/src/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,19 @@ pub mod lazy_compilation_stub;
pub mod runtime_entry_trampoline;
pub mod trait_object_thunk;

#[derive(Copy, Clone)]
#[derive(Copy, Clone, Debug)]
#[repr(u8)]
pub enum CompilationMode {
Aot,
Stage1,
Stage2,
Stage3,
Jit,
}

impl CompilationMode {
pub fn is_aot(&self) -> bool {
pub fn is_stage2_or_3(&self) -> bool {
match self {
CompilationMode::Aot => true,
CompilationMode::Stage2 | CompilationMode::Stage3 => true,
_ => false,
}
}
Expand Down
61 changes: 47 additions & 14 deletions dora-runtime/src/compiler/aot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ use dora_bytecode::{
};

use crate::compiler::codegen::{compile_runtime_entry_trampoline, CompilerInvocation};
use crate::compiler::{compile_fct_aot, trait_object_thunk, NativeFct, NativeFctKind};
use crate::compiler::{
compile_fct_aot, trait_object_thunk, CompilationMode, NativeFct, NativeFctKind,
};
use crate::gc::{formatted_size, Address};
use crate::os;
use crate::vm::{
Expand Down Expand Up @@ -59,8 +61,14 @@ fn stage1_compiler(
tc: &TransitiveClosure,
entry_id: FunctionId,
) -> (Address, CompiledTransitiveClosure) {
let (compile_address, ctc) =
compiler_stage_n(vm, tc, entry_id, "stage1", CompilerInvocation::Cannon);
let (compile_address, ctc) = compiler_stage_n(
vm,
tc,
entry_id,
"stage1",
CompilerInvocation::Cannon,
CompilationMode::Stage1,
);
(compile_address, ctc)
}

Expand All @@ -76,6 +84,7 @@ fn stage2_compiler(
entry_id,
"stage2",
CompilerInvocation::Boots(stage1_compiler_address),
CompilationMode::Stage2,
)
}

Expand All @@ -91,6 +100,7 @@ fn stage3_compiler(
entry_id,
"stage3",
CompilerInvocation::Boots(stage2_compiler_address),
CompilationMode::Stage3,
)
}

Expand All @@ -100,10 +110,11 @@ fn compiler_stage_n(
entry_id: FunctionId,
name: &str,
compiler: CompilerInvocation,
mode: CompilationMode,
) -> (Address, CompiledTransitiveClosure) {
let start = Instant::now();
let start_code_size = vm.gc.current_code_size();
let ctc = compile_transitive_closure(vm, &tc, compiler);
let ctc = compile_transitive_closure(vm, &tc, compiler, mode);
let compile_address = ctc.get_address(entry_id).expect("missing entry point");
let duration = start.elapsed();
let code_size = vm.gc.current_code_size() - start_code_size;
Expand Down Expand Up @@ -434,11 +445,12 @@ fn compile_transitive_closure(
vm: &VM,
tc: &TransitiveClosure,
compiler: CompilerInvocation,
mode: CompilationMode,
) -> CompiledTransitiveClosure {
let mut ctc = CompiledTransitiveClosure::new();
compile_functions(vm, tc, &mut ctc, compiler);
compile_thunks(vm, tc, &mut ctc, compiler);
prepare_lazy_call_sites(vm, &ctc);
compile_functions(vm, tc, &mut ctc, compiler, mode);
compile_thunks(vm, tc, &mut ctc, compiler, mode);
prepare_lazy_call_sites(vm, &ctc, compiler, mode);
prepare_virtual_method_tables(vm, tc, &ctc);
ctc
}
Expand All @@ -448,9 +460,10 @@ fn compile_functions(
tc: &TransitiveClosure,
ctc: &mut CompiledTransitiveClosure,
compiler: CompilerInvocation,
mode: CompilationMode,
) {
for (fct_id, type_params) in &tc.functions {
compile_function(vm, *fct_id, type_params.clone(), ctc, compiler);
compile_function(vm, *fct_id, type_params.clone(), ctc, compiler, mode);
}
}

Expand All @@ -460,6 +473,7 @@ fn compile_function(
type_params: BytecodeTypeArray,
ctc: &mut CompiledTransitiveClosure,
compiler: CompilerInvocation,
mode: CompilationMode,
) {
let fct = vm.fct(fct_id);

Expand All @@ -480,7 +494,7 @@ fn compile_function(
assert!(existing.is_none());
ctc.code_objects.push(code);
} else if let Some(_) = fct.bytecode {
let (_code_id, code) = compile_fct_aot(vm, fct_id, &type_params, compiler);
let (_code_id, code) = compile_fct_aot(vm, fct_id, &type_params, compiler, mode);
ctc.counter += 1;
let existing = ctc
.function_addresses
Expand All @@ -495,6 +509,7 @@ fn compile_thunks(
tc: &TransitiveClosure,
ctc: &mut CompiledTransitiveClosure,
compiler: CompilerInvocation,
mode: CompilationMode,
) {
for (trait_fct_id, trait_type_params, actual_ty) in &tc.thunks {
let (_code_id, code) = trait_object_thunk::ensure_compiled_aot(
Expand All @@ -503,6 +518,7 @@ fn compile_thunks(
trait_type_params.clone(),
actual_ty.clone(),
compiler,
mode,
);

let combined_type_params = trait_type_params.append(actual_ty.clone());
Expand All @@ -516,7 +532,12 @@ fn compile_thunks(
}
}

fn prepare_lazy_call_sites(_vm: &VM, ctc: &CompiledTransitiveClosure) {
fn prepare_lazy_call_sites(
_vm: &VM,
ctc: &CompiledTransitiveClosure,
_compiler: CompilerInvocation,
mode: CompilationMode,
) {
os::jit_writable();

for code in &ctc.code_objects {
Expand All @@ -527,16 +548,28 @@ fn prepare_lazy_call_sites(_vm: &VM, ctc: &CompiledTransitiveClosure) {
type_params,
const_pool_offset_from_ra,
} => {
let address = ctc
let target = ctc
.function_addresses
.get(&(*fct_id, type_params.clone()))
.cloned()
.expect("missing function");
let ra = code.instruction_start().offset(*offset as usize);
let const_pool_address = ra.ioffset(*const_pool_offset_from_ra as isize);

unsafe {
*const_pool_address.to_mut_ptr::<Address>() = address;
if mode.is_stage2_or_3() && cfg!(target_arch = "x86_64") {
let distance = target.to_usize() as isize - ra.to_usize() as isize;
let distance: i32 = distance.try_into().expect("overflow");

unsafe {
assert_eq!(std::ptr::read(ra.sub(5).to_ptr::<u8>()), 0xE8);
assert_eq!(std::ptr::read(ra.sub(4).to_ptr::<i32>()), 0);
std::ptr::write(ra.sub(4).to_mut_ptr(), distance);
}
} else {
let const_pool_address = ra.ioffset(*const_pool_offset_from_ra as isize);

unsafe {
*const_pool_address.to_mut_ptr::<Address>() = target;
}
}
}

Expand Down
3 changes: 2 additions & 1 deletion dora-runtime/src/compiler/codegen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ pub fn compile_fct_aot(
fct_id: FunctionId,
type_params: &BytecodeTypeArray,
compiler: CompilerInvocation,
mode: CompilationMode,
) -> (CodeId, Arc<Code>) {
let program_fct = vm.fct(fct_id);
let params = BytecodeTypeArray::new(program_fct.params.clone());
Expand All @@ -92,7 +93,7 @@ pub fn compile_fct_aot(
type_params,
compiler,
false,
CompilationMode::Aot,
mode,
);
(code_id, code)
}
Expand Down
3 changes: 2 additions & 1 deletion dora-runtime/src/compiler/trait_object_thunk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ pub fn ensure_compiled_aot(
trait_type_params: BytecodeTypeArray,
actual_ty: BytecodeType,
compiler: CompilerInvocation,
mode: CompilationMode,
) -> (CodeId, Arc<Code>) {
let trait_object_ty = trait_object_ty(vm, trait_fct_id, &trait_type_params);
let all_type_params = trait_type_params.append(actual_ty.clone());
Expand All @@ -77,7 +78,7 @@ pub fn ensure_compiled_aot(
actual_ty,
compiler,
false,
CompilationMode::Aot,
mode,
);

(code_id, code)
Expand Down
4 changes: 4 additions & 0 deletions dora-runtime/src/cpu/arm64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -295,3 +295,7 @@ pub static PARAM_OFFSET: i32 = 16;
pub fn next_param_offset(param_offset: i32) -> i32 {
param_offset + 8
}

pub fn patch_direct_call_site(_ra: Address, _distance: i32) {

Check failure on line 299 in dora-runtime/src/cpu/arm64.rs

View workflow job for this annotation

GitHub Actions / test macOS-latest

cannot find type `Address` in this scope
unimplemented!();
}
10 changes: 10 additions & 0 deletions dora-runtime/src/cpu/x64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ use std::sync::atomic::{compiler_fence, Ordering};

use dora_asm::x64::Register;

use crate::Address;

pub fn flush_icache(_: *const u8, _: usize) {
// no flushing needed on x86_64, but emit compiler barrier
compiler_fence(Ordering::SeqCst);
Expand Down Expand Up @@ -175,6 +177,14 @@ impl FReg {
}
}

/// Patches the `rel32` operand of the direct call that ends at `ra`.
///
/// `ra` is the return address of the call, i.e. the address right after a
/// 5-byte `E8 rel32` instruction. The opcode byte sits at `ra - 5` and the
/// 32-bit displacement at `ra - 4`. The displacement must still be the zero
/// placeholder emitted by the code generator; it is overwritten with
/// `distance`.
///
/// The operand of a call instruction has no alignment guarantee, so it is
/// accessed with unaligned reads/writes.
///
/// # Panics
///
/// Panics if the byte at `ra - 5` is not `0xE8` or if the four bytes at
/// `ra - 4` are not zero.
pub fn patch_direct_call_site(ra: Address, distance: i32) {
    let opcode = ra.sub(5).to_ptr::<u8>();
    let operand = ra.sub(4).to_mut_ptr::<i32>();

    // SAFETY: the caller has to pass the return address of a call site that
    // lives in readable and currently writable code memory, so the five
    // bytes before `ra` are valid to read and write. Alignment is not
    // required because the operand goes through the `*_unaligned` accessors.
    unsafe {
        assert_eq!(std::ptr::read(opcode), 0xE8);
        assert_eq!(std::ptr::read_unaligned(operand), 0);
        std::ptr::write_unaligned(operand, distance);
    }
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
5 changes: 5 additions & 0 deletions pkgs/boots/bytecode/opcode.dora
Original file line number Diff line number Diff line change
Expand Up @@ -501,3 +501,8 @@ pub fn intrinsicName(opcode: Int32): String {
pub const INSTRUCTION_SET_X64: Int32 = 0;
pub const INSTRUCTION_SET_ARM64: Int32 = 1;

// Wire values of the compilation-mode byte. The runtime serializer writes
// the mode as `mode as u8`, so these values have to stay in sync with the
// variant order of the runtime's `CompilationMode` enum
// (Stage1, Stage2, Stage3, Jit).
pub const COMPILATION_MODE_STAGE1: Int32 = 0;
pub const COMPILATION_MODE_STAGE2: Int32 = 1;
pub const COMPILATION_MODE_STAGE3: Int32 = 2;
pub const COMPILATION_MODE_JIT: Int32 = 3;

18 changes: 12 additions & 6 deletions pkgs/boots/codegen/x64.dora
Original file line number Diff line number Diff line change
Expand Up @@ -616,18 +616,24 @@ impl CodeGenX64 {
}

fn emitCall(fct_id: FunctionId, type_params: Array[BytecodeType], inlined_location: InlinedLocation, gc_point: GcPoint) {
let address = iface::getFunctionAddress(self.ci, fct_id, type_params);
let cp_entry = self.constPool.push(ConstPoolValue::Address(address));
self.loadConstPool(REG_TEMP, cp_entry);
self.asm.call_r(REG_TEMP);
let const_pool_offset_from_ra = if self.ci.compilationMode.isStage2or3() {
self.asm.call_rel32(0i32);
0i32
} else {
let address = iface::getFunctionAddress(self.ci, fct_id, type_params);
let cp_entry = self.constPool.push(ConstPoolValue::Address(address));
self.loadConstPool(REG_TEMP, cp_entry);
self.asm.call_r(REG_TEMP);
-(self.asm.position() + cp_entry.disp)
};

let pos = self.asm.position();
let site = LazyCompilationSiteDirect(
fct_id,
type_params,
const_pool_offset_from_ra = -(pos + cp_entry.disp),
const_pool_offset_from_ra,
);

let pos = self.asm.position();
self.lazyCompilation.insert(pos, LazyCompilationSite::Direct(site));
self.locations.insert(pos, inlined_location);
self.gcPoints.insert(pos, gc_point);
Expand Down
6 changes: 3 additions & 3 deletions pkgs/boots/compilation.dora
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ pub class CompilationInfo {
pub typeParams: Array[BytecodeType],
pub returnType: BytecodeType,
pub loc: Location,
pub isAotMode: Bool,
pub compilationMode: iface::CompilationMode,
pub emitDebug: Bool,
pub emitGraph: Bool,
pub emitHtml: Bool,
Expand All @@ -27,7 +27,7 @@ impl CompilationInfo {
typeParams: Array[BytecodeType],
returnType: BytecodeType,
loc: Location,
isAotMode: Bool,
compilationMode: iface::CompilationMode,
emitDebug: Bool,
emitGraph: Bool,
emitHtml: Bool,
Expand All @@ -39,7 +39,7 @@ impl CompilationInfo {
typeParams,
returnType,
loc,
isAotMode,
compilationMode,
emitDebug,
emitGraph,
emitHtml,
Expand Down
23 changes: 20 additions & 3 deletions pkgs/boots/deserializer.dora
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use package::interface::{Address, Config, StructData, EnumData, EnumVariantData};
use package::interface::{Address, Config, StructData, EnumData, EnumVariantData, CompilationMode};
use package::bytecode::{AliasId, BytecodeFunction, BytecodeType, ClassId, ConstPoolEntry, EnumId, FunctionId, ClassFieldId, Location, StructId, StructFieldId, TraitId};
use package::bytecode::opcode as opc;
use package::compilation::CompilationInfo;
Expand Down Expand Up @@ -50,12 +50,12 @@ pub fn decodeCompilationInfo(reader: ByteReader): CompilationInfo {
let typeParams = decodeTypeParams(reader);
let returnType = decodeBytecodeType(reader);
let loc = decodeLocation(reader);
let isAotMode = reader.readBool();
let compilationMode = decodeCompilationMode(reader);
let emitDebug = reader.readBool();
let emitGraph = reader.readBool();
let emitHtml = reader.readBool();
let emitCodeComments = reader.readBool();
CompilationInfo::new(bc, fctId, typeParams, returnType, loc, isAotMode, emitDebug, emitGraph, emitHtml, emitCodeComments)
CompilationInfo::new(bc, fctId, typeParams, returnType, loc, compilationMode, emitDebug, emitGraph, emitHtml, emitCodeComments)
}

pub fn decodeStructData(reader: ByteReader): StructData {
Expand Down Expand Up @@ -300,6 +300,23 @@ fn decodeConstPoolEntry(reader: ByteReader): ConstPoolEntry {
}
}

// Decodes the compilation mode from the serialized compilation info.
//
// Reads a single unsigned byte from `reader` and maps it to the matching
// `CompilationMode` variant via the `COMPILATION_MODE_*` constants. An
// unknown value is printed and then treated as unreachable.
fn decodeCompilationMode(reader: ByteReader): CompilationMode {
// Widen to Int32 so the value can be compared against the Int32 constants.
let opcode = reader.readUInt8().toInt32();

if opcode == opc::COMPILATION_MODE_STAGE1 {
CompilationMode::Stage1
} else if opcode == opc::COMPILATION_MODE_STAGE2 {
CompilationMode::Stage2
} else if opcode == opc::COMPILATION_MODE_STAGE3 {
CompilationMode::Stage3
} else if opcode == opc::COMPILATION_MODE_JIT {
CompilationMode::Jit
} else {
// The runtime and boots disagree on the encoding; report the value first.
println("unknown CompilationMode opcode = ${opcode}");
unreachable[CompilationMode]()
}
}

fn decodeLocations(reader: ByteReader): Array[(Int32, Location)] {
let size = reader.readInt32().toInt64();

Expand Down
16 changes: 16 additions & 0 deletions pkgs/boots/interface.dora
Original file line number Diff line number Diff line change
Expand Up @@ -579,3 +579,19 @@ pub struct FunctionInliningInfo {
pub hasForceInlineAnnotation: Bool,
pub hasNeverInlineAnnotation: Bool,
}

// Mode in which the runtime requested a compilation. Mirrors the runtime's
// `CompilationMode` enum; the value is decoded from the serialized
// compilation info.
// NOTE(review): Stage1..Stage3 appear to correspond to the runtime's
// bootstrap stages of the same names — confirm against the runtime.
pub enum CompilationMode {
Stage1,
Stage2,
Stage3,
Jit,
}

impl CompilationMode {
// Returns true for `Stage2` and `Stage3`, false for `Stage1` and `Jit`.
// The x64 code generator uses this in `emitCall` to emit a `call rel32`
// with a zero placeholder instead of an indirect call through the
// constant pool.
pub fn isStage2or3(): Bool {
match self {
CompilationMode::Stage2 | CompilationMode::Stage3 => true,
CompilationMode::Stage1 | CompilationMode::Jit => false,
}
}
}
Loading

0 comments on commit da8df9b

Please sign in to comment.