From 873902c801419a6048669ad5196d417465c9a62a Mon Sep 17 00:00:00 2001
From: Eliza Weisman
Date: Fri, 11 Nov 2022 11:52:41 -0800
Subject: [PATCH] wip

---
 hal-x86_64/src/cpu.rs          |   2 +
 hal-x86_64/src/cpu/local.rs    |  15 +++-
 hal-x86_64/src/cpu/smp.rs      | 139 +++++++++++++++++++++++++++++++
 hal-x86_64/src/cpu/topology.rs | 145 +++++++++++++++++++++++++++++++++
 hal-x86_64/src/segment.rs      |  31 +++++++
 src/arch/x86_64/acpi.rs        |   2 +
 6 files changed, 330 insertions(+), 4 deletions(-)
 create mode 100644 hal-x86_64/src/cpu/smp.rs
 create mode 100644 hal-x86_64/src/cpu/topology.rs

diff --git a/hal-x86_64/src/cpu.rs b/hal-x86_64/src/cpu.rs
index ed32c5307..f63d6e84a 100644
--- a/hal-x86_64/src/cpu.rs
+++ b/hal-x86_64/src/cpu.rs
@@ -6,6 +6,8 @@ pub mod intrinsics;
 #[cfg(feature = "alloc")]
 pub mod local;
 pub mod msr;
+pub mod smp;
+pub mod topology;
 pub use self::msr::Msr;
 
 #[repr(transparent)]
diff --git a/hal-x86_64/src/cpu/local.rs b/hal-x86_64/src/cpu/local.rs
index 0674f6f86..405a6114f 100644
--- a/hal-x86_64/src/cpu/local.rs
+++ b/hal-x86_64/src/cpu/local.rs
@@ -1,4 +1,4 @@
-use super::Msr;
+use super::{topology::Processor, Msr};
 use alloc::boxed::Box;
 use core::{
     arch::asm,
@@ -16,6 +16,7 @@ pub struct GsLocalData {
     /// from `gs:0x0` to get the local data's address.
     _self: *const Self,
     magic: usize,
+    processor: Processor,
     /// Because this struct is self-referential, it may not be `Unpin`.
     _must_pin: PhantomPinned,
     /// Arbitrary user data.
@@ -35,12 +36,13 @@ impl GsLocalData {
     const MAGIC: usize = 0xC0FFEE;
     pub const MAX_LOCAL_KEYS: usize = 64;
 
-    const fn new() -> Self {
+    pub(crate) const fn new(processor: Processor) -> Self {
         #[allow(clippy::declare_interior_mutable_const)] // array initializer
         const LOCAL_SLOT_INIT: AtomicPtr<()> = AtomicPtr::new(ptr::null_mut());
         Self {
             _self: ptr::null(),
             _must_pin: PhantomPinned,
+            processor,
             magic: Self::MAGIC,
             userdata: [LOCAL_SLOT_INIT; Self::MAX_LOCAL_KEYS],
         }
@@ -78,6 +80,10 @@ impl GsLocalData {
             .expect("GsLocalData::current() called before local data was initialized on this core!")
     }
 
+    pub fn processor_info(&self) -> &Processor {
+        &self.processor
+    }
+
     /// Access a local key on this CPU core's local data.
     pub fn with<T, U>(&self, key: &LocalKey<T>, f: impl FnOnce(&T) -> U) -> U {
         let idx = *key.idx.get();
@@ -108,14 +114,15 @@ impl GsLocalData {
     ///
     /// This should only be called a single time per CPU core.
     #[track_caller]
-    pub fn init() {
+    pub(crate) fn init(self: Pin<Box<Self>>) {
         if Self::has_local_data() {
             tracing::warn!("this CPU core already has local data initialized!");
             debug_assert!(false, "this CPU core already has local data initialized!");
             return;
         }
 
-        let ptr = Box::into_raw(Box::new(Self::new()));
+        let this = unsafe { Pin::into_inner_unchecked(self) };
+        let ptr = Box::into_raw(this);
         tracing::trace!(?ptr, "initializing local data");
         unsafe {
             // set up self reference
diff --git a/hal-x86_64/src/cpu/smp.rs b/hal-x86_64/src/cpu/smp.rs
new file mode 100644
index 000000000..e53312d7e
--- /dev/null
+++ b/hal-x86_64/src/cpu/smp.rs
@@ -0,0 +1,139 @@
+use crate::{
+    control_regs::{Cr0, Cr4},
+    cpu::{
+        msr::{Efer, Msr},
+        topology, Ring,
+    },
+    segment,
+};
+use core::arch::global_asm;
+use mycelium_util::bits;
+pub fn bringup() -> Result<(), ()> {
+    unsafe {
+        tracing::info!(
+            "AP trampoline: {:p} .. {:p}",
+            &AP_TRAMPOLINE_START,
+            &AP_TRAMPOLINE_END
+        );
+        assert_eq!(
+            &AP_TRAMPOLINE_START as *const _ as usize,
+            AP_TRAMPOLINE_ADDR
+        );
+    }
+
+    Ok(())
+}
+
+extern "C" {
+    #[link_name = "ap_trampoline"]
+    static AP_TRAMPOLINE_START: u8;
+    #[link_name = "ap_trampoline_end"]
+    static AP_TRAMPOLINE_END: u8;
+}
+
+const AP_TRAMPOLINE_ADDR: usize = 0x8000;
+global_asm! {
+    // /!\ EXTREMELY MESSED UP HACK: stick this in the `.boot-first-stage`
+    // section that's defined by the `bootloader` crate's linker script, so that
+    // it gets linked into 16-bit memory. we don't control the linker script, so
+    // we can't define our own section and stick it in the right place, but we
+    // can piggyback off of `bootloader`'s linker script.
+    //
+    // OBVIOUSLY THIS WILL CRASH AND BURN IF YOU ARENT LINKING WITH `bootloader`
+    // BUT WHATEVER LOL THATS NOT MY PROBLEM,,,
+    ".section .boot-first-stage, \"wx\"",
+    ".code16",
+    ".org {trampoline_addr}",
+    ".align 4096",
+    ".global ap_trampoline",
+    ".global ap_trampoline_end",
+    "ap_trampoline:",
+    " jmp short ap_start",
+    " .nops 8",
+    "ap_spinlock: .quad 0",
+    "ap_pml4: .quad 0",
+
+    "ap_start:",
+    " cli",
+
+    // zero segment registers
+    " xor ax, ax",
+    " mov ds, ax",
+    " mov es, ax",
+    " mov ss, ax",
+
+    // initialize stack pointer to an invalid (null) value
+    " mov sp, 0x0",
+
+    // setup page table
+    "mov eax, [ap_pml4]",
+    "mov edi, [eax]",
+    "mov cr3, edi",
+
+    // init FPU
+    " fninit",
+
+    // load 32-bit GDT
+    " lgdt [gdt32_ptr]",
+
+    // set CR4 flags
+    " mov eax, cr4",
+    " or eax, {cr4flags}",
+    " mov cr4, eax",
+
+    // enable long mode in EFER
+    " mov ecx, {efer_num}",
+    " rdmsr",
+    " or eax, {efer_bits}",
+    " wrmsr",
+
+    // set CR0 flags to enable paging and write protection
+    " mov ebx, cr0",
+    " or ebx, {cr0flags}",
+    " mov cr0, ebx",
+
+    // 32-bit GDT
+    ".align 16",
+    "gdt32:",
+    // TODO(eliza): would be nice to build the bits of the GDT entries in
+    // Rust...
+    " .long 0, 0",
+    " .quad {gdt32_code}", // code segment
+    " .quad {gdt32_data}", // data segment
+    " .long 0x00000068, 0x00CF8900", // TSS
+    "gdt32_ptr:",
+    " .word gdt32_ptr - gdt32 - 1", // size
+    " .word gdt32", // offset
+    "ap_trampoline_end:",
+    // ".code64", // reset to 64 bit code when exiting the asm block
+    trampoline_addr = const AP_TRAMPOLINE_ADDR,
+    cr4flags = const AP_CR4,
+    cr0flags = const AP_CR0,
+    efer_num = const Msr::ia32_efer().num,
+    efer_bits = const EFER_LONG_MODE,
+    gdt32_code = const segment::Descriptor::code_32()
+        .with_ring(Ring::Ring0).bits(),
+    gdt32_data = const segment::Descriptor::data_flat_16()
+        .bits(),
+}
+
+/// Initial CR4 flags to set for an application processor.
+const AP_CR4: u32 = bits::Pack64::pack_in(0)
+    .set_all(&Cr4::PAGE_SIZE_EXTENSION)
+    .set_all(&Cr4::PHYSICAL_ADDRESS_EXTENSION)
+    .set_all(&Cr4::PAGE_GLOBAL_ENABLE)
+    .set_all(&Cr4::OSFXSR)
+    .bits() as u32;
+
+/// Initial CR0 flags to set for an application processor.
+const AP_CR0: u32 = bits::Pack64::pack_in(0)
+    .set_all(&Cr0::PROTECTED_MODE_ENABLE)
+    .set_all(&Cr0::PAGING_ENABLE)
+    .set_all(&Cr0::WRITE_PROTECT)
+    .bits() as u32;
+
+/// EFER bits to enable long mode
+const EFER_LONG_MODE: u32 = bits::Pack64::pack_in(0)
+    .set_all(&Efer::LONG_MODE_ENABLE)
+    .set_all(&Efer::NO_EXECUTE_ENABLE)
+    .bits() as u32;
diff --git a/hal-x86_64/src/cpu/topology.rs b/hal-x86_64/src/cpu/topology.rs
new file mode 100644
index 000000000..3b52de3aa
--- /dev/null
+++ b/hal-x86_64/src/cpu/topology.rs
@@ -0,0 +1,145 @@
+use crate::segment;
+use alloc::{boxed::Box, vec::Vec};
+use core::fmt;
+
+pub const MAX_CPUS: usize = 256;
+
+pub type Id = usize;
+
+#[derive(Debug)]
+pub struct Topology {
+    pub(crate) boot_processor: Processor,
+    pub(crate) application_processors: Vec<Processor>,
+}
+
+#[derive(Debug, Clone, Eq, PartialEq)]
+#[non_exhaustive]
+pub struct Processor {
+    pub id: Id,
+    pub device_uid: u32,
+    pub lapic_id: u32,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+#[non_exhaustive]
+pub enum TopologyError {
+    NoTopology,
+    Weird(&'static str),
+}
+
+impl Topology {
+    // TODO(eliza): good error type
+    #[tracing::instrument(name = "Topology::from_acpi", skip(acpi), err(Display))]
+    pub fn from_acpi(acpi: &acpi::PlatformInfo) -> Result<Self, TopologyError> {
+        use acpi::platform;
+
+        let platform::ProcessorInfo {
+            ref application_processors,
+            ref boot_processor,
+        } = acpi
+            .processor_info
+            .as_ref()
+            .ok_or(TopologyError::NoTopology)?;
+
+        let bsp = Processor {
+            id: 0,
+            device_uid: boot_processor.processor_uid,
+            lapic_id: boot_processor.local_apic_id,
+        };
+
+        if boot_processor.is_ap {
+            return Err(TopologyError::Weird(
+                "boot processor claimed to be an application processor",
+            ));
+        }
+
+        if boot_processor.state != platform::ProcessorState::Running {
+            return Err(TopologyError::Weird(
+                "boot processor claimed to not be running",
+            ));
+        }
+
+        tracing::info!(
+            bsp.id,
+            bsp.device_uid,
+            bsp.lapic_id,
+            "boot processor seems normalish"
+        );
+
+        let mut id = 1;
+        let mut disabled = 0;
+        let mut aps = Vec::with_capacity(application_processors.len());
+        for ap in application_processors {
+            if !ap.is_ap {
+                return Err(TopologyError::Weird(
+                    "application processor claimed to be the boot processor",
+                ));
+            }
+
+            match ap.state {
+                // if the firmware disabled a processor, just skip it
+                platform::ProcessorState::Disabled => {
+                    tracing::warn!(
+                        ap.device_uid = ap.processor_uid,
+                        "application processor disabled by firmware, skipping it"
+                    );
+                    disabled += 1;
+                    continue;
+                }
+                // if a processor claims it's already running, that seems messed up!
+                platform::ProcessorState::Running => {
+                    return Err(TopologyError::Weird(
+                        "application processors should not be running yet",
+                    ));
+                }
+                // otherwise, add it to the topology
+                platform::ProcessorState::WaitingForSipi => {}
+            }
+
+            let ap = Processor {
+                id,
+                device_uid: ap.processor_uid,
+                lapic_id: ap.local_apic_id,
+            };
+            tracing::debug!(
+                ap.id,
+                ap.device_uid,
+                ap.lapic_id,
+                "found application processor"
+            );
+
+            aps.push(ap);
+            id += 1;
+        }
+
+        tracing::info!(
+            "found {} application processors ({} disabled)",
+            application_processors.len(),
+            disabled,
+        );
+
+        Ok(Self {
+            application_processors: aps,
+            boot_processor: bsp,
+        })
+    }
+}
+
+impl Processor {
+    pub(crate) fn init_processor(&self, gdt: &mut segment::Gdt) {
+        tracing::info!(self.id, "initializing processor");
+        use super::local::GsLocalData;
+        Box::pin(GsLocalData::new(self.clone())).init();
+    }
+}
+
+impl fmt::Display for TopologyError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            TopologyError::NoTopology => f.pad("no topology information found in MADT"),
+            TopologyError::Weird(msg) => {
+                write!(f, "found something weird: {msg}, is the MADT corrupted?")
+            }
+        }
+    }
+}
diff --git a/hal-x86_64/src/segment.rs b/hal-x86_64/src/segment.rs
index 6f052949a..61bd4f664 100644
--- a/hal-x86_64/src/segment.rs
+++ b/hal-x86_64/src/segment.rs
@@ -125,6 +125,15 @@ pub struct Gdt {
     push_at: usize,
 }
 
+#[derive(Clone)]
+#[repr(C)]
+pub struct LongModeGdt {
+    null_descriptor: Descriptor,
+    kernel_code: Descriptor,
+    kernel_data: Descriptor,
+    tss_descrs: [SystemDescriptor; crate::cpu::topology::MAX_CPUS],
+}
+
 /// A 64-bit mode descriptor for a system segment (such as an LDT or TSS
 /// descriptor).
 #[derive(Copy, Clone, Eq, PartialEq)]
@@ -479,6 +488,20 @@ impl Descriptor {
         Self(Self::DEFAULT_BITS | Self::DATA_FLAGS)
     }
 
+    // TODO(eliza): construct this more nicely
+    pub(crate) const fn data_flat_16() -> Self {
+        Self(
+            Packing64::new(0)
+                .set_all(&Self::LIMIT_LOW)
+                .set_all(&Self::READABLE)
+                .set_all(&Self::IS_USER_SEGMENT)
+                .set_all(&Self::IS_PRESENT)
+                .set_all(&Self::LIMIT_HIGH)
+                .set_all(&Self::GRANULARITY)
+                .bits(),
+        )
+    }
+
     pub fn ring(&self) -> cpu::Ring {
         cpu::Ring::from_u8(self.ring_bits())
     }
@@ -678,6 +701,14 @@ mod tests {
         dbg!(SystemDescriptor::BASE_HIGH_PAIR);
     }
 
+    #[test]
+    fn data_flat_16() {
+        assert_eq!(
+            Descriptor::data_flat_16(),
+            Descriptor::from_bits(0x008F_9200_0000_FFFF)
+        )
+    }
+
     proptest! {
         #[test]
         fn system_segment_tss_base(addr: u64) {
diff --git a/src/arch/x86_64/acpi.rs b/src/arch/x86_64/acpi.rs
index 70042fcf1..64ad71f10 100644
--- a/src/arch/x86_64/acpi.rs
+++ b/src/arch/x86_64/acpi.rs
@@ -58,6 +58,8 @@ pub fn bringup_smp(platform: &acpi::PlatformInfo) -> Result<(), Error> {
         application_processors.len()
     );
     tracing::debug!(?application_processors);
+    hal_x86_64::cpu::smp::bringup().unwrap();
+
     tracing::warn!("not starting app processors (SMP support isn't done yet)");
 
     Ok(())