From ec21313a78791e60bb4b9590b51106934be03f9b Mon Sep 17 00:00:00 2001 From: Folkert Date: Sat, 15 Jun 2024 18:15:28 +0200 Subject: [PATCH 1/3] add the `pext` and `pdep` intrinsics --- src/shims/x86/bmi.rs | 126 ++++++++++++++++++++ src/shims/x86/mod.rs | 6 + tests/pass/shims/x86/intrinsics-x86-bmi2.rs | 41 +++++++ 3 files changed, 173 insertions(+) create mode 100644 src/shims/x86/bmi.rs create mode 100644 tests/pass/shims/x86/intrinsics-x86-bmi2.rs diff --git a/src/shims/x86/bmi.rs b/src/shims/x86/bmi.rs new file mode 100644 index 0000000000..49dbd44583 --- /dev/null +++ b/src/shims/x86/bmi.rs @@ -0,0 +1,126 @@ +use rustc_span::Symbol; +use rustc_target::spec::abi::Abi; + +use crate::*; + +impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {} +pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { + fn emulate_x86_bmi_intrinsic( + &mut self, + link_name: Symbol, + abi: Abi, + args: &[OpTy<'tcx>], + dest: &MPlaceTy<'tcx>, + ) -> InterpResult<'tcx, EmulateItemResult> { + let this = self.eval_context_mut(); + // Prefix should have already been checked. 
+ let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.bmi.").unwrap(); + + match unprefixed_name { + "pdep.32" => { + // + this.expect_target_feature_for_intrinsic(link_name, "bmi2")?; + + let [source, mask] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + let source = this.read_scalar(source)?.to_u32()?; + let mask = this.read_scalar(mask)?.to_u32()?; + let destination = pdep(source as u64, mask as u64) as u32; + + this.write_scalar(Scalar::from_u32(destination), dest)?; + } + "pdep.64" => { + // + this.expect_target_feature_for_intrinsic(link_name, "bmi2")?; + + let [source, mask] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + let source = this.read_scalar(source)?.to_u64()?; + let mask = this.read_scalar(mask)?.to_u64()?; + let destination = pdep(source, mask); + + this.write_scalar(Scalar::from_u64(destination), dest)?; + } + "pext.32" => { + // + this.expect_target_feature_for_intrinsic(link_name, "bmi2")?; + + let [source, mask] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + let source = this.read_scalar(source)?.to_u32()?; + let mask = this.read_scalar(mask)?.to_u32()?; + let destination = pext(source as u64, mask as u64) as u32; + + this.write_scalar(Scalar::from_u32(destination), dest)?; + } + "pext.64" => { + // + this.expect_target_feature_for_intrinsic(link_name, "bmi2")?; + + let [source, mask] = + this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?; + + let source = this.read_scalar(source)?.to_u64()?; + let mask = this.read_scalar(mask)?.to_u64()?; + let destination = pext(source, mask); + + this.write_scalar(Scalar::from_u64(destination), dest)?; + } + _ => return Ok(EmulateItemResult::NotSupported), + } + Ok(EmulateItemResult::NeedsReturn) + } +} + +/// Parallel bit deposition +/// +/// Deposit contiguous low bits from unsigned 64-bit integer `source` to `destination` at the corresponding bit locations +/// specified by `selector_mask`; all other 
bits in `destination` are set to zero. +/// +/// See also +/// +/// - https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pdep_u64 +/// - https://en.wikipedia.org/wiki/X86_Bit_manipulation_instruction_set#Parallel_bit_deposit_and_extract +fn pdep(source: u64, selector_mask: u64) -> u64 { + let mut destination = 0u64; + let mut j = 0; + for i in 0..64 { + if selector_mask & (1 << i) != 0 { + if source & (1 << j) != 0 { + destination |= 1 << i; + } + + j += 1; + } + } + + destination +} + +/// Parallel bit extraction +/// +/// Extract bits from unsigned 64-bit integer `source` at the corresponding bit locations specified by `selector_mask` +/// to contiguous low bits in `destination`; the remaining upper bits in `destination` are set to zero. +/// +/// See also +/// +/// - https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pext_u64 +/// - https://en.wikipedia.org/wiki/X86_Bit_manipulation_instruction_set#Parallel_bit_deposit_and_extract +fn pext(source: u64, selector_mask: u64) -> u64 { + let mut destination = 0u64; + let mut j = 0; + for i in 0..64 { + if selector_mask & (1 << i) != 0 { + if source & (1 << i) != 0 { + destination |= 1 << j; + } + + j += 1; + } + } + + destination +} diff --git a/src/shims/x86/mod.rs b/src/shims/x86/mod.rs index b71aec0216..704c45fdd6 100644 --- a/src/shims/x86/mod.rs +++ b/src/shims/x86/mod.rs @@ -14,6 +14,7 @@ use helpers::bool_to_simd_element; mod aesni; mod avx; mod avx2; +mod bmi; mod sse; mod sse2; mod sse3; @@ -113,6 +114,11 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { pclmulqdq(this, left, right, imm, dest)?; } + name if name.starts_with("bmi.") => { + return bmi::EvalContextExt::emulate_x86_bmi_intrinsic( + this, link_name, abi, args, dest, + ); + } name if name.starts_with("sse.") => { return sse::EvalContextExt::emulate_x86_sse_intrinsic( this, link_name, abi, args, dest, diff --git a/tests/pass/shims/x86/intrinsics-x86-bmi2.rs 
b/tests/pass/shims/x86/intrinsics-x86-bmi2.rs new file mode 100644 index 0000000000..69580845b3 --- /dev/null +++ b/tests/pass/shims/x86/intrinsics-x86-bmi2.rs @@ -0,0 +1,41 @@ +// Ignore everything except x86 and x86_64 +// Any new targets that are added to CI should be ignored here. +// (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.) +//@ignore-target-aarch64 +//@ignore-target-arm +//@ignore-target-avr +//@ignore-target-s390x +//@ignore-target-thumbv7em +//@ignore-target-wasm32 +//@compile-flags: -C target-feature=+bmi2 + +#[cfg(target_arch = "x86")] +use std::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use std::arch::x86_64::*; + +fn main() { + assert!(is_x86_feature_detected!("bmi2")); + + unsafe { + run_x86(); + #[cfg(target_arch = "x86_64")] + run_x86_64(); + }; +} + +#[target_feature(enable = "bmi2")] +unsafe fn run_x86() { + assert_eq!(_pdep_u32(0x00012567, 0xff00fff0), 0x12005670); + + assert_eq!(_pext_u32(0x12345678, 0xff00fff0), 0x00012567); +} + +#[cfg(target_arch = "x86_64")] +unsafe fn run_x86_64() { + assert_eq!(_pdep_u64(0x00012567, 0xff00fff0), 0x12005670); + assert_eq!(_pdep_u64(0x0000_0134_5678_9CDE, 0xff0f_ffff_ff00_fff0), 0x0103_4567_8900_CDE0); + + assert_eq!(_pext_u64(0x12345678, 0xff00fff0), 0x00012567); + assert_eq!(_pext_u64(0x0123_4567_89AB_CDEF, 0xff0f_ffff_ff00_fff0), 0x0000_0134_5678_9CDE); +} From f3f548e6c50e68e8245fbedb27f7a0d153cf728b Mon Sep 17 00:00:00 2001 From: Folkert Date: Sat, 15 Jun 2024 18:39:08 +0200 Subject: [PATCH 2/3] clippy --- src/shims/x86/bmi.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/shims/x86/bmi.rs b/src/shims/x86/bmi.rs index 49dbd44583..ee26ddbb15 100644 --- a/src/shims/x86/bmi.rs +++ b/src/shims/x86/bmi.rs @@ -26,7 +26,7 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let source = this.read_scalar(source)?.to_u32()?; let mask = this.read_scalar(mask)?.to_u32()?; - let destination = 
pdep(source as u64, mask as u64) as u32; + let destination = (pdep(u64::from(source), u64::from(mask)) & 0xFFFF_FFFF) as u32; this.write_scalar(Scalar::from_u32(destination), dest)?; } @@ -52,7 +52,7 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let source = this.read_scalar(source)?.to_u32()?; let mask = this.read_scalar(mask)?.to_u32()?; - let destination = pext(source as u64, mask as u64) as u32; + let destination = (pext(u64::from(source), u64::from(mask)) & 0xFFFF_FFFF) as u32; this.write_scalar(Scalar::from_u32(destination), dest)?; } @@ -86,14 +86,14 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { /// - https://en.wikipedia.org/wiki/X86_Bit_manipulation_instruction_set#Parallel_bit_deposit_and_extract fn pdep(source: u64, selector_mask: u64) -> u64 { let mut destination = 0u64; - let mut j = 0; + let mut j = 0usize; for i in 0..64 { if selector_mask & (1 << i) != 0 { if source & (1 << j) != 0 { destination |= 1 << i; } - j += 1; + j = j.wrapping_add(1); } } @@ -111,14 +111,14 @@ fn pdep(source: u64, selector_mask: u64) -> u64 { /// - https://en.wikipedia.org/wiki/X86_Bit_manipulation_instruction_set#Parallel_bit_deposit_and_extract fn pext(source: u64, selector_mask: u64) -> u64 { let mut destination = 0u64; - let mut j = 0; + let mut j = 0usize; for i in 0..64 { if selector_mask & (1 << i) != 0 { if source & (1 << i) != 0 { destination |= 1 << j; } - j += 1; + j = j.wrapping_add(1); } } From 085e3ba28c0470d6647161ad046a3490475c22c7 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sat, 15 Jun 2024 18:45:14 +0200 Subject: [PATCH 3/3] rustdoc --- src/shims/x86/bmi.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/shims/x86/bmi.rs b/src/shims/x86/bmi.rs index ee26ddbb15..09b078a86f 100644 --- a/src/shims/x86/bmi.rs +++ b/src/shims/x86/bmi.rs @@ -82,8 +82,8 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { /// /// See also /// -/// - 
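https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pdep_u64 -/// - https://en.wikipedia.org/wiki/X86_Bit_manipulation_instruction_set#Parallel_bit_deposit_and_extract +/// - <https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pdep_u64> +/// - <https://en.wikipedia.org/wiki/X86_Bit_manipulation_instruction_set#Parallel_bit_deposit_and_extract> fn pdep(source: u64, selector_mask: u64) -> u64 { let mut destination = 0u64; let mut j = 0usize; @@ -107,8 +107,8 @@ fn pdep(source: u64, selector_mask: u64) -> u64 { /// /// See also /// -/// - https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pext_u64 -/// - https://en.wikipedia.org/wiki/X86_Bit_manipulation_instruction_set#Parallel_bit_deposit_and_extract +/// - <https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pext_u64> +/// - <https://en.wikipedia.org/wiki/X86_Bit_manipulation_instruction_set#Parallel_bit_deposit_and_extract> fn pext(source: u64, selector_mask: u64) -> u64 { let mut destination = 0u64; let mut j = 0usize;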