Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add the pext and pdep intrinsics #3676

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 126 additions & 0 deletions src/shims/x86/bmi.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
use rustc_span::Symbol;
use rustc_target::spec::abi::Abi;

use crate::*;

// Empty impl: `emulate_x86_bmi_intrinsic` is supplied by the trait's provided method body below.
impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
/// Emulates the `llvm.x86.bmi.*` intrinsics handled here: `pdep.32`, `pdep.64`, `pext.32`
/// and `pext.64`.
///
/// `link_name` is the full intrinsic name and must start with `llvm.x86.bmi.` (this function
/// panics otherwise, see the `unwrap` below). `args` are the call operands (each handled
/// intrinsic takes exactly two: the source value and the mask) and `dest` receives the result.
///
/// Returns `EmulateItemResult::NeedsReturn` after a result has been written to `dest`, or
/// `EmulateItemResult::NotSupported` when the name matches none of the handled intrinsics.
/// Errors from the target-feature check, the shim check and the scalar reads/writes are
/// propagated with `?`.
fn emulate_x86_bmi_intrinsic(
&mut self,
link_name: Symbol,
abi: Abi,
args: &[OpTy<'tcx>],
dest: &MPlaceTy<'tcx>,
) -> InterpResult<'tcx, EmulateItemResult> {
let this = self.eval_context_mut();
// Prefix should have already been checked.
let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.bmi.").unwrap();

// Every arm checks for `bmi2` itself: nothing before this `match` performs a feature check.
match unprefixed_name {
"pdep.32" => {
// <https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pdep_u32>
this.expect_target_feature_for_intrinsic(link_name, "bmi2")?;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

llvm unhelpfully does not make a distinction between bmi1 and bmi2: all bmi intrinsics are in the bmi namespace. So I don't see another way than to in every branch check for the relevant target features, even though it is error-prone.


// Exactly two operands are expected, called with the non-unwinding C ABI.
let [source, mask] =
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

let source = this.read_scalar(source)?.to_u32()?;
let mask = this.read_scalar(mask)?.to_u32()?;
// Widen to reuse the 64-bit helper. `pdep` only sets bits that are set in the mask, and
// the widened mask has no bits above bit 31, so narrowing back to `u32` loses nothing.
let destination = pdep(source as u64, mask as u64) as u32;

this.write_scalar(Scalar::from_u32(destination), dest)?;
}
"pdep.64" => {
// <https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pdep_u64>
this.expect_target_feature_for_intrinsic(link_name, "bmi2")?;

// Exactly two operands are expected, called with the non-unwinding C ABI.
let [source, mask] =
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

let source = this.read_scalar(source)?.to_u64()?;
let mask = this.read_scalar(mask)?.to_u64()?;
let destination = pdep(source, mask);

this.write_scalar(Scalar::from_u64(destination), dest)?;
}
"pext.32" => {
// <https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pext_u32>
this.expect_target_feature_for_intrinsic(link_name, "bmi2")?;

// Exactly two operands are expected, called with the non-unwinding C ABI.
let [source, mask] =
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

let source = this.read_scalar(source)?.to_u32()?;
let mask = this.read_scalar(mask)?.to_u32()?;
// Widen to reuse the 64-bit helper. `pext` packs one result bit per set mask bit, and the
// widened mask has at most 32 set bits, so the result fits in `u32`.
let destination = pext(source as u64, mask as u64) as u32;

this.write_scalar(Scalar::from_u32(destination), dest)?;
}
"pext.64" => {
// <https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pext_u64>
this.expect_target_feature_for_intrinsic(link_name, "bmi2")?;

// Exactly two operands are expected, called with the non-unwinding C ABI.
let [source, mask] =
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;

let source = this.read_scalar(source)?.to_u64()?;
let mask = this.read_scalar(mask)?.to_u64()?;
let destination = pext(source, mask);

this.write_scalar(Scalar::from_u64(destination), dest)?;
}
// Any other `llvm.x86.bmi.*` name is reported as unsupported rather than as an error.
_ => return Ok(EmulateItemResult::NotSupported),
}
// Reached only by the arms above that wrote a result to `dest`.
Ok(EmulateItemResult::NeedsReturn)
}
}

/// Parallel bit deposit (`pdep`).
///
/// Walks the set bits of `selector_mask` from least to most significant. The k-th set bit of the
/// mask (counting from zero) is set in the result exactly when bit k of `source` is set. Every
/// result bit whose mask bit is clear is zero.
///
/// References:
///
/// - <https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pdep_u64>
/// - <https://en.wikipedia.org/wiki/X86_Bit_manipulation_instruction_set#Parallel_bit_deposit_and_extract>
fn pdep(source: u64, selector_mask: u64) -> u64 {
    (0..u64::BITS)
        // Positions selected by the mask, in ascending order.
        .filter(|&position| (selector_mask >> position) & 1 == 1)
        // `rank` is the index of this position among the selected ones, i.e. the source bit to read.
        .enumerate()
        .filter(|&(rank, _)| (source >> rank) & 1 == 1)
        .fold(0u64, |deposited, (_, position)| deposited | (1u64 << position))
}

/// Parallel bit extract (`pext`).
///
/// Walks the set bits of `selector_mask` from least to most significant. For the k-th set bit of
/// the mask (counting from zero), bit k of the result is set exactly when `source` has that mask
/// position set. Result bits at or above the number of set mask bits are zero.
///
/// References:
///
/// - <https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pext_u64>
/// - <https://en.wikipedia.org/wiki/X86_Bit_manipulation_instruction_set#Parallel_bit_deposit_and_extract>
fn pext(source: u64, selector_mask: u64) -> u64 {
    (0..u64::BITS)
        // Positions selected by the mask, in ascending order.
        .filter(|&position| (selector_mask >> position) & 1 == 1)
        // `rank` is the index of this position among the selected ones, i.e. the result bit to write.
        .enumerate()
        .filter(|&(_, position)| (source >> position) & 1 == 1)
        .fold(0u64, |extracted, (rank, _)| extracted | (1u64 << rank))
}
6 changes: 6 additions & 0 deletions src/shims/x86/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use helpers::bool_to_simd_element;
mod aesni;
mod avx;
mod avx2;
mod bmi;
mod sse;
mod sse2;
mod sse3;
Expand Down Expand Up @@ -113,6 +114,11 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
pclmulqdq(this, left, right, imm, dest)?;
}

name if name.starts_with("bmi.") => {
return bmi::EvalContextExt::emulate_x86_bmi_intrinsic(
this, link_name, abi, args, dest,
);
}
name if name.starts_with("sse.") => {
return sse::EvalContextExt::emulate_x86_sse_intrinsic(
this, link_name, abi, args, dest,
Expand Down
41 changes: 41 additions & 0 deletions tests/pass/shims/x86/intrinsics-x86-bmi2.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Ignore everything except x86 and x86_64
// Any new targets that are added to CI should be ignored here.
// (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.)
//@ignore-target-aarch64
//@ignore-target-arm
//@ignore-target-avr
//@ignore-target-s390x
//@ignore-target-thumbv7em
//@ignore-target-wasm32
//@compile-flags: -C target-feature=+bmi2

#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

/// Test entry point: asserts that `bmi2` is detected at run time, then runs the checks.
fn main() {
    assert!(is_x86_feature_detected!("bmi2"));

    // Checks that use the 32-bit intrinsics.
    unsafe { run_x86() };

    // Checks that use the 64-bit intrinsics; compiled in for x86_64 targets only.
    #[cfg(target_arch = "x86_64")]
    unsafe {
        run_x86_64()
    };
}

/// Checks `_pdep_u32` and `_pext_u32` against fixed expected values.
///
/// Both checks share one mask. The deposit input equals the extract result, so the two
/// assertions exercise the operations in opposite directions on the selected bits.
#[target_feature(enable = "bmi2")]
unsafe fn run_x86() {
    let mask: u32 = 0xff00_fff0;

    let deposited = _pdep_u32(0x0001_2567, mask);
    assert_eq!(deposited, 0x1200_5670);

    let extracted = _pext_u32(0x1234_5678, mask);
    assert_eq!(extracted, 0x0001_2567);
}

/// Checks `_pdep_u64` and `_pext_u64` against fixed expected values.
///
/// Each intrinsic is tested twice: once with operands that fit in 32 bits, and once with
/// operands and a mask that use the upper 32 bits as well.
///
/// Compiled for x86_64 targets only (see the `cfg`).
#[cfg(target_arch = "x86_64")]
// Enable `bmi2` here too, matching `run_x86`, so that this function does not depend solely on
// the file-wide `-C target-feature=+bmi2` flag when calling the BMI2 intrinsics.
#[target_feature(enable = "bmi2")]
unsafe fn run_x86_64() {
    assert_eq!(_pdep_u64(0x00012567, 0xff00fff0), 0x12005670);
    assert_eq!(_pdep_u64(0x0000_0134_5678_9CDE, 0xff0f_ffff_ff00_fff0), 0x0103_4567_8900_CDE0);

    assert_eq!(_pext_u64(0x12345678, 0xff00fff0), 0x00012567);
    assert_eq!(_pext_u64(0x0123_4567_89AB_CDEF, 0xff0f_ffff_ff00_fff0), 0x0000_0134_5678_9CDE);
}
Loading