diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs index e480c25a51e..e6e27c76a5e 100644 --- a/crates/core_simd/src/masks.rs +++ b/crates/core_simd/src/masks.rs @@ -34,6 +34,7 @@ mod sealed { fn eq(self, other: Self) -> bool; fn to_usize(self) -> usize; + fn max_unsigned() -> u64; type Unsigned: SimdElement; @@ -78,6 +79,11 @@ macro_rules! impl_element { self as usize } + #[inline] + fn max_unsigned() -> u64 { + <$unsigned>::MAX as u64 + } + type Unsigned = $unsigned; const TRUE: Self = -1; diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs index 46b1acf25dd..ee0926bcae8 100644 --- a/crates/core_simd/src/vector.rs +++ b/crates/core_simd/src/vector.rs @@ -1,5 +1,6 @@ use crate::simd::{ cmp::SimdPartialOrd, + num::SimdUint, ptr::{SimdConstPtr, SimdMutPtr}, LaneCount, Mask, MaskElement, SupportedLaneCount, Swizzle, }; @@ -261,6 +262,7 @@ where /// # Panics /// /// Panics if the slice's length is less than the vector's `Simd::N`. + /// Use `load_or_default` for an alternative that does not panic. /// /// # Example /// @@ -314,6 +316,143 @@ where unsafe { self.store(slice.as_mut_ptr().cast()) } } + /// Reads contiguous elements from `slice`. Elements are read so long as they're in-bounds for + /// the `slice`. Otherwise, the default value for the element type is returned. + /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{Simd, Mask}; + /// let vec: Vec = vec![10, 11]; + /// + /// let result = Simd::::load_or_default(&vec); + /// assert_eq!(result, Simd::from_array([10, 11, 0, 0])); + /// ``` + #[must_use] + #[inline] + pub fn load_or_default(slice: &[T]) -> Self + where + T: Default, + { + Self::load_or(slice, Default::default()) + } + + /// Reads contiguous elements from `slice`. Elements are read so long as they're in-bounds for + /// the `slice`. Otherwise, the corresponding value from `or` is passed through. + /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{Simd, Mask}; + /// let vec: Vec = vec![10, 11]; + /// let or = Simd::from_array([-5, -4, -3, -2]); + /// + /// let result = Simd::load_or(&vec, or); + /// assert_eq!(result, Simd::from_array([10, 11, -3, -2])); + /// ``` + #[must_use] + #[inline] + pub fn load_or(slice: &[T], or: Self) -> Self { + Self::load_select(slice, Mask::splat(true), or) + } + + /// Reads contiguous elements from `slice`. Each element is read from memory if its + /// corresponding element in `enable` is `true`. + /// + /// When the element is disabled or out of bounds for the slice, that memory location + /// is not accessed and the corresponding value from `or` is passed through. + /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{Simd, Mask}; + /// let vec: Vec = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; + /// let enable = Mask::from_array([true, true, false, true]); + /// let or = Simd::from_array([-5, -4, -3, -2]); + /// + /// let result = Simd::load_select(&vec, enable, or); + /// assert_eq!(result, Simd::from_array([10, 11, -3, 13])); + /// ``` + #[must_use] + #[inline] + pub fn load_select_or_default(slice: &[T], enable: Mask<::Mask, N>) -> Self + where + T: Default, + { + Self::load_select(slice, enable, Default::default()) + } + + /// Reads contiguous elements from `slice`. Each element is read from memory if its + /// corresponding element in `enable` is `true`. + /// + /// When the element is disabled or out of bounds for the slice, that memory location + /// is not accessed and the corresponding value from `or` is passed through. + /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{Simd, Mask}; + /// let vec: Vec = vec![10, 11, 12, 13, 14, 15, 16, 17, 18]; + /// let enable = Mask::from_array([true, true, false, true]); + /// let or = Simd::from_array([-5, -4, -3, -2]); + /// + /// let result = Simd::load_select(&vec, enable, or); + /// assert_eq!(result, Simd::from_array([10, 11, -3, 13])); + /// ``` + #[must_use] + #[inline] + pub fn load_select( + slice: &[T], + mut enable: Mask<::Mask, N>, + or: Self, + ) -> Self { + enable &= mask_up_to(slice.len()); + // SAFETY: We performed the bounds check by updating the mask. &[T] is properly aligned to + // the element. + unsafe { Self::load_select_ptr(slice.as_ptr(), enable, or) } + } + + /// Reads contiguous elements from `slice`. Each element is read from memory if its + /// corresponding element in `enable` is `true`. + /// + /// When the element is disabled, that memory location is not accessed and the corresponding + /// value from `or` is passed through. + #[must_use] + #[inline] + pub unsafe fn load_select_unchecked( + slice: &[T], + enable: Mask<::Mask, N>, + or: Self, + ) -> Self { + let ptr = slice.as_ptr(); + // SAFETY: The safety of reading elements from `slice` is ensured by the caller. + unsafe { Self::load_select_ptr(ptr, enable, or) } + } + + /// Reads contiguous elements starting at `ptr`. Each element is read from memory if its + /// corresponding element in `enable` is `true`. + /// + /// When the element is disabled, that memory location is not accessed and the corresponding + /// value from `or` is passed through. + #[must_use] + #[inline] + pub unsafe fn load_select_ptr( + ptr: *const T, + enable: Mask<::Mask, N>, + or: Self, + ) -> Self { + // SAFETY: The safety of reading elements through `ptr` is ensured by the caller. + unsafe { core::intrinsics::simd::simd_masked_load(enable.to_int(), ptr, or) } + } + /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector. /// If an index is out-of-bounds, the element is instead selected from the `or` vector. /// @@ -492,6 +631,77 @@ where unsafe { core::intrinsics::simd::simd_gather(or, source, enable.to_int()) } } + /// Conditionally write contiguous elements to `slice`. The `enable` mask controls + /// which elements are written, as long as they're in-bounds of the `slice`. + /// If the element is disabled or out of bounds, no memory access to that location + /// is made. + /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{Simd, Mask}; + /// let mut arr = [0i32; 4]; + /// let write = Simd::from_array([-5, -4, -3, -2]); + /// let enable = Mask::from_array([false, true, true, true]); + /// + /// write.store_select(&mut arr[..3], enable); + /// assert_eq!(arr, [0, -4, -3, 0]); + /// ``` + #[inline] + pub fn store_select(self, slice: &mut [T], mut enable: Mask<::Mask, N>) { + enable &= mask_up_to(slice.len()); + // SAFETY: We performed the bounds check by updating the mask. &[T] is properly aligned to + // the element. + unsafe { self.store_select_ptr(slice.as_mut_ptr(), enable) } + } + + /// Conditionally write contiguous elements to `slice`. The `enable` mask controls + /// which elements are written. + /// + /// # Safety + /// + /// Every enabled element must be in bounds for the `slice`. + /// + /// # Examples + /// ``` + /// # #![feature(portable_simd)] + /// # #[cfg(feature = "as_crate")] use core_simd::simd; + /// # #[cfg(not(feature = "as_crate"))] use core::simd; + /// # use simd::{Simd, Mask}; + /// let mut arr = [0i32; 4]; + /// let write = Simd::from_array([-5, -4, -3, -2]); + /// let enable = Mask::from_array([false, true, true, true]); + /// + /// unsafe { write.store_select_unchecked(&mut arr, enable) }; + /// assert_eq!(arr, [0, -4, -3, -2]); + /// ``` + #[inline] + pub unsafe fn store_select_unchecked( + self, + slice: &mut [T], + enable: Mask<::Mask, N>, + ) { + let ptr = slice.as_mut_ptr(); + // SAFETY: The safety of writing elements in `slice` is ensured by the caller. + unsafe { self.store_select_ptr(ptr, enable) } + } + + /// Conditionally write contiguous elements starting from `ptr`. + /// The `enable` mask controls which elements are written. + /// When disabled, the memory location corresponding to that element is not accessed. + /// + /// # Safety + /// + /// Memory addresses for element are calculated [`core::ptr::wrapping_offset`] and + /// each enabled element must satisfy the same conditions as [`core::ptr::write`]. + #[inline] + pub unsafe fn store_select_ptr(self, ptr: *mut T, enable: Mask<::Mask, N>) { + // SAFETY: The safety of writing elements through `ptr` is ensured by the caller. + unsafe { core::intrinsics::simd::simd_masked_store(enable.to_int(), ptr, self) } + } + /// Writes the values in a SIMD vector to potentially discontiguous indices in `slice`. /// If an index is out-of-bounds, the write is suppressed without panicking. /// If two elements in the scattered vector would write to the same index @@ -979,3 +1189,37 @@ where { type Mask = isize; } + +#[inline] +fn lane_indices() -> Simd +where + LaneCount: SupportedLaneCount, +{ + let mut index = [0; N]; + for i in 0..N { + index[i] = i; + } + Simd::from_array(index) +} + +#[inline] +fn mask_up_to(len: usize) -> Mask +where + LaneCount: SupportedLaneCount, + M: MaskElement, +{ + let index = lane_indices::(); + let max_value: u64 = M::max_unsigned(); + macro_rules! case { + ($ty:ty) => { + if N < <$ty>::MAX as usize && max_value as $ty as u64 == max_value { + return index.cast().simd_lt(Simd::splat(len.min(N) as $ty)).cast(); + } + }; + } + case!(u8); + case!(u16); + case!(u32); + case!(u64); + index.simd_lt(Simd::splat(len)).cast() +} diff --git a/crates/core_simd/tests/masked_load_store.rs b/crates/core_simd/tests/masked_load_store.rs new file mode 100644 index 00000000000..3d38658e945 --- /dev/null +++ b/crates/core_simd/tests/masked_load_store.rs @@ -0,0 +1,35 @@ +#![feature(portable_simd)] +use core_simd::simd::prelude::*; + +#[cfg(target_arch = "wasm32")] +use wasm_bindgen_test::*; + +#[cfg(target_arch = "wasm32")] +wasm_bindgen_test_configure!(run_in_browser); + +#[test] +#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] +fn masked_load_store() { + let mut arr = [u8::MAX; 7]; + + u8x4::splat(0).store_select(&mut arr[5..], Mask::from_array([false, true, false, true])); + // write to index 8 is OOB and dropped + assert_eq!(arr, [255u8, 255, 255, 255, 255, 255, 0]); + + u8x4::from_array([0, 1, 2, 3]).store_select(&mut arr[1..], Mask::splat(true)); + assert_eq!(arr, [255u8, 0, 1, 2, 3, 255, 0]); + + // read from index 8 is OOB and dropped + assert_eq!( + u8x4::load_or(&arr[4..], u8x4::splat(42)), + u8x4::from_array([3, 255, 0, 42]) + ); + assert_eq!( + u8x4::load_select( + &arr[4..], + Mask::from_array([true, false, true, true]), + u8x4::splat(42) + ), + u8x4::from_array([3, 42, 0, 42]) + ); +}