Skip to content

Commit

Permalink
riscv: Provide all operation when Zacas extension is available even i…
Browse files Browse the repository at this point in the history
…f A extension is not available
  • Loading branch information
taiki-e committed Feb 23, 2025
1 parent 02bb367 commit e72e39d
Show file tree
Hide file tree
Showing 8 changed files with 237 additions and 24 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,12 @@ jobs:
RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} --cfg atomic_maybe_uninit_test_prefer_zalrsc_over_zaamo
RUSTFLAGS: ${{ env.RUSTFLAGS }} --cfg atomic_maybe_uninit_test_prefer_zalrsc_over_zaamo
if: startsWith(matrix.target, 'riscv')
# riscv zacas-based sub-word RMW ({8,16}-bit swap/cas with Zacas without Zalrsc & Zabha)
- run: tools/test.sh -vv --tests ${TARGET:-} ${BUILD_STD:-} ${RELEASE:-}
env:
RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-feature=+zacas --cfg atomic_maybe_uninit_test_prefer_zacas_over_zalrsc_for_sub_word
RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-feature=+zacas --cfg atomic_maybe_uninit_test_prefer_zacas_over_zalrsc_for_sub_word
if: startsWith(matrix.target, 'riscv')
# s390x z196 (arch9)
- run: tools/test.sh -vv --tests ${TARGET:-} ${BUILD_STD:-} ${RELEASE:-}
env:
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ unexpected_cfgs = { level = "warn", check-cfg = [
'cfg(target_pointer_width,values("128"))',
# Known custom cfgs, excluding those that may be set by build script.
# Not public API.
'cfg(atomic_maybe_uninit_test_prefer_zalrsc_over_zaamo,qemu,valgrind)',
'cfg(atomic_maybe_uninit_test_prefer_zalrsc_over_zaamo,atomic_maybe_uninit_test_prefer_zacas_over_zalrsc_for_sub_word,qemu,valgrind)',
# Public APIs, considered unstable unless documented in readme.
# arm: Use cp15_barrier instead of __kuser_memory_barrier on Armv6 Linux/Android.
# Armv6 binaries compiled with this cfg may cause problems when run on Armv7+ chips:
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ Currently, x86, x86_64, Arm, AArch64, RISC-V, LoongArch64, Arm64EC, s390x, MIPS,
| m68k \[9] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 ||\[1] |
| xtensa \[9] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 ||\[1] |

\[1] Arm's atomic RMW operations are not available on Armv6-M (thumbv6m). RISC-V's atomic RMW operations are not available on targets without the A (or G which means IMAFD) or Zalrsc extension, such as riscv32i, riscv32imc, etc. M68k's atomic RMW operations requires target-cpu M68020+ (Linux is M68020 by default). Xtensa's atomic RMW operations are not available on esp32s2.<br>
\[1] Arm's atomic RMW operations are not available on Armv6-M (thumbv6m). RISC-V's atomic RMW operations are not available on targets without the A (or G, which means IMAFD), Zalrsc, or Zacas extension, such as riscv32i, riscv32imc, etc. M68k's atomic RMW operations require target-cpu M68020+ (Linux is M68020 by default). Xtensa's atomic RMW operations are not available on esp32s2.<br>
\[2] Requires `cmpxchg16b` target feature (enabled by default on Apple and Windows (except Windows 7) targets).<br>
\[3] Armv6+ or Linux/Android, except for M-profile architecture such as thumbv6m, thumbv7m, etc.<br>
\[4] Requires `zacas` target feature.<br>
Expand Down
8 changes: 8 additions & 0 deletions src/arch/cfgs/riscv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ macro_rules! cfg_no_atomic_128 {
atomic_maybe_uninit_target_feature = "a",
target_feature = "zalrsc",
atomic_maybe_uninit_target_feature = "zalrsc",
target_feature = "zacas",
atomic_maybe_uninit_target_feature = "zacas",
))]
#[macro_export]
macro_rules! cfg_has_atomic_cas {
Expand All @@ -117,6 +119,8 @@ macro_rules! cfg_has_atomic_cas {
atomic_maybe_uninit_target_feature = "a",
target_feature = "zalrsc",
atomic_maybe_uninit_target_feature = "zalrsc",
target_feature = "zacas",
atomic_maybe_uninit_target_feature = "zacas",
))]
#[macro_export]
macro_rules! cfg_no_atomic_cas {
Expand All @@ -127,6 +131,8 @@ macro_rules! cfg_no_atomic_cas {
atomic_maybe_uninit_target_feature = "a",
target_feature = "zalrsc",
atomic_maybe_uninit_target_feature = "zalrsc",
target_feature = "zacas",
atomic_maybe_uninit_target_feature = "zacas",
)))]
#[macro_export]
macro_rules! cfg_has_atomic_cas {
Expand All @@ -137,6 +143,8 @@ macro_rules! cfg_has_atomic_cas {
atomic_maybe_uninit_target_feature = "a",
target_feature = "zalrsc",
atomic_maybe_uninit_target_feature = "zalrsc",
target_feature = "zacas",
atomic_maybe_uninit_target_feature = "zacas",
)))]
#[macro_export]
macro_rules! cfg_no_atomic_cas {
Expand Down
182 changes: 169 additions & 13 deletions src/arch/riscv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,6 @@ Generated asm:
- riscv32imac https://godbolt.org/z/sjWWhE7r7
*/

// TODO:
// - {8,16}-bit swap/cas with Zacas without Zalrsc & Zabha (use amocas.w)

#[path = "cfgs/riscv.rs"]
mod cfgs;

Expand Down Expand Up @@ -69,6 +66,8 @@ use crate::utils::{MaybeUninit64 as MaybeUninitDw, Pair};
atomic_maybe_uninit_target_feature = "a",
target_feature = "zalrsc",
atomic_maybe_uninit_target_feature = "zalrsc",
target_feature = "zacas",
atomic_maybe_uninit_target_feature = "zacas",
))]
#[cfg(target_arch = "riscv32")]
macro_rules! w {
Expand All @@ -86,6 +85,8 @@ macro_rules! w {
atomic_maybe_uninit_target_feature = "a",
target_feature = "zalrsc",
atomic_maybe_uninit_target_feature = "zalrsc",
target_feature = "zacas",
atomic_maybe_uninit_target_feature = "zacas",
))]
#[cfg(target_arch = "riscv64")]
macro_rules! w {
Expand Down Expand Up @@ -448,11 +449,14 @@ macro_rules! atomic_sub_word {
not(atomic_maybe_uninit_test_prefer_zalrsc_over_zaamo),
any(target_feature = "zabha", atomic_maybe_uninit_target_feature = "zabha"),
)))]
#[cfg(any(
target_feature = "a",
atomic_maybe_uninit_target_feature = "a",
target_feature = "zalrsc",
atomic_maybe_uninit_target_feature = "zalrsc",
#[cfg(all(
not(atomic_maybe_uninit_test_prefer_zacas_over_zalrsc_for_sub_word),
any(
target_feature = "a",
atomic_maybe_uninit_target_feature = "a",
target_feature = "zalrsc",
atomic_maybe_uninit_target_feature = "zalrsc",
),
))]
impl AtomicSwap for $ty {
#[inline]
Expand Down Expand Up @@ -499,6 +503,67 @@ macro_rules! atomic_sub_word {
out
}
}
// Zacas-based sub-word swap.
// Selected only when all of the following hold:
// - the Zabha-based implementation is not selected (Zabha is unavailable, or
//   atomic_maybe_uninit_test_prefer_zalrsc_over_zaamo is set),
// - the A/Zalrsc-based implementation is not selected (neither A nor Zalrsc is available, or
//   atomic_maybe_uninit_test_prefer_zacas_over_zalrsc_for_sub_word is set),
// - Zacas is available.
#[cfg(not(all(
not(atomic_maybe_uninit_test_prefer_zalrsc_over_zaamo),
any(target_feature = "zabha", atomic_maybe_uninit_target_feature = "zabha"),
)))]
#[cfg(not(all(
not(atomic_maybe_uninit_test_prefer_zacas_over_zalrsc_for_sub_word),
any(
target_feature = "a",
atomic_maybe_uninit_target_feature = "a",
target_feature = "zalrsc",
atomic_maybe_uninit_target_feature = "zalrsc",
),
)))]
#[cfg(any(target_feature = "zacas", atomic_maybe_uninit_target_feature = "zacas"))]
impl AtomicSwap for $ty {
#[inline]
unsafe fn atomic_swap(
dst: *mut MaybeUninit<Self>,
val: MaybeUninit<Self>,
order: Ordering,
) -> MaybeUninit<Self> {
debug_assert!(dst as usize % mem::size_of::<$ty>() == 0);
// From here on, `dst` is the pointer returned by create_sub_word_mask_values (accessed
// below with word-sized lw/amocas.w); `shift` and `mask` locate the sub-word inside it.
let (dst, shift, mask) = crate::utils::create_sub_word_mask_values(dst);
let mut out: MaybeUninit<Self>;

// SAFETY: the caller must uphold the safety contract.
unsafe {
// Implement sub-word atomic operations using word-sized CAS (amocas.w) loop.
// (There is no LR/SC here: the word is loaded once with lw and then updated by
// retrying amocas.w until it succeeds.)
// See also create_sub_word_mask_values.
macro_rules! swap {
// fence is not emitted because we retry until CAS success
($_fence:tt, $asm_order:tt) => {
asm!(
concat!("sll", w!(), " {mask}, {mask}, {shift}"), // mask <<= shift & 31
concat!("sll", w!(), " {val}, {val}, {shift}"), // val <<= shift & 31
"lw {out}, 0({dst})", // atomic { out = *dst }
"2:", // 'retry:
// out_tmp will be used for later comparison.
"mv {out_tmp}, {out}", // out_tmp = out
// The following three instructions compute tmp = (out & !mask) | (val & mask),
// i.e., the current word with only the target sub-word replaced by val.
"xor {tmp}, {val}, {out}", // tmp = val ^ out
"and {tmp}, {tmp}, {mask}", // tmp &= mask
"xor {tmp}, {tmp}, {out}", // tmp ^= out
concat!("amocas.w", $asm_order, " {out}, {tmp}, 0({dst})"), // atomic { if *dst == out { *dst = tmp } else { out = *dst } }
// On CAS failure, out already holds the latest word, so no reload is needed before retrying.
"bne {out}, {out_tmp}, 2b", // if out != out_tmp { jump 'retry }
concat!("srl", w!(), " {out}, {out}, {shift}"), // out >>= shift & 31
dst = in(reg) ptr_reg!(dst),
val = inout(reg) crate::utils::ZeroExtend::zero_extend(val) => _,
out = out(reg) out,
shift = in(reg) shift,
mask = inout(reg) mask => _,
tmp = out(reg) _,
out_tmp = out(reg) _,
options(nostack, preserves_flags),
)
};
}
atomic_rmw_amocas!(swap, order);
}
out
}
}

// compare_exchange
#[cfg(all(
Expand Down Expand Up @@ -552,11 +617,14 @@ macro_rules! atomic_sub_word {
any(target_feature = "zabha", atomic_maybe_uninit_target_feature = "zabha"),
any(target_feature = "zacas", atomic_maybe_uninit_target_feature = "zacas"),
)))]
#[cfg(any(
target_feature = "a",
atomic_maybe_uninit_target_feature = "a",
target_feature = "zalrsc",
atomic_maybe_uninit_target_feature = "zalrsc",
#[cfg(all(
not(atomic_maybe_uninit_test_prefer_zacas_over_zalrsc_for_sub_word),
any(
target_feature = "a",
atomic_maybe_uninit_target_feature = "a",
target_feature = "zalrsc",
atomic_maybe_uninit_target_feature = "zalrsc",
),
))]
impl AtomicCompareExchange for $ty {
#[inline]
Expand Down Expand Up @@ -616,6 +684,94 @@ macro_rules! atomic_sub_word {
}
}
}
// Zacas-based sub-word compare_exchange.
// Selected only when all of the following hold:
// - the Zabha+Zacas-based implementation is not selected (Zabha and Zacas are not both available),
// - the A/Zalrsc-based implementation is not selected (neither A nor Zalrsc is available, or
//   atomic_maybe_uninit_test_prefer_zacas_over_zalrsc_for_sub_word is set),
// - Zacas is available.
#[cfg(not(all(
any(target_feature = "zabha", atomic_maybe_uninit_target_feature = "zabha"),
any(target_feature = "zacas", atomic_maybe_uninit_target_feature = "zacas"),
)))]
#[cfg(not(all(
not(atomic_maybe_uninit_test_prefer_zacas_over_zalrsc_for_sub_word),
any(
target_feature = "a",
atomic_maybe_uninit_target_feature = "a",
target_feature = "zalrsc",
atomic_maybe_uninit_target_feature = "zalrsc",
),
)))]
#[cfg(any(target_feature = "zacas", atomic_maybe_uninit_target_feature = "zacas"))]
impl AtomicCompareExchange for $ty {
#[inline]
unsafe fn atomic_compare_exchange(
dst: *mut MaybeUninit<Self>,
old: MaybeUninit<Self>,
new: MaybeUninit<Self>,
success: Ordering,
failure: Ordering,
) -> (MaybeUninit<Self>, bool) {
debug_assert!(dst as usize % mem::size_of::<$ty>() == 0);
// `order` is the result of upgrade_success_ordering(success, failure); the match below
// dispatches on (order, failure) rather than on (success, failure).
let order = crate::utils::upgrade_success_ordering(success, failure);
// From here on, `dst` is the pointer returned by create_sub_word_mask_values (accessed
// below with word-sized lw/amocas.w); `shift` and `mask` locate the sub-word inside it.
let (dst, shift, mask) = crate::utils::create_sub_word_mask_values(dst);
let mut out: MaybeUninit<Self>;

// SAFETY: the caller must uphold the safety contract.
unsafe {
let mut r: crate::utils::RegSize;
// Implement sub-word atomic operations using word-sized CAS (amocas.w) loop.
// (There is no LR/SC here: the word is loaded once with lw and then updated by
// retrying amocas.w while the target sub-word still equals old.)
// See also create_sub_word_mask_values.
macro_rules! cmpxchg {
// $failure_acquire: fence emitted only on the compare-failure path ('cmp-fail).
// $failure_release: fence emitted before the initial load.
// $asm_order: ordering suffix appended to amocas.w.
($failure_acquire:tt, $failure_release:tt, $asm_order:tt) => {
asm!(
concat!("sll", w!(), " {mask}, {mask}, {shift}"), // mask <<= shift & 31
concat!("sll", w!(), " {old}, {old}, {shift}"), // old <<= shift & 31
concat!("sll", w!(), " {new}, {new}, {shift}"), // new <<= shift & 31
$failure_release, // fence
"lw {tmp}, 0({dst})", // atomic { tmp = *dst }
"2:", // 'retry:
// out_tmp will be used for later comparison.
"mv {out_tmp}, {tmp}", // out_tmp = tmp
"and {out}, {tmp}, {mask}", // out = tmp & mask
"bne {out}, {old}, 3f", // if out != old { jump 'cmp-fail }
// The following three instructions compute out = (tmp & !mask) | (new & mask),
// i.e., the current word with only the target sub-word replaced by new.
"xor {out}, {tmp}, {new}", // out = tmp ^ new
"and {out}, {out}, {mask}", // out &= mask
"xor {out}, {out}, {tmp}", // out ^= tmp
concat!("amocas.w", $asm_order, " {tmp}, {out}, 0({dst})"), // atomic { if *dst == tmp { *dst = out } else { tmp = *dst } }
// On CAS failure, tmp already holds the latest word, so no reload is needed before retrying.
"bne {tmp}, {out_tmp}, 2b", // if tmp != out_tmp { jump 'retry }
"j 4f", // jump 'success
"3:", // 'cmp-fail:
$failure_acquire, // fence
"4:", // 'success: (the 'cmp-fail path also falls through to here)
concat!("srl", w!(), " {out}, {tmp}, {shift}"), // out = tmp >> (shift & 31)
// Compute the success flag: 1 if the sub-word of the last observed word equals old, otherwise 0.
"and {tmp}, {tmp}, {mask}", // tmp &= mask
"xor {tmp}, {old}, {tmp}", // tmp ^= old
"seqz {tmp}, {tmp}", // if tmp == 0 { tmp = 1 } else { tmp = 0 }
dst = in(reg) ptr_reg!(dst),
old = inout(reg) crate::utils::ZeroExtend::zero_extend(old) => _,
new = inout(reg) crate::utils::ZeroExtend::zero_extend(new) => _,
out = out(reg) out,
shift = in(reg) shift,
mask = inout(reg) mask => _,
tmp = out(reg) r,
out_tmp = out(reg) _,
options(nostack, preserves_flags),
)
};
}
// Arguments are ($failure_acquire, $failure_release, $asm_order).
match (order, failure) {
(Ordering::Relaxed, _) => cmpxchg!("", "", ""),
(Ordering::Acquire, Ordering::Relaxed) => cmpxchg!("", "", ".aq"),
(Ordering::Acquire, Ordering::Acquire) => cmpxchg!("fence r, rw", "", ".aq"),
(Ordering::Release, _) => cmpxchg!("", "", ".rl"),
(Ordering::AcqRel, Ordering::Relaxed) => cmpxchg!("", "", ".aqrl"),
(Ordering::AcqRel, Ordering::Acquire) => cmpxchg!("fence r, rw", "", ".aqrl"),
(Ordering::SeqCst, Ordering::Relaxed) => cmpxchg!("", "", ".aqrl"),
(Ordering::SeqCst, Ordering::Acquire) => cmpxchg!("fence r, rw", "", ".aqrl"),
(Ordering::SeqCst, Ordering::SeqCst) => cmpxchg!("fence r, rw", "fence rw,rw", ".aqrl"),
_ => unreachable!(),
}
// r is produced by seqz above, so it is always 0 or 1.
crate::utils::assert_unchecked(r == 0 || r == 1); // may help remove extra test
(out, r != 0)
}
}
}
}
};
}

Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ Currently, x86, x86_64, Arm, AArch64, RISC-V, LoongArch64, Arm64EC, s390x, MIPS,
| m68k \[9] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] |
| xtensa \[9] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] |
\[1] Arm's atomic RMW operations are not available on Armv6-M (thumbv6m). RISC-V's atomic RMW operations are not available on targets without the A (or G which means IMAFD) or Zalrsc extension, such as riscv32i, riscv32imc, etc. M68k's atomic RMW operations requires target-cpu M68020+ (Linux is M68020 by default). Xtensa's atomic RMW operations are not available on esp32s2.<br>
\[1] Arm's atomic RMW operations are not available on Armv6-M (thumbv6m). RISC-V's atomic RMW operations are not available on targets without the A (or G, which means IMAFD), Zalrsc, or Zacas extension, such as riscv32i, riscv32imc, etc. M68k's atomic RMW operations require target-cpu M68020+ (Linux is M68020 by default). Xtensa's atomic RMW operations are not available on esp32s2.<br>
\[2] Requires `cmpxchg16b` target feature (enabled by default on Apple and Windows (except Windows 7) targets).<br>
\[3] Armv6+ or Linux/Android, except for M-profile architecture such as thumbv6m, thumbv7m, etc.<br>
\[4] Requires `zacas` target feature.<br>
Expand Down
4 changes: 4 additions & 0 deletions tests/no-std-qemu/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,10 @@ fn run() {
test_atomic!(i64);
test_atomic!(u64);
}
cfg_has_atomic_128! {
test_atomic!(i128);
test_atomic!(u128);
}
}

const LOAD_ORDERINGS: [Ordering; 3] = [Ordering::Relaxed, Ordering::Acquire, Ordering::SeqCst];
Expand Down
Loading

0 comments on commit e72e39d

Please sign in to comment.