Skip to content

Commit

Permalink
riscv: Support 64-bit atomics on RV32, 128-bit atomics on RV64
Browse files Browse the repository at this point in the history
  • Loading branch information
taiki-e committed Feb 23, 2025
1 parent 69b596c commit b3c1a4d
Show file tree
Hide file tree
Showing 4 changed files with 263 additions and 47 deletions.
43 changes: 23 additions & 20 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,31 +29,34 @@ Currently, x86, x86_64, Arm, AArch64, RISC-V, LoongArch64, Arm64EC, s390x, MIPS,
| arm (except for M-profile) \[3] | i64,u64 |||
| aarch64 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 |||
| riscv32 | isize,usize,i8,u8,i16,u16,i32,u32 ||\[1] |
| riscv32 (+zacas) \[4] | i64,u64 |||
| riscv64 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 ||\[1] |
| loongarch64 \[6] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 |||
| arm64ec \[7] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 |||
| s390x \[7] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 |||
| mips / mips32r6 \[8] | isize,usize,i8,u8,i16,u16,i32,u32 |||
| mips64 / mips64r6 \[8] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 |||
| powerpc \[8] | isize,usize,i8,u8,i16,u16,i32,u32 |||
| powerpc64 \[8] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 |||
| powerpc64 (+quadword-atomics) \[4] \[8]| i128,u128 |||
| msp430 \[8] (experimental) | isize,usize,i8,u8,i16,u16 |||
| avr \[8] (experimental) | isize,usize,i8,u8,i16,u16 |||
| sparc \[5] \[8] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 |||
| sparc64 \[8] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 |||
| hexagon \[8] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 |||
| m68k \[8] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 ||\[1] |
| xtensa \[8] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 ||\[1] |
| riscv64 (+zacas) \[4] | i128,u128 |||
| loongarch64 \[7] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 |||
| arm64ec \[8] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 |||
| s390x \[8] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 |||
| mips / mips32r6 \[9] | isize,usize,i8,u8,i16,u16,i32,u32 |||
| mips64 / mips64r6 \[9] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 |||
| powerpc \[9] | isize,usize,i8,u8,i16,u16,i32,u32 |||
| powerpc64 \[9] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 |||
| powerpc64 (+quadword-atomics) \[5] \[9]| i128,u128 |||
| msp430 \[9] (experimental) | isize,usize,i8,u8,i16,u16 |||
| avr \[9] (experimental) | isize,usize,i8,u8,i16,u16 |||
| sparc \[6] \[9] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 |||
| sparc64 \[9] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 |||
| hexagon \[9] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 |||
| m68k \[9] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 ||\[1] |
| xtensa \[9] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 ||\[1] |

\[1] Arm's atomic RMW operations are not available on Armv6-M (thumbv6m). RISC-V's atomic RMW operations are not available on targets without the A (or G which means IMAFD) or Zalrsc extension, such as riscv32i, riscv32imc, etc. M68k's atomic RMW operations require target-cpu M68020+ (Linux is M68020 by default). Xtensa's atomic RMW operations are not available on esp32s2.<br>
\[2] Requires `cmpxchg16b` target feature (enabled by default on Apple and Windows (except Windows 7) targets).<br>
\[3] Armv6+ or Linux/Android, except for M-profile architecture such as thumbv6m, thumbv7m, etc.<br>
\[4] Requires `quadword-atomics` target feature (enabled by default on powerpc64le).<br>
\[5] Requires `v9` or `leoncasa` target feature (enabled by default on Linux).<br>
\[6] Requires Rust 1.72+.<br>
\[7] Requires Rust 1.84+.<br>
\[8] Requires nightly due to `#![feature(asm_experimental_arch)]`.<br>
\[4] Requires `zacas` target feature.<br>
\[5] Requires `quadword-atomics` target feature (enabled by default on powerpc64le).<br>
\[6] Requires `v9` or `leoncasa` target feature (enabled by default on Linux).<br>
\[7] Requires Rust 1.72+.<br>
\[8] Requires Rust 1.84+.<br>
\[9] Requires nightly due to `#![feature(asm_experimental_arch)]`.<br>

See also [Atomic operation overview by architecture](https://github.com/taiki-e/atomic-maybe-uninit/blob/HEAD/src/arch/README.md) for more information about atomic operations in these architectures.

Expand Down
60 changes: 54 additions & 6 deletions src/arch/cfgs/riscv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,30 +26,78 @@ macro_rules! cfg_has_atomic_32 {
macro_rules! cfg_no_atomic_32 {
($($tt:tt)*) => {};
}
#[cfg(target_arch = "riscv32")]
#[cfg(any(
all(
target_arch = "riscv32",
any(target_feature = "zacas", atomic_maybe_uninit_target_feature = "zacas"),
),
target_arch = "riscv64",
))]
#[macro_export]
macro_rules! cfg_has_atomic_64 {
($($tt:tt)*) => { $($tt)* };
}
// Variant selected when 64-bit atomics are available: on riscv64 unconditionally,
// or on riscv32 when the `zacas` feature is enabled (via `target_feature` or the
// crate's `atomic_maybe_uninit_target_feature` cfg).
// In that configuration the "no 64-bit atomics" gate discards the tokens it is given.
#[cfg(any(
all(
target_arch = "riscv32",
any(target_feature = "zacas", atomic_maybe_uninit_target_feature = "zacas"),
),
target_arch = "riscv64",
))]
#[macro_export]
macro_rules! cfg_no_atomic_64 {
($($tt:tt)*) => {};
}
// Variant selected when 64-bit atomics are NOT available, i.e. the target is neither
// riscv64 nor riscv32 with the `zacas` feature enabled.
// In that configuration the "has 64-bit atomics" gate discards the tokens it is given.
#[cfg(not(any(
all(
target_arch = "riscv32",
any(target_feature = "zacas", atomic_maybe_uninit_target_feature = "zacas"),
),
target_arch = "riscv64",
)))]
#[macro_export]
macro_rules! cfg_has_atomic_64 {
($($tt:tt)*) => {};
}
#[cfg(target_arch = "riscv32")]
#[cfg(not(any(
all(
target_arch = "riscv32",
any(target_feature = "zacas", atomic_maybe_uninit_target_feature = "zacas"),
),
target_arch = "riscv64",
)))]
#[macro_export]
macro_rules! cfg_no_atomic_64 {
($($tt:tt)*) => { $($tt)* };
}
#[cfg(target_arch = "riscv64")]
#[cfg(all(
target_arch = "riscv64",
any(target_feature = "zacas", atomic_maybe_uninit_target_feature = "zacas"),
))]
#[macro_export]
macro_rules! cfg_has_atomic_64 {
macro_rules! cfg_has_atomic_128 {
($($tt:tt)*) => { $($tt)* };
}
#[cfg(target_arch = "riscv64")]
#[cfg(all(
target_arch = "riscv64",
any(target_feature = "zacas", atomic_maybe_uninit_target_feature = "zacas"),
))]
#[macro_export]
macro_rules! cfg_no_atomic_64 {
macro_rules! cfg_no_atomic_128 {
($($tt:tt)*) => {};
}
// Variant selected when 128-bit atomics are NOT available, i.e. the target is not
// riscv64 with the `zacas` feature enabled (via `target_feature` or the crate's
// `atomic_maybe_uninit_target_feature` cfg).
// In that configuration the "has 128-bit atomics" gate discards the tokens it is given.
#[cfg(not(all(
target_arch = "riscv64",
any(target_feature = "zacas", atomic_maybe_uninit_target_feature = "zacas"),
)))]
#[macro_export]
macro_rules! cfg_has_atomic_128 {
($($tt:tt)*) => {};
}
#[cfg(not(all(
target_arch = "riscv64",
any(target_feature = "zacas", atomic_maybe_uninit_target_feature = "zacas"),
)))]
#[macro_export]
macro_rules! cfg_no_atomic_128 {
($($tt:tt)*) => { $($tt)* };
Expand Down
164 changes: 163 additions & 1 deletion src/arch/riscv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ Generated asm:
*/

// TODO:
// - 64-bit/128-bit atomics on RV32/RV64 with Zacas
// - {8,16}-bit swap/cas with Zacas without Zalrsc & Zabha (use amocas.w)

#[path = "cfgs/riscv.rs"]
Expand Down Expand Up @@ -597,3 +596,166 @@ atomic!(usize, "w");
atomic!(isize, "d");
#[cfg(target_pointer_width = "64")]
atomic!(usize, "d");

// Implements `AtomicLoad`, `AtomicStore`, `AtomicSwap`, and `AtomicCompareExchange`
// for a double-register-width integer type on top of the Zacas `amocas` instruction.
//
// Arguments:
// - `$ty`: integer type to implement the traits for.
// - `$size`: width suffix appended to `amocas.` (e.g. "d" or "q").
// - `$reg_size`: width suffix appended to `l` to form the plain load of one half
//   (e.g. "w" or "d").
// - `$reg_size_offset`: byte offset of the high half within the value, as a string
//   (e.g. "4" or "8").
//
// Every impl is gated on the `zacas` target feature (either `target_feature` or the
// crate's `atomic_maybe_uninit_target_feature` cfg).
//
// The value is passed to/from the asm as two register-sized halves. `amocas` on a
// double-width operand uses even/odd register pairs, which is why the operands are
// pinned to a2:a3 and a4:a5 instead of being left to the register allocator.
#[rustfmt::skip]
macro_rules! atomic_dw {
    ($ty:ident, $size:tt, $reg_size:tt, $reg_size_offset:tt) => {
        #[cfg(any(target_feature = "zacas", atomic_maybe_uninit_target_feature = "zacas"))]
        impl AtomicLoad for $ty {
            // Loads the value by issuing a compare-and-swap whose expected and new values
            // are both zero: if memory holds zero, zero is written back (no visible change);
            // otherwise the current value is returned in a2:a3. Either way a2:a3 end up
            // holding the value at `src`.
            #[inline]
            unsafe fn atomic_load(
                src: *const MaybeUninit<Self>,
                order: Ordering,
            ) -> MaybeUninit<Self> {
                debug_assert!(src as usize % mem::size_of::<$ty>() == 0);
                let (out_lo, out_hi);

                // SAFETY: the caller must uphold the safety contract,
                // the cfg guarantees that the CPU supports the Zacas extension.
                unsafe {
                    macro_rules! load {
                        ($fence:tt, $asm_order:tt) => {
                            asm!(
                                $fence, // fence
                                concat!("amocas.", $size, $asm_order, " a2, a2, 0({src})"), // atomic { if *src == a2:a3 { *src = a2:a3 } else { a2:a3 = *src } }
                                src = in(reg) ptr_reg!(src),
                                // NOTE(review): `0_u64` is wider than a general-purpose register
                                // on riscv32 — confirm this operand type is accepted for the
                                // riscv32 instantiation of this macro.
                                inout("a2") 0_u64 => out_lo,
                                inout("a3") 0_u64 => out_hi,
                                options(nostack, preserves_flags),
                            )
                        };
                    }
                    // `atomic_rmw_amocas!` is defined elsewhere; presumably it maps `order` to
                    // the fence / ordering-suffix pair and invokes `load!` with it.
                    atomic_rmw_amocas!(load, order);
                    // Reassemble the two halves into a single `MaybeUninit<$ty>`.
                    MaybeUninit128 { pair: Pair { lo: out_lo, hi: out_hi } }.$ty
                }
            }
        }
        #[cfg(any(target_feature = "zacas", atomic_maybe_uninit_target_feature = "zacas"))]
        impl AtomicStore for $ty {
            // Stores `val` by performing an atomic swap and discarding the previous value.
            #[inline]
            unsafe fn atomic_store(
                dst: *mut MaybeUninit<Self>,
                val: MaybeUninit<Self>,
                order: Ordering,
            ) {
                // SAFETY: the caller must uphold the safety contract.
                unsafe {
                    <$ty as AtomicSwap>::atomic_swap(dst, val, order);
                }
            }
        }
        #[cfg(any(target_feature = "zacas", atomic_maybe_uninit_target_feature = "zacas"))]
        impl AtomicSwap for $ty {
            // Swaps in `val` with a CAS retry loop: read a guess of the current value,
            // then repeat `amocas` (which refreshes the guess on failure) until the value
            // observed by `amocas` equals the guess it was given. Returns the previous value.
            #[inline]
            unsafe fn atomic_swap(
                dst: *mut MaybeUninit<Self>,
                val: MaybeUninit<Self>,
                order: Ordering,
            ) -> MaybeUninit<Self> {
                debug_assert!(dst as usize % mem::size_of::<$ty>() == 0);
                let val = MaybeUninit128 { $ty: val };
                let (mut prev_lo, mut prev_hi);

                // SAFETY: the caller must uphold the safety contract,
                // the cfg guarantees that the CPU supports the Zacas extension.
                unsafe {
                    macro_rules! swap {
                        // fence is not emitted because we retry until CAS success
                        ($_fence:tt, $asm_order:tt) => {
                            asm!(
                                // These two loads are not a single-copy atomic read, but this is ok
                                // because the subsequent CAS will check for consistency.
                                concat!("l", $reg_size, " a4, ({dst})"), // atomic { a4 = *dst }
                                concat!("l", $reg_size, " a5, ", $reg_size_offset, "({dst})"), // atomic { a5 = *dst.add($reg_size_offset) }
                                "2:", // 'retry:
                                    // a4:a5 will be used for later comparison.
                                    "mv {tmp_lo}, a4", // tmp_lo = a4
                                    "mv {tmp_hi}, a5", // tmp_hi = a5
                                    concat!("amocas.", $size, $asm_order, " a4, a2, 0({dst})"), // atomic { if *dst == a4:a5 { *dst = a2:a3 } else { a4:a5 = *dst } }
                                    "xor {tmp_lo}, {tmp_lo}, a4", // tmp_lo ^= a4
                                    "xor {tmp_hi}, {tmp_hi}, a5", // tmp_hi ^= a5
                                    // Combine the per-half differences with OR, not XOR: XOR would
                                    // produce 0 when both halves differ by the same bit pattern and
                                    // the loop would exit even though the CAS failed.
                                    "or {tmp_lo}, {tmp_lo}, {tmp_hi}", // tmp_lo |= tmp_hi
                                    "bnez {tmp_lo}, 2b", // if tmp_lo != 0 { jump 'retry }
                                dst = in(reg) ptr_reg!(dst),
                                tmp_lo = out(reg) _,
                                tmp_hi = out(reg) _,
                                // must be allocated to even/odd register pair
                                out("a4") prev_lo,
                                out("a5") prev_hi,
                                // must be allocated to even/odd register pair
                                in("a2") val.pair.lo,
                                in("a3") val.pair.hi,
                                options(nostack, preserves_flags),
                            )
                        };
                    }
                    atomic_rmw_amocas!(swap, order);
                    // Reassemble the two halves of the previous value.
                    MaybeUninit128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.$ty
                }
            }
        }
        #[cfg(any(target_feature = "zacas", atomic_maybe_uninit_target_feature = "zacas"))]
        impl AtomicCompareExchange for $ty {
            // Performs a single `amocas` with `old` as the expected value and `new` as the
            // replacement. Returns the value observed in memory together with a flag that
            // is `true` iff the observed value equals `old` (i.e. the exchange happened).
            #[inline]
            unsafe fn atomic_compare_exchange(
                dst: *mut MaybeUninit<Self>,
                old: MaybeUninit<Self>,
                new: MaybeUninit<Self>,
                success: Ordering,
                failure: Ordering,
            ) -> (MaybeUninit<Self>, bool) {
                debug_assert!(dst as usize % mem::size_of::<$ty>() == 0);
                // A single instruction serves both outcomes, so one ordering is derived
                // from the success/failure pair (helper defined elsewhere in the crate).
                let order = crate::utils::upgrade_success_ordering(success, failure);
                let old = MaybeUninit128 { $ty: old };
                let new = MaybeUninit128 { $ty: new };
                let (prev_lo, prev_hi);

                // SAFETY: the caller must uphold the safety contract,
                // the cfg guarantees that the CPU supports the Zacas extension.
                unsafe {
                    let mut r: crate::utils::RegSize;
                    macro_rules! cmpxchg {
                        ($fence:tt, $asm_order:tt) => {
                            asm!(
                                $fence, // fence
                                // a4:a5 will be used for later comparison.
                                "mv {tmp_lo}, a4", // tmp_lo = a4
                                "mv {tmp_hi}, a5", // tmp_hi = a5
                                concat!("amocas.", $size, $asm_order, " a4, a2, 0({dst})"), // atomic { if *dst == a4:a5 { *dst = a2:a3 } else { a4:a5 = *dst } }
                                "xor {tmp_lo}, {tmp_lo}, a4", // tmp_lo ^= a4
                                "xor {tmp_hi}, {tmp_hi}, a5", // tmp_hi ^= a5
                                // Combine the per-half differences with OR, not XOR: XOR would
                                // produce 0 when both halves differ by the same bit pattern and
                                // a failed CAS would be reported as a success.
                                "or {tmp_lo}, {tmp_lo}, {tmp_hi}", // tmp_lo |= tmp_hi
                                "seqz {tmp_lo}, {tmp_lo}", // if tmp_lo == 0 { tmp_lo = 1 } else { tmp_lo = 0 }
                                dst = in(reg) ptr_reg!(dst),
                                tmp_lo = out(reg) r,
                                tmp_hi = out(reg) _,
                                // must be allocated to even/odd register pair
                                inout("a4") old.pair.lo => prev_lo,
                                inout("a5") old.pair.hi => prev_hi,
                                // must be allocated to even/odd register pair
                                in("a2") new.pair.lo,
                                in("a3") new.pair.hi,
                                options(nostack, preserves_flags),
                            )
                        };
                    }
                    atomic_rmw_amocas!(cmpxchg, order, failure = failure);
                    crate::utils::assert_unchecked(r == 0 || r == 1); // may help remove extra test
                    (
                        MaybeUninit128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.$ty,
                        r != 0
                    )
                }
            }
        }
    };
}

// Instantiate the double-width atomics for each architecture.
// Arguments are: type, `amocas.` width suffix, load width suffix for one half,
// and byte offset of the high half.
// - riscv32: 64-bit types via `amocas.d`, halves read with `lw` at offsets 0 and 4.
// - riscv64: 128-bit types via `amocas.q`, halves read with `ld` at offsets 0 and 8.
#[cfg(target_arch = "riscv32")]
atomic_dw!(i64, "d", "w", "4");
#[cfg(target_arch = "riscv32")]
atomic_dw!(u64, "d", "w", "4");
#[cfg(target_arch = "riscv64")]
atomic_dw!(i128, "q", "d", "8");
#[cfg(target_arch = "riscv64")]
atomic_dw!(u128, "q", "d", "8");
Loading

0 comments on commit b3c1a4d

Please sign in to comment.