Skip to content

Commit

Permalink
fix - neon type signed unsigned conversions
Browse files Browse the repository at this point in the history
  • Loading branch information
Jamesbarford committed Feb 25, 2025
1 parent 1d71dc6 commit 0cc4897
Show file tree
Hide file tree
Showing 7 changed files with 826 additions and 824 deletions.
668 changes: 337 additions & 331 deletions crates/core_arch/src/aarch64/neon/generated.rs

Large diffs are not rendered by default.

847 changes: 413 additions & 434 deletions crates/core_arch/src/arm_shared/neon/generated.rs

Large diffs are not rendered by default.

31 changes: 17 additions & 14 deletions crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ arch_cfgs:
# Generate big endian shuffles
auto_big_endian: true

# We do not want to automatically generate signed/unsigned casts
auto_llvm_sign_conversion: false

# Repeatedly used anchors
# #[stable(feature = "neon_intrinsics", since = "1.59.0")]
neon-stable: &neon-stable
Expand Down Expand Up @@ -1004,7 +1007,7 @@ intrinsics:
links:
- link: "llvm.aarch64.neon.facgt.{type[3]}.{type[1]}"
arch: aarch64,arm64ec
- '_vcagth_f16(a, b).as_unsigned() as u16'
- '_vcagth_f16(a, b) as u16'

- name: "vcage{neon_type[0].no}"
doc: "Floating-point absolute compare greater than or equal"
Expand Down Expand Up @@ -1064,7 +1067,7 @@ intrinsics:
links:
- link: "llvm.aarch64.neon.facge.{type[3]}.{type[1]}"
arch: aarch64,arm64ec
- "_vcageh_f16(a, b).as_unsigned() as u16"
- "_vcageh_f16(a, b) as u16"

- name: "vcalt{neon_type[0].no}"
doc: "Floating-point absolute compare less than"
Expand Down Expand Up @@ -1314,7 +1317,7 @@ intrinsics:
links:
- link: "llvm.aarch64.neon.vcvtfxu2fp.{type[1]}.{type[0]}"
arch: aarch64,arm64ec
- FnCall: ["_vcvt{type[2]}_n_{type[1]}_{type[0]}", [a.as_signed(), N]]
- FnCall: ["_vcvt{type[2]}_n_{type[1]}_{type[0]}", [a, N]]


- name: "vcvt{type[2]}_n_{type[1]}_{type[0]}"
Expand Down Expand Up @@ -1406,7 +1409,7 @@ intrinsics:
links:
- link: "llvm.aarch64.neon.vcvtfxu2fp.{neon_type[1]}.{neon_type[0]}"
arch: aarch64,arm64ec
- FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a.as_signed()", N]]
- FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a", N]]

- name: "vcvt{type[2]}"
doc: "Fixed-point convert to floating-point"
Expand All @@ -1432,7 +1435,7 @@ intrinsics:
links:
- link: "llvm.aarch64.neon.vcvtfxu2fp.{type[1]}.{type[0]}"
arch: aarch64,arm64ec
- FnCall: ["_vcvt{type[2]}", ["a.as_signed()", N]]
- FnCall: ["_vcvt{type[2]}", ["a", N]]

- name: "vcvt{type[2]}"
doc: "Fixed-point convert to floating-point"
Expand Down Expand Up @@ -6023,7 +6026,7 @@ intrinsics:
links:
- link: "llvm.aarch64.neon.uaddlv.{type[2]}.{neon_type[0]}"
arch: aarch64,arm64ec
- FnCall: ['_vaddlv{neon_type[0].no}', ['a.as_signed()']]
- FnCall: ['_vaddlv{neon_type[0].no}', ['a']]

- name: "vaddlv{neon_type[0].no}"
doc: Unsigned Add Long across Vector
Expand All @@ -6041,7 +6044,7 @@ intrinsics:
links:
- link: "llvm.aarch64.neon.uaddlv.{type[2]}.{neon_type[0]}"
arch: aarch64,arm64ec
- FnCall: ['_vaddlv{neon_type[0].no}', ['a.as_signed()']]
- FnCall: ['_vaddlv{neon_type[0].no}', ['a']]

- name: "vsubw_high{neon_type[1].noq}"
doc: Signed Subtract Wide
Expand Down Expand Up @@ -8704,7 +8707,7 @@ intrinsics:
links:
- link: "llvm.aarch64.neon.uqshrn.i32"
arch: aarch64,arm64ec
- FnCall: ["_vqshrnd_n_u64", ["a.as_signed()", N]]
- FnCall: ["_vqshrnd_n_u64", ["a", N]]

- name: "vqshrn{type[0]}"
doc: "Unsigned saturating shift right narrow"
Expand Down Expand Up @@ -9845,9 +9848,9 @@ intrinsics:
arch: aarch64,arm64ec
- FnCall:
- "_vsm3tt{type[0]}"
- - "a.as_signed()"
- "b.as_signed()"
- "c.as_signed()"
- - "a"
- "b"
- "c"
- "IMM2 as i64"

- name: "vxarq_u64"
Expand Down Expand Up @@ -9877,8 +9880,8 @@ intrinsics:
arch: aarch64,arm64ec
- FnCall:
- "_vxarq_u64"
- - "a.as_signed()"
- "b.as_signed()"
- - "a"
- "b"
- "IMM6 as i64"

- name: "vrnd32x{neon_type.no}"
Expand Down Expand Up @@ -13979,7 +13982,7 @@ intrinsics:
links:
- link: "llvm.aarch64.neon.uaddlv.i32.{neon_type[0]}"
arch: aarch64,arm64ec
- Identifier: ["_vaddlv{neon_type[0].no}(a.as_signed()).as_unsigned() as u16", Symbol]
- Identifier: ["_vaddlv{neon_type[0].no}(a) as u16", Symbol]

- name: "vmaxv{neon_type[0].no}"
doc: "Horizontal vector max."
Expand Down
48 changes: 23 additions & 25 deletions crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1139,7 +1139,7 @@ intrinsics:
links:
- link: "llvm.arm.neon.vcvtfxu2fp.{neon_type[1]}.{neon_type[0]}"
arch: arm
- FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a.as_signed()", N]]
- FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a", N]]

- name: "vcvt{neon_type[1].N}_{neon_type[0]}"
doc: "Fixed-point convert to floating-point"
Expand All @@ -1166,7 +1166,7 @@ intrinsics:
links:
- link: "llvm.aarch64.neon.vcvtfxu2fp.{neon_type[1]}.{neon_type[0]}"
arch: aarch64,arm64ec
- FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a.as_signed()", N]]
- FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a", N]]

- name: "vcvt{neon_type[1].N}_{neon_type[0]}"
doc: "Fixed-point convert to floating-point"
Expand Down Expand Up @@ -1197,7 +1197,7 @@ intrinsics:
arch: arm
- link: "llvm.aarch64.neon.vcvtfxu2fp.{neon_type[1]}.{neon_type[0]}"
arch: aarch64,arm64ec
- FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a.as_signed()", N]]
- FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a", N]]


- name: "vcvt{neon_type[1].N}_{neon_type[0]}"
Expand Down Expand Up @@ -8486,9 +8486,9 @@ intrinsics:
safety:
unsafe: [neon]
types:
- [uint16x8_t, uint8x8_t, 'N >= 1 && N <= 8', 'const { uint16x8_t([-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16]) }.as_signed()']
- [uint32x4_t, uint16x4_t, 'N >= 1 && N <= 16', 'const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }.as_signed()']
- [uint64x2_t, uint32x2_t, 'N >= 1 && N <= 32', 'const { uint64x2_t([-N as u64, -N as u64]) }.as_signed()']
- [uint16x8_t, uint8x8_t, 'N >= 1 && N <= 8', 'const { uint16x8_t([-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16]) }']
- [uint32x4_t, uint16x4_t, 'N >= 1 && N <= 16', 'const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }']
- [uint64x2_t, uint32x2_t, 'N >= 1 && N <= 32', 'const { uint64x2_t([-N as u64, -N as u64]) }']
compose:
- FnCall: [static_assert!, ["{type[2]}"]]
- LLVMLink:
Expand All @@ -8499,7 +8499,7 @@ intrinsics:
links:
- link: "llvm.arm.neon.vqshiftnu.{neon_type[1]}"
arch: arm
- FnCall: ["_vqshrn_n_{neon_type[0]}", ["a.as_signed()", "{type[3]}"]]
- FnCall: ["_vqshrn_n_{neon_type[0]}", ["a", "{type[3]}"]]

- name: "vqshrn_n_{neon_type[0]}"
doc: "Unsigned saturating shift right narrow"
Expand Down Expand Up @@ -8527,7 +8527,7 @@ intrinsics:
links:
- link: "llvm.aarch64.neon.uqshrn.{neon_type[1]}"
arch: aarch64,arm64ec
- FnCall: ["_vqshrn_n_{neon_type[0]}", ["a.as_signed()", N]]
- FnCall: ["_vqshrn_n_{neon_type[0]}", ["a", N]]

- name: "vqshrun_n_{neon_type[0]}"
doc: "Signed saturating shift right unsigned narrow"
Expand Down Expand Up @@ -10987,9 +10987,9 @@ intrinsics:
safety:
unsafe: [neon]
types:
- [uint16x8_t, uint8x8_t, '8', 'const { uint16x8_t([-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16]) }.as_signed()']
- [uint32x4_t, uint16x4_t, '16', 'const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }.as_signed()']
- [uint64x2_t, uint32x2_t, '32', 'const { uint64x2_t([-N as u64, -N as u64]) }.as_signed()']
- [uint16x8_t, uint8x8_t, '8', 'const { uint16x8_t([-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16]) }']
- [uint32x4_t, uint16x4_t, '16', 'const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }']
- [uint64x2_t, uint32x2_t, '32', 'const { uint64x2_t([-N as u64, -N as u64]) }']
compose:
- FnCall: [static_assert!, ['N >= 1 && N <= {type[2]}']]
- LLVMLink:
Expand All @@ -11000,7 +11000,7 @@ intrinsics:
links:
- link: "llvm.arm.neon.vqrshiftnu.{neon_type[1]}"
arch: arm
- FnCall: ["_vqrshrn_n{neon_type[0].noq}", ["a.as_signed()", "{type[3]}"]]
- FnCall: ["_vqrshrn_n{neon_type[0].noq}", ["a", "{type[3]}"]]

- name: "vqrshrn_n_{neon_type[0]}"
doc: "Unsigned saturating rounded shift right narrow"
Expand Down Expand Up @@ -11028,7 +11028,7 @@ intrinsics:
links:
- link: "llvm.aarch64.neon.uqrshrn.{neon_type[1]}"
arch: aarch64,arm64ec
- FnCall: ["_vqrshrn_n_{neon_type[0]}", ["a.as_signed()", N]]
- FnCall: ["_vqrshrn_n_{neon_type[0]}", ["a", N]]

- name: "vcvt{neon_type[1].no}_{neon_type[0]}"
doc: "Floating-point convert to unsigned fixed-point, rounding toward zero"
Expand Down Expand Up @@ -13167,7 +13167,7 @@ intrinsics:
arch: aarch64,arm64ec
- link: "llvm.arm.crc32b"
arch: arm
- FnCall: ["___crc32b", ["crc.as_signed()", "data.as_signed() as i32"]]
- FnCall: ["___crc32b", ["crc", "data as u32"]]

- name: "__crc32h"
doc: "CRC32 single round checksum for bytes (16 bits)."
Expand All @@ -13194,7 +13194,7 @@ intrinsics:
arch: aarch64,arm64ec
- link: "llvm.arm.crc32h"
arch: arm
- FnCall: ["___crc32h", ["crc.as_signed()", "data.as_signed() as i32"]]
- FnCall: ["___crc32h", ["crc", "data as u32"]]

- name: "__crc32w"
doc: "CRC32 single round checksum for bytes (32 bits)."
Expand Down Expand Up @@ -13244,7 +13244,7 @@ intrinsics:
arch: aarch64,arm64ec
- link: "llvm.arm.crc32cb"
arch: arm
- FnCall: ["___crc32cb", ["crc.as_signed()", "data.as_signed() as i32"]]
- FnCall: ["___crc32cb", ["crc", "data as u32"]]

- name: "__crc32ch"
doc: "CRC32-C single round checksum for bytes (16 bits)."
Expand All @@ -13271,7 +13271,7 @@ intrinsics:
arch: aarch64,arm64ec
- link: "llvm.arm.crc32ch"
arch: arm
- FnCall: ["___crc32ch", ["crc.as_signed()", "data.as_signed() as i32"]]
- FnCall: ["___crc32ch", ["crc", "data as u32"]]

- name: "__crc32cw"
doc: "CRC32-C single round checksum for bytes (32 bits)."
Expand Down Expand Up @@ -13313,10 +13313,9 @@ intrinsics:
# As the call to `__crc32` does not get inlined, we define an LLVM binding
# here, which is the same as above, and call it directly which results
# in the correct instructions being generated
- Let: [a, i32, 'crc as i32']
- Let: [b, i32, '(data & 0xFFFFFFFF).as_signed() as i32']
- Let: [c, i32, '(data >> 32).as_signed() as i32']
- 'unsafe extern "unadjusted" {{ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32w")] fn ___crc32w(crc: i32, data: i32) -> i32;}} ___crc32w(___crc32w(a, b), c).as_unsigned()'
- Let: [b, u32, '(data & 0xFFFFFFFF) as u32']
- Let: [c, u32, '(data >> 32) as u32']
- 'unsafe extern "unadjusted" {{ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32w")] fn ___crc32w(crc: u32, data: u32) -> u32;}} ___crc32w(___crc32w(crc, b), c)'

- name: "__crc32cd"
doc: "CRC32-C single round checksum for quad words (64 bits)."
Expand All @@ -13332,10 +13331,9 @@ intrinsics:
types:
- [u32, u64]
compose:
- Let: [a, i32, 'crc as i32']
- Let: [b, i32, '(data & 0xFFFFFFFF).as_signed() as i32']
- Let: [c, i32, '(data >> 32).as_signed() as i32']
- 'unsafe extern "unadjusted" {{ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32cw")] fn ___crc32cw(crc: i32, data: i32) -> i32;}} ___crc32cw(___crc32cw(a, b), c).as_unsigned() as u32'
- Let: [b, u32, '(data & 0xFFFFFFFF) as u32']
- Let: [c, u32, '(data >> 32) as u32']
- 'unsafe extern "unadjusted" {{ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32cw")] fn ___crc32cw(crc: u32, data: u32) -> u32;}} ___crc32cw(___crc32cw(crc, b), c)'

- name: "vabs{neon_type.no}"
doc: "Absolute value (wrapping)."
Expand Down
4 changes: 4 additions & 0 deletions crates/stdarch-gen-arm/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ pub struct GlobalContext {
/// Should the yaml file automagically generate big endian shuffling
#[serde(default)]
pub auto_big_endian: Option<bool>,

/// Should all LLVM wrappers convert their arguments to a signed type
#[serde(default)]
pub auto_llvm_sign_conversion: bool,
}

/// Context of an intrinsic group
Expand Down
2 changes: 1 addition & 1 deletion crates/stdarch-gen-arm/src/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ impl Expression {
*self = intrinsic
.llvm_link()
.expect("got LLVMLink wildcard without a LLVM link in `compose`")
.apply_conversions_to_call(fn_call.clone(), ctx.local)?
.apply_conversions_to_call(fn_call.clone(), ctx)?
}
}

Expand Down
50 changes: 31 additions & 19 deletions crates/stdarch-gen-arm/src/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -548,7 +548,9 @@ impl LLVMLink {
Ok(())
}

/// Alters all the unsigned types from the signature, as unsupported by LLVM.
/// Alters all the unsigned types from the signature. This is required where
/// a signed and unsigned variant require the same binding to an exposed
/// LLVM intrinsic.
pub fn sanitise_uints(&mut self) {
let transform = |tk: &mut TypeKind| {
if let Some(BaseType::Sized(BaseTypeKind::UInt, size)) = tk.base_type() {
Expand Down Expand Up @@ -603,7 +605,7 @@ impl LLVMLink {
pub fn apply_conversions_to_call(
&self,
mut fn_call: FnCall,
ctx: &LocalContext,
ctx: &Context,
) -> context::Result<Expression> {
use BaseType::{Sized, Unsized};
use BaseTypeKind::{Bool, UInt};
Expand All @@ -618,6 +620,7 @@ impl LLVMLink {
.map(|arg| -> context::Result<Expression> {
if let Expression::Identifier(ref var_name, IdentifierType::Variable) = arg {
let (kind, scope) = ctx
.local
.variables
.get(&var_name.to_string())
.ok_or_else(|| format!("invalid variable {var_name:?} being referenced"))?;
Expand All @@ -627,7 +630,11 @@ impl LLVMLink {
Ok(convert("into", arg))
}
(Argument, Some(Sized(UInt, _) | Unsized(UInt))) => {
Ok(convert("as_signed", arg))
if ctx.global.auto_llvm_sign_conversion {
Ok(convert("as_signed", arg))
} else {
Ok(arg)
}
}
_ => Ok(arg),
}
Expand All @@ -637,22 +644,25 @@ impl LLVMLink {
})
.try_collect()?;

let return_type_requires_conversion = self
.signature
.as_ref()
.and_then(|sig| sig.return_type.as_ref())
.and_then(|ty| {
if let Some(Sized(Bool, bitsize)) = ty.base_type() {
(*bitsize != 8).then_some(Bool)
} else if let Some(Sized(UInt, _) | Unsized(UInt)) = ty.base_type() {
Some(UInt)
} else {
None
}
});
let return_type_conversion = if !ctx.global.auto_llvm_sign_conversion {
None
} else {
self.signature
.as_ref()
.and_then(|sig| sig.return_type.as_ref())
.and_then(|ty| {
if let Some(Sized(Bool, bitsize)) = ty.base_type() {
(*bitsize != 8).then_some(Bool)
} else if let Some(Sized(UInt, _) | Unsized(UInt)) = ty.base_type() {
Some(UInt)
} else {
None
}
})
};

let fn_call = Expression::FnCall(fn_call);
match return_type_requires_conversion {
match return_type_conversion {
Some(Bool) => Ok(convert("into", fn_call)),
Some(UInt) => Ok(convert("as_unsigned", fn_call)),
_ => Ok(fn_call),
Expand Down Expand Up @@ -1509,8 +1519,10 @@ impl Intrinsic {
}

if let Some(llvm_link) = self.llvm_link_mut() {
// Turn all Rust unsigned types into signed
llvm_link.sanitise_uints();
/* Turn all Rust unsigned types into signed if required */
if ctx.global.auto_llvm_sign_conversion {
llvm_link.sanitise_uints();
}
}

if let Some(predicate_form) = ctx.local.predicate_form() {
Expand Down

0 comments on commit 0cc4897

Please sign in to comment.