fix - neon type signed unsigned conversions #1729

Merged
668 changes: 337 additions & 331 deletions crates/core_arch/src/aarch64/neon/generated.rs

Large diffs are not rendered by default.

847 changes: 413 additions & 434 deletions crates/core_arch/src/arm_shared/neon/generated.rs

Large diffs are not rendered by default.

31 changes: 17 additions & 14 deletions crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -5,6 +5,9 @@ arch_cfgs:
# Generate big endian shuffles
auto_big_endian: true

# We do not want to automatically generate signed/unsigned casts
auto_llvm_sign_conversion: false

# Repeatedly used anchors
# #[stable(feature = "neon_intrinsics", since = "1.59.0")]
neon-stable: &neon-stable
@@ -1004,7 +1007,7 @@ intrinsics:
links:
- link: "llvm.aarch64.neon.facgt.{type[3]}.{type[1]}"
arch: aarch64,arm64ec
- '_vcagth_f16(a, b).as_unsigned() as u16'
- '_vcagth_f16(a, b) as u16'

- name: "vcage{neon_type[0].no}"
doc: "Floating-point absolute compare greater than or equal"
@@ -1064,7 +1067,7 @@ intrinsics:
links:
- link: "llvm.aarch64.neon.facge.{type[3]}.{type[1]}"
arch: aarch64,arm64ec
- "_vcageh_f16(a, b).as_unsigned() as u16"
- "_vcageh_f16(a, b) as u16"

- name: "vcalt{neon_type[0].no}"
doc: "Floating-point absolute compare less than"
@@ -1314,7 +1317,7 @@ intrinsics:
links:
- link: "llvm.aarch64.neon.vcvtfxu2fp.{type[1]}.{type[0]}"
arch: aarch64,arm64ec
- FnCall: ["_vcvt{type[2]}_n_{type[1]}_{type[0]}", [a.as_signed(), N]]
- FnCall: ["_vcvt{type[2]}_n_{type[1]}_{type[0]}", [a, N]]


- name: "vcvt{type[2]}_n_{type[1]}_{type[0]}"
@@ -1406,7 +1409,7 @@ intrinsics:
links:
- link: "llvm.aarch64.neon.vcvtfxu2fp.{neon_type[1]}.{neon_type[0]}"
arch: aarch64,arm64ec
- FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a.as_signed()", N]]
- FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a", N]]

- name: "vcvt{type[2]}"
doc: "Fixed-point convert to floating-point"
@@ -1432,7 +1435,7 @@ intrinsics:
links:
- link: "llvm.aarch64.neon.vcvtfxu2fp.{type[1]}.{type[0]}"
arch: aarch64,arm64ec
- FnCall: ["_vcvt{type[2]}", ["a.as_signed()", N]]
- FnCall: ["_vcvt{type[2]}", ["a", N]]

- name: "vcvt{type[2]}"
doc: "Fixed-point convert to floating-point"
@@ -6023,7 +6026,7 @@ intrinsics:
links:
- link: "llvm.aarch64.neon.uaddlv.{type[2]}.{neon_type[0]}"
arch: aarch64,arm64ec
- FnCall: ['_vaddlv{neon_type[0].no}', ['a.as_signed()']]
- FnCall: ['_vaddlv{neon_type[0].no}', ['a']]

- name: "vaddlv{neon_type[0].no}"
doc: Unsigned Add Long across Vector
@@ -6041,7 +6044,7 @@ intrinsics:
links:
- link: "llvm.aarch64.neon.uaddlv.{type[2]}.{neon_type[0]}"
arch: aarch64,arm64ec
- FnCall: ['_vaddlv{neon_type[0].no}', ['a.as_signed()']]
- FnCall: ['_vaddlv{neon_type[0].no}', ['a']]

- name: "vsubw_high{neon_type[1].noq}"
doc: Signed Subtract Wide
@@ -8704,7 +8707,7 @@ intrinsics:
links:
- link: "llvm.aarch64.neon.uqshrn.i32"
arch: aarch64,arm64ec
- FnCall: ["_vqshrnd_n_u64", ["a.as_signed()", N]]
- FnCall: ["_vqshrnd_n_u64", ["a", N]]

- name: "vqshrn{type[0]}"
doc: "Unsigned saturating shift right narrow"
@@ -9845,9 +9848,9 @@ intrinsics:
arch: aarch64,arm64ec
- FnCall:
- "_vsm3tt{type[0]}"
- - "a.as_signed()"
- "b.as_signed()"
- "c.as_signed()"
- - "a"
- "b"
- "c"
- "IMM2 as i64"

- name: "vxarq_u64"
@@ -9877,8 +9880,8 @@ intrinsics:
arch: aarch64,arm64ec
- FnCall:
- "_vxarq_u64"
- - "a.as_signed()"
- "b.as_signed()"
- - "a"
- "b"
- "IMM6 as i64"

- name: "vrnd32x{neon_type.no}"
@@ -13979,7 +13982,7 @@ intrinsics:
links:
- link: "llvm.aarch64.neon.uaddlv.i32.{neon_type[0]}"
arch: aarch64,arm64ec
- Identifier: ["_vaddlv{neon_type[0].no}(a.as_signed()).as_unsigned() as u16", Symbol]
- Identifier: ["_vaddlv{neon_type[0].no}(a) as u16", Symbol]

- name: "vmaxv{neon_type[0].no}"
doc: "Horizontal vector max."
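The net effect of dropping the `as_signed()`/`as_unsigned()` round-trips from these spec entries is easiest to see in the shape of the generated wrappers. The sketch below is a simplified, self-contained model rather than the actual generated code: real wrappers bind `llvm.aarch64.neon.*` intrinsics through extern blocks and operate on the real SIMD vector types, whereas the `Uint8x8` type and `addlv_binding` function here are hypothetical stand-ins that only model the cast elision.

```rust
// Previously the generator assumed the LLVM binding only accepted signed types,
// so an unsigned wrapper had to round-trip:
//
//     _vaddlv_u8(a.as_signed()).as_unsigned() as u16
//
// With `auto_llvm_sign_conversion: false` the binding is declared with the
// unsigned type and the wrapper passes the value straight through:
//
//     _vaddlv_u8(a) as u16

#[derive(Clone, Copy)]
struct Uint8x8([u8; 8]); // stand-in for uint8x8_t

// Stand-in for the `llvm.aarch64.neon.uaddlv` binding: widen each lane and sum.
fn addlv_binding(a: Uint8x8) -> u32 {
    a.0.iter().map(|&x| x as u32).sum()
}

// Shape of the generated public wrapper after this change: no sign round-trip.
fn vaddlv_u8(a: Uint8x8) -> u16 {
    addlv_binding(a) as u16
}

fn main() {
    let v = Uint8x8([1, 2, 3, 4, 5, 250, 6, 7]);
    assert_eq!(vaddlv_u8(v), 278);
    println!("vaddlv_u8 = {}", vaddlv_u8(v));
}
```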
48 changes: 23 additions & 25 deletions crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
@@ -1139,7 +1139,7 @@ intrinsics:
links:
- link: "llvm.arm.neon.vcvtfxu2fp.{neon_type[1]}.{neon_type[0]}"
arch: arm
- FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a.as_signed()", N]]
- FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a", N]]

- name: "vcvt{neon_type[1].N}_{neon_type[0]}"
doc: "Fixed-point convert to floating-point"
@@ -1166,7 +1166,7 @@ intrinsics:
links:
- link: "llvm.aarch64.neon.vcvtfxu2fp.{neon_type[1]}.{neon_type[0]}"
arch: aarch64,arm64ec
- FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a.as_signed()", N]]
- FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a", N]]

- name: "vcvt{neon_type[1].N}_{neon_type[0]}"
doc: "Fixed-point convert to floating-point"
@@ -1197,7 +1197,7 @@ intrinsics:
arch: arm
- link: "llvm.aarch64.neon.vcvtfxu2fp.{neon_type[1]}.{neon_type[0]}"
arch: aarch64,arm64ec
- FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a.as_signed()", N]]
- FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a", N]]


- name: "vcvt{neon_type[1].N}_{neon_type[0]}"
@@ -8486,9 +8486,9 @@ intrinsics:
safety:
unsafe: [neon]
types:
- [uint16x8_t, uint8x8_t, 'N >= 1 && N <= 8', 'const { uint16x8_t([-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16]) }.as_signed()']
- [uint32x4_t, uint16x4_t, 'N >= 1 && N <= 16', 'const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }.as_signed()']
- [uint64x2_t, uint32x2_t, 'N >= 1 && N <= 32', 'const { uint64x2_t([-N as u64, -N as u64]) }.as_signed()']
- [uint16x8_t, uint8x8_t, 'N >= 1 && N <= 8', 'const { uint16x8_t([-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16]) }']
- [uint32x4_t, uint16x4_t, 'N >= 1 && N <= 16', 'const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }']
- [uint64x2_t, uint32x2_t, 'N >= 1 && N <= 32', 'const { uint64x2_t([-N as u64, -N as u64]) }']
compose:
- FnCall: [static_assert!, ["{type[2]}"]]
- LLVMLink:
@@ -8499,7 +8499,7 @@ intrinsics:
links:
- link: "llvm.arm.neon.vqshiftnu.{neon_type[1]}"
arch: arm
- FnCall: ["_vqshrn_n_{neon_type[0]}", ["a.as_signed()", "{type[3]}"]]
- FnCall: ["_vqshrn_n_{neon_type[0]}", ["a", "{type[3]}"]]

- name: "vqshrn_n_{neon_type[0]}"
doc: "Unsigned saturating shift right narrow"
@@ -8527,7 +8527,7 @@ intrinsics:
links:
- link: "llvm.aarch64.neon.uqshrn.{neon_type[1]}"
arch: aarch64,arm64ec
- FnCall: ["_vqshrn_n_{neon_type[0]}", ["a.as_signed()", N]]
- FnCall: ["_vqshrn_n_{neon_type[0]}", ["a", N]]

- name: "vqshrun_n_{neon_type[0]}"
doc: "Signed saturating shift right unsigned narrow"
@@ -10987,9 +10987,9 @@ intrinsics:
safety:
unsafe: [neon]
types:
- [uint16x8_t, uint8x8_t, '8', 'const { uint16x8_t([-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16]) }.as_signed()']
- [uint32x4_t, uint16x4_t, '16', 'const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }.as_signed()']
- [uint64x2_t, uint32x2_t, '32', 'const { uint64x2_t([-N as u64, -N as u64]) }.as_signed()']
- [uint16x8_t, uint8x8_t, '8', 'const { uint16x8_t([-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16]) }']
- [uint32x4_t, uint16x4_t, '16', 'const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }']
- [uint64x2_t, uint32x2_t, '32', 'const { uint64x2_t([-N as u64, -N as u64]) }']
compose:
- FnCall: [static_assert!, ['N >= 1 && N <= {type[2]}']]
- LLVMLink:
@@ -11000,7 +11000,7 @@ intrinsics:
links:
- link: "llvm.arm.neon.vqrshiftnu.{neon_type[1]}"
arch: arm
- FnCall: ["_vqrshrn_n{neon_type[0].noq}", ["a.as_signed()", "{type[3]}"]]
- FnCall: ["_vqrshrn_n{neon_type[0].noq}", ["a", "{type[3]}"]]

- name: "vqrshrn_n_{neon_type[0]}"
doc: "Unsigned signed saturating rounded shift right narrow"
@@ -11028,7 +11028,7 @@ intrinsics:
links:
- link: "llvm.aarch64.neon.uqrshrn.{neon_type[1]}"
arch: aarch64,arm64ec
- FnCall: ["_vqrshrn_n_{neon_type[0]}", ["a.as_signed()", N]]
- FnCall: ["_vqrshrn_n_{neon_type[0]}", ["a", N]]

- name: "vcvt{neon_type[1].no}_{neon_type[0]}"
doc: "Floating-point convert to unsigned fixed-point, rounding toward zero"
@@ -13167,7 +13167,7 @@ intrinsics:
arch: aarch64,arm64ec
- link: "llvm.arm.crc32b"
arch: arm
- FnCall: ["___crc32b", ["crc.as_signed()", "data.as_signed() as i32"]]
- FnCall: ["___crc32b", ["crc", "data as u32"]]

- name: "__crc32h"
doc: "CRC32 single round checksum for bytes (16 bits)."
@@ -13194,7 +13194,7 @@ intrinsics:
arch: aarch64,arm64ec
- link: "llvm.arm.crc32h"
arch: arm
- FnCall: ["___crc32h", ["crc.as_signed()", "data.as_signed() as i32"]]
- FnCall: ["___crc32h", ["crc", "data as u32"]]

- name: "__crc32w"
doc: "CRC32 single round checksum for bytes (32 bits)."
@@ -13244,7 +13244,7 @@ intrinsics:
arch: aarch64,arm64ec
- link: "llvm.arm.crc32cb"
arch: arm
- FnCall: ["___crc32cb", ["crc.as_signed()", "data.as_signed() as i32"]]
- FnCall: ["___crc32cb", ["crc", "data as u32"]]

- name: "__crc32ch"
doc: "CRC32-C single round checksum for bytes (16 bits)."
@@ -13271,7 +13271,7 @@ intrinsics:
arch: aarch64,arm64ec
- link: "llvm.arm.crc32ch"
arch: arm
- FnCall: ["___crc32ch", ["crc.as_signed()", "data.as_signed() as i32"]]
- FnCall: ["___crc32ch", ["crc", "data as u32"]]

- name: "__crc32cw"
doc: "CRC32-C single round checksum for bytes (32 bits)."
@@ -13313,10 +13313,9 @@ intrinsics:
# As the call to `__crc32` does not get inlined, we define an LLVM binding
# here, which is the same as above, and call it directly which results
# in the correct instructions being generated
- Let: [a, i32, 'crc as i32']
- Let: [b, i32, '(data & 0xFFFFFFFF).as_signed() as i32']
- Let: [c, i32, '(data >> 32).as_signed() as i32']
- 'unsafe extern "unadjusted" {{ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32w")] fn ___crc32w(crc: i32, data: i32) -> i32;}} ___crc32w(___crc32w(a, b), c).as_unsigned()'
- Let: [b, u32, '(data & 0xFFFFFFFF) as u32']
- Let: [c, u32, '(data >> 32) as u32']
- 'unsafe extern "unadjusted" {{ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32w")] fn ___crc32w(crc: u32, data: u32) -> u32;}} ___crc32w(___crc32w(crc, b), c)'

- name: "__crc32cd"
doc: "CRC32-C single round checksum for quad words (64 bits)."
@@ -13332,10 +13331,9 @@ intrinsics:
types:
- [u32, u64]
compose:
- Let: [a, i32, 'crc as i32']
- Let: [b, i32, '(data & 0xFFFFFFFF).as_signed() as i32']
- Let: [c, i32, '(data >> 32).as_signed() as i32']
- 'unsafe extern "unadjusted" {{ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32cw")] fn ___crc32cw(crc: i32, data: i32) -> i32;}} ___crc32cw(___crc32cw(a, b), c).as_unsigned() as u32'
- Let: [b, u32, '(data & 0xFFFFFFFF) as u32']
- Let: [c, u32, '(data >> 32) as u32']
- 'unsafe extern "unadjusted" {{ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32cw")] fn ___crc32cw(crc: u32, data: u32) -> u32;}} ___crc32cw(___crc32cw(crc, b), c)'

- name: "vabs{neon_type.no}"
doc: "Absolute value (wrapping)."
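For the CRC intrinsics, the compose blocks above now split the 64-bit input into unsigned halves and feed them to a binding declared over `u32`, instead of round-tripping through `i32`. Below is a minimal sketch of that shape; `crc32w_binding` is a placeholder standing in for the real `llvm.arm.crc32w` intrinsic and does not compute an actual CRC-32.

```rust
// Hypothetical stand-in for the `llvm.arm.crc32w` binding. After this change the
// binding is declared as (u32, u32) -> u32, so no signed casts are needed.
fn crc32w_binding(crc: u32, data: u32) -> u32 {
    // Placeholder mixing step only; the real instruction computes CRC-32.
    (crc ^ data).rotate_left(7).wrapping_mul(0x9E37_79B9)
}

// Shape of the generated `__crc32d` wrapper after this change: the 64-bit input
// is split into two unsigned 32-bit halves and chained through two rounds.
fn crc32d(crc: u32, data: u64) -> u32 {
    let b = (data & 0xFFFF_FFFF) as u32; // low half
    let c = (data >> 32) as u32;         // high half
    crc32w_binding(crc32w_binding(crc, b), c)
}

fn main() {
    println!("{:#010x}", crc32d(0xFFFF_FFFF, 0x0123_4567_89AB_CDEF));
}
```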
4 changes: 4 additions & 0 deletions crates/stdarch-gen-arm/src/context.rs
@@ -39,6 +39,10 @@ pub struct GlobalContext {
/// Should the yaml file automagically generate big endian shuffling
#[serde(default)]
pub auto_big_endian: Option<bool>,

/// Should all LLVM wrappers convert their arguments to a signed type
#[serde(default)]
pub auto_llvm_sign_conversion: bool,
}

/// Context of an intrinsic group
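The new `auto_llvm_sign_conversion` field is read from the spec file through serde. The sketch below is a trimmed-down stand-in for the real `GlobalContext`, assuming `serde` (with the derive feature) and `serde_yaml` as dependencies. Because the field is a plain `bool` with `#[serde(default)]`, a spec file that omits the key deserializes to `false`; the explicit `auto_llvm_sign_conversion: false` in the NEON spec makes that choice visible at the spec level.

```rust
use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct GlobalContext {
    #[serde(default)]
    auto_big_endian: Option<bool>,
    // Plain `bool` with #[serde(default)]: an absent key means `false`,
    // i.e. no automatic signed/unsigned casts around LLVM bindings.
    #[serde(default)]
    auto_llvm_sign_conversion: bool,
}

fn main() {
    let explicit = "auto_big_endian: true\nauto_llvm_sign_conversion: false\n";
    let omitted = "auto_big_endian: true\n";

    let a: GlobalContext = serde_yaml::from_str(explicit).unwrap();
    let b: GlobalContext = serde_yaml::from_str(omitted).unwrap();

    assert_eq!(a.auto_big_endian, Some(true));
    assert!(!a.auto_llvm_sign_conversion);
    assert!(!b.auto_llvm_sign_conversion); // defaulted to false
    println!("{a:?}\n{b:?}");
}
```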
2 changes: 1 addition & 1 deletion crates/stdarch-gen-arm/src/expression.rs
@@ -203,7 +203,7 @@ impl Expression {
*self = intrinsic
.llvm_link()
.expect("got LLVMLink wildcard without a LLVM link in `compose`")
.apply_conversions_to_call(fn_call.clone(), ctx.local)?
.apply_conversions_to_call(fn_call.clone(), ctx)?
}
}

50 changes: 31 additions & 19 deletions crates/stdarch-gen-arm/src/intrinsic.rs
@@ -548,7 +548,9 @@ impl LLVMLink {
Ok(())
}

/// Alters all the unsigned types from the signature, as unsupported by LLVM.
/// Alters all the unsigned types from the signature. This is required where
/// a signed and unsigned variant require the same binding to an exposed
/// LLVM intrinsic.
pub fn sanitise_uints(&mut self) {
let transform = |tk: &mut TypeKind| {
if let Some(BaseType::Sized(BaseTypeKind::UInt, size)) = tk.base_type() {
@@ -603,7 +605,7 @@ impl LLVMLink {
pub fn apply_conversions_to_call(
&self,
mut fn_call: FnCall,
ctx: &LocalContext,
ctx: &Context,
) -> context::Result<Expression> {
use BaseType::{Sized, Unsized};
use BaseTypeKind::{Bool, UInt};
@@ -618,6 +620,7 @@
.map(|arg| -> context::Result<Expression> {
if let Expression::Identifier(ref var_name, IdentifierType::Variable) = arg {
let (kind, scope) = ctx
.local
.variables
.get(&var_name.to_string())
.ok_or_else(|| format!("invalid variable {var_name:?} being referenced"))?;
@@ -627,7 +630,11 @@
Ok(convert("into", arg))
}
(Argument, Some(Sized(UInt, _) | Unsized(UInt))) => {
Ok(convert("as_signed", arg))
if ctx.global.auto_llvm_sign_conversion {
Ok(convert("as_signed", arg))
} else {
Ok(arg)
}
}
_ => Ok(arg),
}
@@ -637,22 +644,25 @@
})
.try_collect()?;

let return_type_requires_conversion = self
.signature
.as_ref()
.and_then(|sig| sig.return_type.as_ref())
.and_then(|ty| {
if let Some(Sized(Bool, bitsize)) = ty.base_type() {
(*bitsize != 8).then_some(Bool)
} else if let Some(Sized(UInt, _) | Unsized(UInt)) = ty.base_type() {
Some(UInt)
} else {
None
}
});
let return_type_conversion = if !ctx.global.auto_llvm_sign_conversion {
None
} else {
self.signature
.as_ref()
.and_then(|sig| sig.return_type.as_ref())
.and_then(|ty| {
if let Some(Sized(Bool, bitsize)) = ty.base_type() {
(*bitsize != 8).then_some(Bool)
} else if let Some(Sized(UInt, _) | Unsized(UInt)) = ty.base_type() {
Some(UInt)
} else {
None
}
})
};

let fn_call = Expression::FnCall(fn_call);
match return_type_requires_conversion {
match return_type_conversion {
Some(Bool) => Ok(convert("into", fn_call)),
Some(UInt) => Ok(convert("as_unsigned", fn_call)),
_ => Ok(fn_call),
@@ -1509,8 +1519,10 @@ impl Intrinsic {
}

if let Some(llvm_link) = self.llvm_link_mut() {
// Turn all Rust unsigned types into signed
llvm_link.sanitise_uints();
/* Turn all Rust unsigned types into signed if required */
if ctx.global.auto_llvm_sign_conversion {
llvm_link.sanitise_uints();
}
}

if let Some(predicate_form) = ctx.local.predicate_form() {
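Taken together, the intrinsic.rs changes gate both the argument casts and the return-value cast on the new global flag. The following is a minimal model of that gating under hypothetical types; it does not use the real stdarch-gen-arm `Expression`, `FnCall`, or context types, and `apply_conversions_to_call` here only mimics the role of the real method.

```rust
// Minimal model of the conversion gating: wrappers for unsigned arguments and
// unsigned return values are only emitted when `auto_llvm_sign_conversion` is set.

#[derive(Clone, Debug, PartialEq)]
enum Expr {
    Ident(String),
    MethodCall(Box<Expr>, &'static str),
    Call(String, Vec<Expr>),
}

fn convert(method: &'static str, e: Expr) -> Expr {
    Expr::MethodCall(Box::new(e), method)
}

// Hypothetical stand-in for LLVMLink::apply_conversions_to_call. The `bool`
// paired with each argument plays the role of looking its type up in the
// local context and finding it unsigned.
fn apply_conversions_to_call(
    name: &str,
    args: Vec<(Expr, bool)>, // (expression, is_unsigned)
    returns_unsigned: bool,
    auto_llvm_sign_conversion: bool,
) -> Expr {
    let args = args
        .into_iter()
        .map(|(arg, is_unsigned)| {
            if is_unsigned && auto_llvm_sign_conversion {
                convert("as_signed", arg)
            } else {
                arg
            }
        })
        .collect();
    let call = Expr::Call(name.to_string(), args);
    if returns_unsigned && auto_llvm_sign_conversion {
        convert("as_unsigned", call)
    } else {
        call
    }
}

fn main() {
    let a = (Expr::Ident("a".into()), true);
    let old = apply_conversions_to_call("_vaddlv_u8", vec![a.clone()], true, true);
    let new = apply_conversions_to_call("_vaddlv_u8", vec![a], true, false);
    // Old behaviour: _vaddlv_u8(a.as_signed()).as_unsigned(); new: _vaddlv_u8(a).
    println!("old: {old:?}\nnew: {new:?}");
    assert_ne!(old, new);
}
```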