fix - neon type signed unsigned conversions

Jamesbarford · Amanieu · commit 3fc934902f3f · 2025-02-26T12:15:15.000Z
diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs
diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs
diff --git a/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -5,6 +5,9 @@ arch_cfgs:
 # Generate big endian shuffles
 auto_big_endian: true
 
+# We do not want to automatically generate signed/unsigned casts
+auto_llvm_sign_conversion: false
+
 # Repeatedly used anchors
 # #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 neon-stable: &neon-stable
@@ -1004,7 +1007,7 @@ intrinsics:
           links:
             - link: "llvm.aarch64.neon.facgt.{type[3]}.{type[1]}"
               arch: aarch64,arm64ec
-      - '_vcagth_f16(a, b).as_unsigned() as u16'
+      - '_vcagth_f16(a, b) as u16'
 
   - name: "vcage{neon_type[0].no}"
     doc: "Floating-point absolute compare greater than or equal"
@@ -1064,7 +1067,7 @@ intrinsics:
           links:
             - link: "llvm.aarch64.neon.facge.{type[3]}.{type[1]}"
               arch: aarch64,arm64ec
-      - "_vcageh_f16(a, b).as_unsigned() as u16"
+      - "_vcageh_f16(a, b) as u16"
 
   - name: "vcalt{neon_type[0].no}"
     doc: "Floating-point absolute compare less than"
@@ -1314,7 +1317,7 @@ intrinsics:
           links:
             - link: "llvm.aarch64.neon.vcvtfxu2fp.{type[1]}.{type[0]}"
               arch: aarch64,arm64ec
-      - FnCall: ["_vcvt{type[2]}_n_{type[1]}_{type[0]}", [a.as_signed(), N]]
+      - FnCall: ["_vcvt{type[2]}_n_{type[1]}_{type[0]}", [a, N]]
 
 
   - name: "vcvt{type[2]}_n_{type[1]}_{type[0]}"
@@ -1406,7 +1409,7 @@ intrinsics:
           links:
             - link: "llvm.aarch64.neon.vcvtfxu2fp.{neon_type[1]}.{neon_type[0]}"
               arch: aarch64,arm64ec
-      - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a.as_signed()", N]]
+      - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a", N]]
 
   - name: "vcvt{type[2]}"
     doc: "Fixed-point convert to floating-point"
@@ -1432,7 +1435,7 @@ intrinsics:
           links:
             - link: "llvm.aarch64.neon.vcvtfxu2fp.{type[1]}.{type[0]}"
               arch: aarch64,arm64ec
-      - FnCall: ["_vcvt{type[2]}", ["a.as_signed()", N]]
+      - FnCall: ["_vcvt{type[2]}", ["a", N]]
 
   - name: "vcvt{type[2]}"
     doc: "Fixed-point convert to floating-point"
@@ -6023,7 +6026,7 @@ intrinsics:
           links:
             - link: "llvm.aarch64.neon.uaddlv.{type[2]}.{neon_type[0]}"
               arch: aarch64,arm64ec
-      - FnCall: ['_vaddlv{neon_type[0].no}', ['a.as_signed()']]
+      - FnCall: ['_vaddlv{neon_type[0].no}', ['a']]
 
   - name: "vaddlv{neon_type[0].no}"
     doc: Unsigned Add Long across Vector
@@ -6041,7 +6044,7 @@ intrinsics:
           links:
             - link: "llvm.aarch64.neon.uaddlv.{type[2]}.{neon_type[0]}"
               arch: aarch64,arm64ec
-      - FnCall: ['_vaddlv{neon_type[0].no}', ['a.as_signed()']]
+      - FnCall: ['_vaddlv{neon_type[0].no}', ['a']]
 
   - name: "vsubw_high{neon_type[1].noq}"
     doc: Signed Subtract Wide
@@ -8704,7 +8707,7 @@ intrinsics:
           links:
             - link: "llvm.aarch64.neon.uqshrn.i32"
               arch: aarch64,arm64ec
-      - FnCall: ["_vqshrnd_n_u64", ["a.as_signed()", N]]
+      - FnCall: ["_vqshrnd_n_u64", ["a", N]]
 
   - name: "vqshrn{type[0]}"
     doc: "Unsigned saturating shift right narrow"
@@ -9845,9 +9848,9 @@ intrinsics:
               arch: aarch64,arm64ec
       - FnCall:
           - "_vsm3tt{type[0]}"
-          - - "a.as_signed()"
-            - "b.as_signed()"
-            - "c.as_signed()"
+          - - "a"
+            - "b"
+            - "c"
             - "IMM2 as i64"
 
   - name: "vxarq_u64"
@@ -9877,8 +9880,8 @@ intrinsics:
               arch: aarch64,arm64ec
       - FnCall:
           - "_vxarq_u64"
-          - - "a.as_signed()"
-            - "b.as_signed()"
+          - - "a"
+            - "b"
             - "IMM6 as i64"
 
   - name: "vrnd32x{neon_type.no}"
@@ -13979,7 +13982,7 @@ intrinsics:
           links:
             - link: "llvm.aarch64.neon.uaddlv.i32.{neon_type[0]}"
               arch: aarch64,arm64ec
-      - Identifier: ["_vaddlv{neon_type[0].no}(a.as_signed()).as_unsigned() as u16", Symbol]
+      - Identifier: ["_vaddlv{neon_type[0].no}(a) as u16", Symbol]
 
   - name: "vmaxv{neon_type[0].no}"
     doc: "Horizontal vector max."
diff --git a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
@@ -1139,7 +1139,7 @@ intrinsics:
           links:
             - link: "llvm.arm.neon.vcvtfxu2fp.{neon_type[1]}.{neon_type[0]}"
               arch: arm
-      - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a.as_signed()", N]]
+      - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a", N]]
 
   - name: "vcvt{neon_type[1].N}_{neon_type[0]}"
     doc: "Fixed-point convert to floating-point"
@@ -1166,7 +1166,7 @@ intrinsics:
           links:
             - link: "llvm.aarch64.neon.vcvtfxu2fp.{neon_type[1]}.{neon_type[0]}"
               arch: aarch64,arm64ec
-      - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a.as_signed()", N]]
+      - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a", N]]
 
   - name: "vcvt{neon_type[1].N}_{neon_type[0]}"
     doc: "Fixed-point convert to floating-point"
@@ -1197,7 +1197,7 @@ intrinsics:
               arch: arm
             - link: "llvm.aarch64.neon.vcvtfxu2fp.{neon_type[1]}.{neon_type[0]}"
               arch: aarch64,arm64ec
-      - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a.as_signed()", N]]
+      - FnCall: ["_vcvt{neon_type[1].N}_{neon_type[0]}", ["a", N]]
 
 
   - name: "vcvt{neon_type[1].N}_{neon_type[0]}"
@@ -8486,9 +8486,9 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - [uint16x8_t, uint8x8_t, 'N >= 1 && N <= 8', 'const { uint16x8_t([-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16]) }.as_signed()']
-      - [uint32x4_t, uint16x4_t, 'N >= 1 && N <= 16', 'const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }.as_signed()']
-      - [uint64x2_t, uint32x2_t, 'N >= 1 && N <= 32', 'const { uint64x2_t([-N as u64, -N as u64]) }.as_signed()']
+      - [uint16x8_t, uint8x8_t, 'N >= 1 && N <= 8', 'const { uint16x8_t([-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16]) }']
+      - [uint32x4_t, uint16x4_t, 'N >= 1 && N <= 16', 'const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }']
+      - [uint64x2_t, uint32x2_t, 'N >= 1 && N <= 32', 'const { uint64x2_t([-N as u64, -N as u64]) }']
     compose:
       - FnCall: [static_assert!, ["{type[2]}"]]
       - LLVMLink:
@@ -8499,7 +8499,7 @@ intrinsics:
           links:
             - link: "llvm.arm.neon.vqshiftnu.{neon_type[1]}"
               arch: arm
-      - FnCall: ["_vqshrn_n_{neon_type[0]}", ["a.as_signed()", "{type[3]}"]]
+      - FnCall: ["_vqshrn_n_{neon_type[0]}", ["a", "{type[3]}"]]
 
   - name: "vqshrn_n_{neon_type[0]}"
     doc: "Unsigned saturating shift right narrow"
@@ -8527,7 +8527,7 @@ intrinsics:
           links:
             - link: "llvm.aarch64.neon.uqshrn.{neon_type[1]}"
               arch: aarch64,arm64ec
-      - FnCall: ["_vqshrn_n_{neon_type[0]}", ["a.as_signed()", N]]
+      - FnCall: ["_vqshrn_n_{neon_type[0]}", ["a", N]]
 
   - name: "vqshrun_n_{neon_type[0]}"
     doc: "Signed saturating shift right unsigned narrow"
@@ -10987,9 +10987,9 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - [uint16x8_t, uint8x8_t, '8', 'const { uint16x8_t([-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16]) }.as_signed()']
-      - [uint32x4_t, uint16x4_t, '16', 'const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }.as_signed()']
-      - [uint64x2_t, uint32x2_t, '32', 'const { uint64x2_t([-N as u64, -N as u64]) }.as_signed()']
+      - [uint16x8_t, uint8x8_t, '8', 'const { uint16x8_t([-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16]) }']
+      - [uint32x4_t, uint16x4_t, '16', 'const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }']
+      - [uint64x2_t, uint32x2_t, '32', 'const { uint64x2_t([-N as u64, -N as u64]) }']
     compose:
       - FnCall: [static_assert!, ['N >= 1 && N <= {type[2]}']]
       - LLVMLink:
@@ -11000,7 +11000,7 @@ intrinsics:
           links:
             - link: "llvm.arm.neon.vqrshiftnu.{neon_type[1]}"
               arch: arm
-      - FnCall: ["_vqrshrn_n{neon_type[0].noq}", ["a.as_signed()", "{type[3]}"]]
+      - FnCall: ["_vqrshrn_n{neon_type[0].noq}", ["a", "{type[3]}"]]
 
   - name: "vqrshrn_n_{neon_type[0]}"
     doc: "Unsigned signed saturating rounded shift right narrow"
@@ -11028,7 +11028,7 @@ intrinsics:
           links:
             - link: "llvm.aarch64.neon.uqrshrn.{neon_type[1]}"
               arch: aarch64,arm64ec
-      - FnCall: ["_vqrshrn_n_{neon_type[0]}", ["a.as_signed()", N]]
+      - FnCall: ["_vqrshrn_n_{neon_type[0]}", ["a", N]]
 
   - name: "vcvt{neon_type[1].no}_{neon_type[0]}"
     doc: "Floating-point convert to unsigned fixed-point, rounding toward zero"
@@ -13167,7 +13167,7 @@ intrinsics:
               arch: aarch64,arm64ec
             - link: "llvm.arm.crc32b"
               arch: arm
-      - FnCall: ["___crc32b", ["crc.as_signed()", "data.as_signed() as i32"]]
+      - FnCall: ["___crc32b", ["crc", "data as u32"]]
 
   - name: "__crc32h"
     doc: "CRC32 single round checksum for bytes (16 bits)."
@@ -13194,7 +13194,7 @@ intrinsics:
               arch: aarch64,arm64ec
             - link: "llvm.arm.crc32h"
               arch: arm
-      - FnCall: ["___crc32h", ["crc.as_signed()", "data.as_signed() as i32"]]
+      - FnCall: ["___crc32h", ["crc", "data as u32"]]
 
   - name: "__crc32w"
     doc: "CRC32 single round checksum for bytes (32 bits)."
@@ -13244,7 +13244,7 @@ intrinsics:
               arch: aarch64,arm64ec
             - link: "llvm.arm.crc32cb"
               arch: arm
-      - FnCall: ["___crc32cb", ["crc.as_signed()", "data.as_signed() as i32"]]
+      - FnCall: ["___crc32cb", ["crc", "data as u32"]]
 
   - name: "__crc32ch"
     doc: "CRC32-C single round checksum for bytes (16 bits)."
@@ -13271,7 +13271,7 @@ intrinsics:
               arch: aarch64,arm64ec
             - link: "llvm.arm.crc32ch"
               arch: arm
-      - FnCall: ["___crc32ch", ["crc.as_signed()", "data.as_signed() as i32"]]
+      - FnCall: ["___crc32ch", ["crc", "data as u32"]]
 
   - name: "__crc32cw"
     doc: "CRC32-C single round checksum for bytes (32 bits)."
@@ -13313,10 +13313,9 @@ intrinsics:
       # As the call to `__crc32` does not get inlined, we define an LLVM binding
       # here, which is the same as above, and call it directly which results 
       # in the correct instructions being generated
-      - Let: [a, i32, 'crc as i32']
-      - Let: [b, i32, '(data & 0xFFFFFFFF).as_signed() as i32']
-      - Let: [c, i32, '(data >> 32).as_signed() as i32']
-      - 'unsafe extern "unadjusted" {{ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32w")] fn ___crc32w(crc: i32, data: i32) -> i32;}} ___crc32w(___crc32w(a, b), c).as_unsigned()'
+      - Let: [b, u32, '(data & 0xFFFFFFFF) as u32']
+      - Let: [c, u32, '(data >> 32) as u32']
+      - 'unsafe extern "unadjusted" {{ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32w")] fn ___crc32w(crc: u32, data: u32) -> u32;}} ___crc32w(___crc32w(crc, b), c)'
 
   - name: "__crc32cd"
     doc: "CRC32-C single round checksum for quad words (64 bits)."
@@ -13332,10 +13331,9 @@ intrinsics:
     types:
       - [u32, u64]
     compose:
-      - Let: [a, i32, 'crc as i32']
-      - Let: [b, i32, '(data & 0xFFFFFFFF).as_signed() as i32']
-      - Let: [c, i32, '(data >> 32).as_signed() as i32']
-      - 'unsafe extern "unadjusted" {{ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32cw")] fn ___crc32cw(crc: i32, data: i32) -> i32;}} ___crc32cw(___crc32cw(a, b), c).as_unsigned() as u32'
+      - Let: [b, u32, '(data & 0xFFFFFFFF) as u32']
+      - Let: [c, u32, '(data >> 32) as u32']
+      - 'unsafe extern "unadjusted" {{ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32cw")] fn ___crc32cw(crc: u32, data: u32) -> u32;}} ___crc32cw(___crc32cw(crc, b), c)'
 
   - name: "vabs{neon_type.no}"
     doc: "Absolute value (wrapping)."
diff --git a/crates/stdarch-gen-arm/src/context.rs b/crates/stdarch-gen-arm/src/context.rs
@@ -39,6 +39,10 @@ pub struct GlobalContext {
     /// Should the yaml file automagically generate big endian shuffling
     #[serde(default)]
     pub auto_big_endian: Option<bool>,
+
+    /// Should LLVM wrappers automatically cast unsigned arguments and return values to/from signed types
+    #[serde(default)]
+    pub auto_llvm_sign_conversion: bool,
 }
 
 /// Context of an intrinsic group
diff --git a/crates/stdarch-gen-arm/src/expression.rs b/crates/stdarch-gen-arm/src/expression.rs
@@ -203,7 +203,7 @@ impl Expression {
                         *self = intrinsic
                             .llvm_link()
                             .expect("got LLVMLink wildcard without a LLVM link in `compose`")
-                            .apply_conversions_to_call(fn_call.clone(), ctx.local)?
+                            .apply_conversions_to_call(fn_call.clone(), ctx)?
                     }
                 }
 
diff --git a/crates/stdarch-gen-arm/src/intrinsic.rs b/crates/stdarch-gen-arm/src/intrinsic.rs
@@ -548,7 +548,9 @@ impl LLVMLink {
         Ok(())
     }
 
-    /// Alters all the unsigned types from the signature, as unsupported by LLVM.
+    /// Alters all the unsigned types from the signature. This is required where
+    /// a signed and unsigned variant require the same binding to an exposed
+    /// LLVM intrinsic.
     pub fn sanitise_uints(&mut self) {
         let transform = |tk: &mut TypeKind| {
             if let Some(BaseType::Sized(BaseTypeKind::UInt, size)) = tk.base_type() {
@@ -603,7 +605,7 @@ impl LLVMLink {
     pub fn apply_conversions_to_call(
         &self,
         mut fn_call: FnCall,
-        ctx: &LocalContext,
+        ctx: &Context,
     ) -> context::Result<Expression> {
         use BaseType::{Sized, Unsized};
         use BaseTypeKind::{Bool, UInt};
@@ -618,6 +620,7 @@ impl LLVMLink {
             .map(|arg| -> context::Result<Expression> {
                 if let Expression::Identifier(ref var_name, IdentifierType::Variable) = arg {
                     let (kind, scope) = ctx
+                        .local
                         .variables
                         .get(&var_name.to_string())
                         .ok_or_else(|| format!("invalid variable {var_name:?} being referenced"))?;
@@ -627,7 +630,11 @@ impl LLVMLink {
                             Ok(convert("into", arg))
                         }
                         (Argument, Some(Sized(UInt, _) | Unsized(UInt))) => {
-                            Ok(convert("as_signed", arg))
+                            if ctx.global.auto_llvm_sign_conversion {
+                                Ok(convert("as_signed", arg))
+                            } else {
+                                Ok(arg)
+                            }
                         }
                         _ => Ok(arg),
                     }
@@ -637,22 +644,25 @@ impl LLVMLink {
             })
             .try_collect()?;
 
-        let return_type_requires_conversion = self
-            .signature
-            .as_ref()
-            .and_then(|sig| sig.return_type.as_ref())
-            .and_then(|ty| {
-                if let Some(Sized(Bool, bitsize)) = ty.base_type() {
-                    (*bitsize != 8).then_some(Bool)
-                } else if let Some(Sized(UInt, _) | Unsized(UInt)) = ty.base_type() {
-                    Some(UInt)
-                } else {
-                    None
-                }
-            });
+        let return_type_conversion = if !ctx.global.auto_llvm_sign_conversion {
+            None
+        } else {
+            self.signature
+                .as_ref()
+                .and_then(|sig| sig.return_type.as_ref())
+                .and_then(|ty| {
+                    if let Some(Sized(Bool, bitsize)) = ty.base_type() {
+                        (*bitsize != 8).then_some(Bool)
+                    } else if let Some(Sized(UInt, _) | Unsized(UInt)) = ty.base_type() {
+                        Some(UInt)
+                    } else {
+                        None
+                    }
+                })
+        };
 
         let fn_call = Expression::FnCall(fn_call);
-        match return_type_requires_conversion {
+        match return_type_conversion {
             Some(Bool) => Ok(convert("into", fn_call)),
             Some(UInt) => Ok(convert("as_unsigned", fn_call)),
             _ => Ok(fn_call),
@@ -1509,8 +1519,10 @@ impl Intrinsic {
         }
 
         if let Some(llvm_link) = self.llvm_link_mut() {
-            // Turn all Rust unsigned types into signed
-            llvm_link.sanitise_uints();
+            /* Turn all Rust unsigned types into signed if required */
+            if ctx.global.auto_llvm_sign_conversion {
+                llvm_link.sanitise_uints();
+            }
         }
 
         if let Some(predicate_form) = ctx.local.predicate_form() {

Original file line number	Diff line number	Diff line change
`@@ -203,7 +203,7 @@ impl Expression {`
`203`	`203`	`*self = intrinsic`
`204`	`204`	`.llvm_link()`
`205`	`205`	.expect("got LLVMLink wildcard without a LLVM link in `compose`")
`206`		`- .apply_conversions_to_call(fn_call.clone(), ctx.local)?`
	`206`	`+ .apply_conversions_to_call(fn_call.clone(), ctx)?`
`207`	`207`	`}`
`208`	`208`	`}`
`209`	`209`