Add vrndn neon instructions (#1086)

CryZe · web-flow · commit 2920eee1458b · 2021-04-22T06:08:40.000+01:00
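This adds the NEON `vrndn` intrinsics (`vrndn_f32` and `vrndnq_f32`) for 32-bit
ARM by moving them from the aarch64-only module into `arm_shared`. They round
each lane to the nearest integral value, with ties to even, without actually
converting the lanes to integers.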
diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs
@@ -2518,32 +2518,6 @@ pub unsafe fn vrndaq_f64(a: float64x2_t) -> float64x2_t {
     vrndaq_f64_(a)
 }
 
-/// Floating-point round to integral, to nearest with ties to even
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frintn))]
-pub unsafe fn vrndn_f32(a: float32x2_t) -> float32x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v2f32")]
-        fn vrndn_f32_(a: float32x2_t) -> float32x2_t;
-    }
-    vrndn_f32_(a)
-}
-
-/// Floating-point round to integral, to nearest with ties to even
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(frintn))]
-pub unsafe fn vrndnq_f32(a: float32x4_t) -> float32x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v4f32")]
-        fn vrndnq_f32_(a: float32x4_t) -> float32x4_t;
-    }
-    vrndnq_f32_(a)
-}
-
 /// Floating-point round to integral, to nearest with ties to even
 #[inline]
 #[target_feature(enable = "neon")]
@@ -8884,22 +8858,6 @@ mod test {
         assert_eq!(r, e);
     }
 
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vrndn_f32() {
-        let a: f32x2 = f32x2::new(-1.5, 0.5);
-        let e: f32x2 = f32x2::new(-2.0, 0.0);
-        let r: f32x2 = transmute(vrndn_f32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vrndnq_f32() {
-        let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
-        let e: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0);
-        let r: f32x4 = transmute(vrndnq_f32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
     #[simd_test(enable = "neon")]
     unsafe fn test_vrndn_f64() {
         let a: f64 = -1.5;
diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -4198,6 +4198,38 @@ pub unsafe fn vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
 vrhaddq_s32_(a, b)
 }
 
+/// Floating-point round to integral, to nearest with ties to even
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frintn))]
+pub unsafe fn vrndn_f32(a: float32x2_t) -> float32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v2f32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v2f32")]
+        fn vrndn_f32_(a: float32x2_t) -> float32x2_t;
+    }
+vrndn_f32_(a)
+}
+
+/// Floating-point round to integral, to nearest with ties to even
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frintn))]
+pub unsafe fn vrndnq_f32(a: float32x4_t) -> float32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v4f32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v4f32")]
+        fn vrndnq_f32_(a: float32x4_t) -> float32x4_t;
+    }
+vrndnq_f32_(a)
+}
+
 /// Saturating add
 #[inline]
 #[target_feature(enable = "neon")]
@@ -14921,6 +14953,22 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndn_f32() {
+        let a: f32x2 = f32x2::new(-1.5, 0.5);
+        let e: f32x2 = f32x2::new(-2.0, 0.0);
+        let r: f32x2 = transmute(vrndn_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndnq_f32() {
+        let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
+        let e: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0);
+        let r: f32x4 = transmute(vrndnq_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vqadd_u8() {
         let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
diff --git a/crates/stdarch-gen/neon.spec b/crates/stdarch-gen/neon.spec
@@ -1401,7 +1401,12 @@ validate -2.0, 0.0, 2.0, 2.0
 
 link-aarch64 = frintn._EXT_
 aarch64 = frintn
-generate float*_t, float64x*_t
+generate float64x*_t
+
+target = fp-armv8
+arm = vrintn
+link-arm = vrintn._EXT_
+generate float*_t
 
 /// Floating-point round to integral, toward minus infinity
 name = vrndm
@@ -3901,4 +3906,4 @@ validate MAX, 7
 
 aarch64 = sqabs
 link-aarch64 = sqabs._EXT_
-generate int64x*_t
+generate int64x*_t