Skip to content

Commit 2920eee

Browse files
authored
Add vrndn neon instructions (#1086)
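This adds the NEON `vrndn_f32` and `vrndnq_f32` intrinsics for 32-bit ARM by moving them from the AArch64-only module into the shared ARM/AArch64 module. They round each lane to the nearest integral value (ties to even) while keeping the lanes as floating-point values rather than converting them to integers.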
1 parent d385078 commit 2920eee

File tree

3 files changed: 55 additions and 44 deletions.

crates/core_arch/src/aarch64/neon/generated.rs

Lines changed: 0 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -2518,32 +2518,6 @@ pub unsafe fn vrndaq_f64(a: float64x2_t) -> float64x2_t {
25182518
vrndaq_f64_(a)
25192519
}
25202520

2521-
/// Floating-point round to integral, to nearest with ties to even
2522-
#[inline]
2523-
#[target_feature(enable = "neon")]
2524-
#[cfg_attr(test, assert_instr(frintn))]
2525-
pub unsafe fn vrndn_f32(a: float32x2_t) -> float32x2_t {
2526-
#[allow(improper_ctypes)]
2527-
extern "C" {
2528-
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v2f32")]
2529-
fn vrndn_f32_(a: float32x2_t) -> float32x2_t;
2530-
}
2531-
vrndn_f32_(a)
2532-
}
2533-
2534-
/// Floating-point round to integral, to nearest with ties to even
2535-
#[inline]
2536-
#[target_feature(enable = "neon")]
2537-
#[cfg_attr(test, assert_instr(frintn))]
2538-
pub unsafe fn vrndnq_f32(a: float32x4_t) -> float32x4_t {
2539-
#[allow(improper_ctypes)]
2540-
extern "C" {
2541-
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v4f32")]
2542-
fn vrndnq_f32_(a: float32x4_t) -> float32x4_t;
2543-
}
2544-
vrndnq_f32_(a)
2545-
}
2546-
25472521
/// Floating-point round to integral, to nearest with ties to even
25482522
#[inline]
25492523
#[target_feature(enable = "neon")]
@@ -8884,22 +8858,6 @@ mod test {
88848858
assert_eq!(r, e);
88858859
}
88868860

8887-
#[simd_test(enable = "neon")]
8888-
unsafe fn test_vrndn_f32() {
8889-
let a: f32x2 = f32x2::new(-1.5, 0.5);
8890-
let e: f32x2 = f32x2::new(-2.0, 0.0);
8891-
let r: f32x2 = transmute(vrndn_f32(transmute(a)));
8892-
assert_eq!(r, e);
8893-
}
8894-
8895-
#[simd_test(enable = "neon")]
8896-
unsafe fn test_vrndnq_f32() {
8897-
let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
8898-
let e: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0);
8899-
let r: f32x4 = transmute(vrndnq_f32(transmute(a)));
8900-
assert_eq!(r, e);
8901-
}
8902-
89038861
#[simd_test(enable = "neon")]
89048862
unsafe fn test_vrndn_f64() {
89058863
let a: f64 = -1.5;

crates/core_arch/src/arm_shared/neon/generated.rs

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4198,6 +4198,38 @@ pub unsafe fn vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
41984198
vrhaddq_s32_(a, b)
41994199
}
42004200

4201+
/// Floating-point round to integral, to nearest with ties to even
4202+
#[inline]
4203+
#[target_feature(enable = "neon")]
4204+
#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
4205+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))]
4206+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frintn))]
4207+
pub unsafe fn vrndn_f32(a: float32x2_t) -> float32x2_t {
4208+
#[allow(improper_ctypes)]
4209+
extern "C" {
4210+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v2f32")]
4211+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v2f32")]
4212+
fn vrndn_f32_(a: float32x2_t) -> float32x2_t;
4213+
}
4214+
vrndn_f32_(a)
4215+
}
4216+
4217+
/// Floating-point round to integral, to nearest with ties to even
4218+
#[inline]
4219+
#[target_feature(enable = "neon")]
4220+
#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
4221+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))]
4222+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frintn))]
4223+
pub unsafe fn vrndnq_f32(a: float32x4_t) -> float32x4_t {
4224+
#[allow(improper_ctypes)]
4225+
extern "C" {
4226+
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v4f32")]
4227+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v4f32")]
4228+
fn vrndnq_f32_(a: float32x4_t) -> float32x4_t;
4229+
}
4230+
vrndnq_f32_(a)
4231+
}
4232+
42014233
/// Saturating add
42024234
#[inline]
42034235
#[target_feature(enable = "neon")]
@@ -14921,6 +14953,22 @@ mod test {
1492114953
assert_eq!(r, e);
1492214954
}
1492314955

14956+
#[simd_test(enable = "neon")]
14957+
unsafe fn test_vrndn_f32() {
14958+
let a: f32x2 = f32x2::new(-1.5, 0.5);
14959+
let e: f32x2 = f32x2::new(-2.0, 0.0);
14960+
let r: f32x2 = transmute(vrndn_f32(transmute(a)));
14961+
assert_eq!(r, e);
14962+
}
14963+
14964+
#[simd_test(enable = "neon")]
14965+
unsafe fn test_vrndnq_f32() {
14966+
let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
14967+
let e: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0);
14968+
let r: f32x4 = transmute(vrndnq_f32(transmute(a)));
14969+
assert_eq!(r, e);
14970+
}
14971+
1492414972
#[simd_test(enable = "neon")]
1492514973
unsafe fn test_vqadd_u8() {
1492614974
let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42);

crates/stdarch-gen/neon.spec

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1401,7 +1401,12 @@ validate -2.0, 0.0, 2.0, 2.0
14011401

14021402
link-aarch64 = frintn._EXT_
14031403
aarch64 = frintn
1404-
generate float*_t, float64x*_t
1404+
generate float64x*_t
1405+
1406+
target = fp-armv8
1407+
arm = vrintn
1408+
link-arm = vrintn._EXT_
1409+
generate float*_t
14051410

14061411
/// Floating-point round to integral, toward minus infinity
14071412
name = vrndm
@@ -3901,4 +3906,4 @@ validate MAX, 7
39013906

39023907
aarch64 = sqabs
39033908
link-aarch64 = sqabs._EXT_
3904-
generate int64x*_t
3909+
generate int64x*_t

0 commit comments

Comments
 (0)