Skip to content

Commit 78e32f8

Browse files
authored
implement different types of parameters and double suffixes in code generator (rust-lang#1083)
1 parent e9c60cb commit 78e32f8

File tree

3 files changed

+263
-26
lines changed

3 files changed

+263
-26
lines changed

crates/core_arch/src/aarch64/neon/generated.rs

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1099,6 +1099,60 @@ pub unsafe fn vcaleq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
10991099
vcageq_f64(b, a)
11001100
}
11011101

1102+
/// Floating-point convert to higher precision long
1103+
#[inline]
1104+
#[target_feature(enable = "neon")]
1105+
#[cfg_attr(test, assert_instr(fcvtl))]
1106+
pub unsafe fn vcvt_f64_f32(a: float32x2_t) -> float64x2_t {
1107+
simd_cast(a)
1108+
}
1109+
1110+
/// Floating-point convert to higher precision long
1111+
#[inline]
1112+
#[target_feature(enable = "neon")]
1113+
#[cfg_attr(test, assert_instr(fcvtl))]
1114+
pub unsafe fn vcvt_high_f64_f32(a: float32x4_t) -> float64x2_t {
1115+
let b: float32x2_t = simd_shuffle2(a, a, [2, 3]);
1116+
simd_cast(b)
1117+
}
1118+
1119+
/// Floating-point convert to lower precision narrow
1120+
#[inline]
1121+
#[target_feature(enable = "neon")]
1122+
#[cfg_attr(test, assert_instr(fcvtn))]
1123+
pub unsafe fn vcvt_f32_f64(a: float64x2_t) -> float32x2_t {
1124+
simd_cast(a)
1125+
}
1126+
1127+
/// Floating-point convert to lower precision narrow
1128+
#[inline]
1129+
#[target_feature(enable = "neon")]
1130+
#[cfg_attr(test, assert_instr(fcvtn))]
1131+
pub unsafe fn vcvt_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t {
1132+
simd_shuffle4(a, simd_cast(b), [0, 1, 2, 3])
1133+
}
1134+
1135+
/// Floating-point convert to lower precision narrow, rounding to odd
1136+
#[inline]
1137+
#[target_feature(enable = "neon")]
1138+
#[cfg_attr(test, assert_instr(fcvtxn))]
1139+
pub unsafe fn vcvtx_f32_f64(a: float64x2_t) -> float32x2_t {
1140+
#[allow(improper_ctypes)]
1141+
extern "C" {
1142+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtxn.v2f32.v2f64")]
1143+
fn vcvtx_f32_f64_(a: float64x2_t) -> float32x2_t;
1144+
}
1145+
vcvtx_f32_f64_(a)
1146+
}
1147+
1148+
/// Floating-point convert to lower precision narrow, rounding to odd
1149+
#[inline]
1150+
#[target_feature(enable = "neon")]
1151+
#[cfg_attr(test, assert_instr(fcvtxn))]
1152+
pub unsafe fn vcvtx_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t {
1153+
simd_shuffle4(a, vcvtx_f32_f64(b), [0, 1, 2, 3])
1154+
}
1155+
11021156
/// Multiply
11031157
#[inline]
11041158
#[target_feature(enable = "neon")]
@@ -2366,6 +2420,56 @@ mod test {
23662420
assert_eq!(r, e);
23672421
}
23682422

2423+
#[simd_test(enable = "neon")]
2424+
unsafe fn test_vcvt_f64_f32() {
2425+
let a: f32x2 = f32x2::new(-1.2, 1.2);
2426+
let e: f64x2 = f64x2::new(-1.2f32 as f64, 1.2f32 as f64);
2427+
let r: f64x2 = transmute(vcvt_f64_f32(transmute(a)));
2428+
assert_eq!(r, e);
2429+
}
2430+
2431+
#[simd_test(enable = "neon")]
2432+
unsafe fn test_vcvt_high_f64_f32() {
2433+
let a: f32x4 = f32x4::new(-1.2, 1.2, 2.3, 3.4);
2434+
let e: f64x2 = f64x2::new(2.3f32 as f64, 3.4f32 as f64);
2435+
let r: f64x2 = transmute(vcvt_high_f64_f32(transmute(a)));
2436+
assert_eq!(r, e);
2437+
}
2438+
2439+
#[simd_test(enable = "neon")]
2440+
unsafe fn test_vcvt_f32_f64() {
2441+
let a: f64x2 = f64x2::new(-1.2, 1.2);
2442+
let e: f32x2 = f32x2::new(-1.2f64 as f32, 1.2f64 as f32);
2443+
let r: f32x2 = transmute(vcvt_f32_f64(transmute(a)));
2444+
assert_eq!(r, e);
2445+
}
2446+
2447+
#[simd_test(enable = "neon")]
2448+
unsafe fn test_vcvt_high_f32_f64() {
2449+
let a: f32x2 = f32x2::new(-1.2, 1.2);
2450+
let b: f64x2 = f64x2::new(-2.3, 3.4);
2451+
let e: f32x4 = f32x4::new(-1.2, 1.2, -2.3f64 as f32, 3.4f64 as f32);
2452+
let r: f32x4 = transmute(vcvt_high_f32_f64(transmute(a), transmute(b)));
2453+
assert_eq!(r, e);
2454+
}
2455+
2456+
#[simd_test(enable = "neon")]
2457+
unsafe fn test_vcvtx_f32_f64() {
2458+
let a: f64x2 = f64x2::new(-1.0, 2.0);
2459+
let e: f32x2 = f32x2::new(-1.0, 2.0);
2460+
let r: f32x2 = transmute(vcvtx_f32_f64(transmute(a)));
2461+
assert_eq!(r, e);
2462+
}
2463+
2464+
#[simd_test(enable = "neon")]
2465+
unsafe fn test_vcvtx_high_f32_f64() {
2466+
let a: f32x2 = f32x2::new(-1.0, 2.0);
2467+
let b: f64x2 = f64x2::new(-3.0, 4.0);
2468+
let e: f32x4 = f32x4::new(-1.0, 2.0, -3.0, 4.0);
2469+
let r: f32x4 = transmute(vcvtx_high_f32_f64(transmute(a), transmute(b)));
2470+
assert_eq!(r, e);
2471+
}
2472+
23692473
#[simd_test(enable = "neon")]
23702474
unsafe fn test_vmul_f64() {
23712475
let a: f64 = 1.0;

crates/stdarch-gen/neon.spec

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -527,7 +527,7 @@ generate int*_t
527527

528528
/// Unsigned count leading zero bits
529529
name = vclz
530-
multi_fn = transmute, [self-signed-ext, transmute(a)]
530+
multi_fn = transmute, {self-signed-ext, transmute(a)}
531531
a = MIN, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, MAX
532532
validate BITS, BITS, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, 0
533533

@@ -589,6 +589,69 @@ generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
589589
arm = vacge.s
590590
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
591591

592+
/// Floating-point convert to higher precision long
593+
name = vcvt
594+
double-suffixes
595+
fn = simd_cast
596+
a = -1.2, 1.2
597+
validate -1.2f32 as f64, 1.2f32 as f64
598+
599+
aarch64 = fcvtl
600+
generate float32x2_t:float64x2_t
601+
602+
/// Floating-point convert to higher precision long
603+
name = vcvt_high
604+
double-suffixes
605+
multi_fn = simd_shuffle2, b:float32x2_t, a, a, [2, 3]
606+
multi_fn = simd_cast, b
607+
a = -1.2, 1.2, 2.3, 3.4
608+
validate 2.3f32 as f64, 3.4f32 as f64
609+
610+
aarch64 = fcvtl
611+
generate float32x4_t:float64x2_t
612+
613+
/// Floating-point convert to lower precision narrow
614+
name = vcvt
615+
double-suffixes
616+
fn = simd_cast
617+
a = -1.2, 1.2
618+
validate -1.2f64 as f32, 1.2f64 as f32
619+
620+
aarch64 = fcvtn
621+
generate float64x2_t:float32x2_t
622+
623+
/// Floating-point convert to lower precision narrow
624+
name = vcvt_high
625+
double-suffixes
626+
multi_fn = simd_shuffle4, a, {simd_cast, b}, [0, 1, 2, 3]
627+
a = -1.2, 1.2
628+
b = -2.3, 3.4
629+
validate -1.2, 1.2, -2.3f64 as f32, 3.4f64 as f32
630+
631+
aarch64 = fcvtn
632+
generate float32x2_t:float64x2_t:float32x4_t
633+
634+
/// Floating-point convert to lower precision narrow, rounding to odd
635+
name = vcvtx
636+
double-suffixes
637+
a = -1.0, 2.0
638+
validate -1.0, 2.0
639+
640+
aarch64 = fcvtxn
641+
link-aarch64 = fcvtxn._EXT2_._EXT_
642+
generate float64x2_t:float32x2_t
643+
644+
/// Floating-point convert to lower precision narrow, rounding to odd
645+
name = vcvtx_high
646+
double-suffixes
647+
multi_fn = simd_shuffle4, a, {vcvtx-doubleself-noext, b}, [0, 1, 2, 3]
648+
a = -1.0, 2.0
649+
b = -3.0, 4.0
650+
validate -1.0, 2.0, -3.0, 4.0
651+
652+
aarch64 = fcvtxn
653+
generate float32x2_t:float64x2_t:float32x4_t
654+
592655
/// Saturating subtract
593656
name = vqsub
594657
a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42

0 commit comments

Comments
 (0)