Skip to content

Commit 86c8d56

Browse files
authored
add neon instruction vget_high_* (#1074)
1 parent c07038c commit 86c8d56

File tree

2 files changed

+232
-0
lines changed

2 files changed

+232
-0
lines changed

crates/core_arch/src/aarch64/neon/mod.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1512,6 +1512,14 @@ pub unsafe fn vcombine_p64(low: poly64x1_t, high: poly64x1_t) -> poly64x2_t {
15121512
simd_shuffle2(low, high, [0, 1])
15131513
}
15141514

1515+
/// Duplicate vector element to vector or scalar
///
/// Returns a one-lane vector holding lane 1 of `a`, i.e. the second of its
/// two `f64` lanes (`[1.0, 2.0]` yields `[2.0]`).
1516+
#[inline]
1517+
#[target_feature(enable = "neon")]
1518+
#[cfg_attr(test, assert_instr(mov))]
1519+
pub unsafe fn vget_high_f64(a: float64x2_t) -> float64x1_t {
1520+
float64x1_t(simd_extract(a, 1))
1521+
}
1522+
15151523
/* FIXME: 16-bit float
15161524
/// Vector combine
15171525
#[inline]
@@ -3467,6 +3475,14 @@ mod tests {
34673475
test_vcombine!(test_vcombine_p64 => vcombine_p64([3_u64], [13_u64]));
34683476
test_vcombine!(test_vcombine_f64 => vcombine_f64([-3_f64], [13_f64]));
34693477

3478+
// The upper lane (2.0) of a two-lane f64 vector must come back as the only lane.
#[simd_test(enable = "neon")]
3479+
unsafe fn test_vget_high_f64() {
3480+
let expected = f64x1::new(2.0);
3481+
let input = f64x2::new(1.0, 2.0);
3482+
let actual: f64x1 = transmute(vget_high_f64(transmute(input)));
3483+
assert_eq!(actual, expected);
3484+
}
3485+
34703486
#[simd_test(enable = "neon")]
34713487
unsafe fn test_vceq_u64() {
34723488
test_cmp_u64(

crates/core_arch/src/arm/neon/mod.rs

Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3742,6 +3742,126 @@ pub unsafe fn vget_lane_u8<const IMM5: i32>(v: uint8x8_t) -> u8 {
37423742
simd_extract(v, IMM5 as u32)
37433743
}
37443744

3745+
/// Duplicate vector element to vector or scalar
///
/// Returns the high half of `a`: lanes 8 through 15 of the sixteen-lane input
/// become lanes 0 through 7 of the result.
3746+
#[inline]
3747+
#[target_feature(enable = "neon")]
3748+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3749+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
3750+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))]
3751+
pub unsafe fn vget_high_s8(a: int8x16_t) -> int8x8_t {
3752+
simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15])
3753+
}
3754+
3755+
/// Duplicate vector element to vector or scalar
///
/// Returns the high half of `a`: lanes 4 through 7 of the eight-lane input
/// become lanes 0 through 3 of the result.
3756+
#[inline]
3757+
#[target_feature(enable = "neon")]
3758+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3759+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
3760+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))]
3761+
pub unsafe fn vget_high_s16(a: int16x8_t) -> int16x4_t {
3762+
simd_shuffle4(a, a, [4, 5, 6, 7])
3763+
}
3764+
3765+
/// Duplicate vector element to vector or scalar
///
/// Returns the high half of `a`: lanes 2 and 3 of the four-lane input become
/// lanes 0 and 1 of the result.
3766+
#[inline]
3767+
#[target_feature(enable = "neon")]
3768+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3769+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
3770+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))]
3771+
pub unsafe fn vget_high_s32(a: int32x4_t) -> int32x2_t {
3772+
simd_shuffle2(a, a, [2, 3])
3773+
}
3774+
3775+
/// Duplicate vector element to vector or scalar
///
/// Returns a one-lane vector holding lane 1 of `a`, i.e. the second of its
/// two `i64` lanes.
3776+
#[inline]
3777+
#[target_feature(enable = "neon")]
3778+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3779+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
3780+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))]
3781+
pub unsafe fn vget_high_s64(a: int64x2_t) -> int64x1_t {
3782+
int64x1_t(simd_extract(a, 1))
3783+
}
3784+
3785+
/// Duplicate vector element to vector or scalar
///
/// Returns the high half of `a`: lanes 8 through 15 of the sixteen-lane input
/// become lanes 0 through 7 of the result.
3786+
#[inline]
3787+
#[target_feature(enable = "neon")]
3788+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3789+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
3790+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))]
3791+
pub unsafe fn vget_high_u8(a: uint8x16_t) -> uint8x8_t {
3792+
simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15])
3793+
}
3794+
3795+
/// Duplicate vector element to vector or scalar
///
/// Returns the high half of `a`: lanes 4 through 7 of the eight-lane input
/// become lanes 0 through 3 of the result.
3796+
#[inline]
3797+
#[target_feature(enable = "neon")]
3798+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3799+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
3800+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))]
3801+
pub unsafe fn vget_high_u16(a: uint16x8_t) -> uint16x4_t {
3802+
simd_shuffle4(a, a, [4, 5, 6, 7])
3803+
}
3804+
3805+
/// Duplicate vector element to vector or scalar
///
/// Returns the high half of `a`: lanes 2 and 3 of the four-lane input become
/// lanes 0 and 1 of the result.
3806+
#[inline]
3807+
#[target_feature(enable = "neon")]
3808+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3809+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
3810+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))]
3811+
pub unsafe fn vget_high_u32(a: uint32x4_t) -> uint32x2_t {
3812+
simd_shuffle2(a, a, [2, 3])
3813+
}
3814+
3815+
/// Duplicate vector element to vector or scalar
///
/// Returns a one-lane vector holding lane 1 of `a`, i.e. the second of its
/// two `u64` lanes.
3816+
#[inline]
3817+
#[target_feature(enable = "neon")]
3818+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3819+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
3820+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))]
3821+
pub unsafe fn vget_high_u64(a: uint64x2_t) -> uint64x1_t {
3822+
uint64x1_t(simd_extract(a, 1))
3823+
}
3824+
3825+
/// Duplicate vector element to vector or scalar
///
/// Returns the high half of `a`: lanes 8 through 15 of the sixteen-lane input
/// become lanes 0 through 7 of the result.
3826+
#[inline]
3827+
#[target_feature(enable = "neon")]
3828+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3829+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
3830+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))]
3831+
pub unsafe fn vget_high_p8(a: poly8x16_t) -> poly8x8_t {
3832+
simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15])
3833+
}
3834+
3835+
/// Duplicate vector element to vector or scalar
///
/// Returns the high half of `a`: lanes 4 through 7 of the eight-lane input
/// become lanes 0 through 3 of the result.
3836+
#[inline]
3837+
#[target_feature(enable = "neon")]
3838+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3839+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
3840+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))]
3841+
pub unsafe fn vget_high_p16(a: poly16x8_t) -> poly16x4_t {
3842+
simd_shuffle4(a, a, [4, 5, 6, 7])
3843+
}
3844+
3845+
/// Duplicate vector element to vector or scalar
///
/// Returns a one-lane vector holding lane 1 of `a`, i.e. the second of its
/// two 64-bit polynomial lanes.
3846+
#[inline]
3847+
#[target_feature(enable = "neon")]
3848+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3849+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
3850+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))]
3851+
pub unsafe fn vget_high_p64(a: poly64x2_t) -> poly64x1_t {
3852+
poly64x1_t(simd_extract(a, 1))
3853+
}
3854+
3855+
/// Duplicate vector element to vector or scalar
///
/// Returns the high half of `a`: lanes 2 and 3 of the four-lane input become
/// lanes 0 and 1 of the result.
3856+
#[inline]
3857+
#[target_feature(enable = "neon")]
3858+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3859+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
3860+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))]
3861+
pub unsafe fn vget_high_f32(a: float32x4_t) -> float32x2_t {
3862+
simd_shuffle2(a, a, [2, 3])
3863+
}
3864+
37453865
/// Duplicate vector element to vector or scalar
37463866
#[inline]
37473867
#[target_feature(enable = "neon")]
@@ -5697,6 +5817,102 @@ mod tests {
56975817
assert_eq!(r, e);
56985818
}
56995819

5820+
// The upper eight lanes (9..=16) of a sixteen-lane i8 vector must be returned in order.
#[simd_test(enable = "neon")]
5821+
unsafe fn test_vget_high_s8() {
5822+
let expected = i8x8::new(9, 10, 11, 12, 13, 14, 15, 16);
5823+
let input = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
5824+
let actual: i8x8 = transmute(vget_high_s8(transmute(input)));
5825+
assert_eq!(actual, expected);
5826+
}
5827+
5828+
// The upper four lanes (5..=8) of an eight-lane i16 vector must be returned in order.
#[simd_test(enable = "neon")]
5829+
unsafe fn test_vget_high_s16() {
5830+
let expected = i16x4::new(5, 6, 7, 8);
5831+
let input = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
5832+
let actual: i16x4 = transmute(vget_high_s16(transmute(input)));
5833+
assert_eq!(actual, expected);
5834+
}
5835+
5836+
// The upper two lanes (3, 4) of a four-lane i32 vector must be returned in order.
#[simd_test(enable = "neon")]
5837+
unsafe fn test_vget_high_s32() {
5838+
let expected = i32x2::new(3, 4);
5839+
let input = i32x4::new(1, 2, 3, 4);
5840+
let actual: i32x2 = transmute(vget_high_s32(transmute(input)));
5841+
assert_eq!(actual, expected);
5842+
}
5843+
5844+
// The upper lane (2) of a two-lane i64 vector must come back as the only lane.
#[simd_test(enable = "neon")]
5845+
unsafe fn test_vget_high_s64() {
5846+
let expected = i64x1::new(2);
5847+
let input = i64x2::new(1, 2);
5848+
let actual: i64x1 = transmute(vget_high_s64(transmute(input)));
5849+
assert_eq!(actual, expected);
5850+
}
5851+
5852+
// Exercises `vget_high_u8`: the upper eight lanes (9..=16) of a sixteen-lane
// u8 vector must be returned in order. The call must name the unsigned
// intrinsic explicitly, because `transmute` would let any same-sized
// intrinsic type-check here.
#[simd_test(enable = "neon")]
5853+
unsafe fn test_vget_high_u8() {
5854+
let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
5855+
let e = u8x8::new(9, 10, 11, 12, 13, 14, 15, 16);
5856+
let r: u8x8 = transmute(vget_high_u8(transmute(a)));
5857+
assert_eq!(r, e);
5858+
}
5859+
5860+
// Exercises `vget_high_u16`: the upper four lanes (5..=8) of an eight-lane
// u16 vector must be returned in order. The call must name the unsigned
// intrinsic explicitly, because `transmute` would let any same-sized
// intrinsic type-check here.
#[simd_test(enable = "neon")]
5861+
unsafe fn test_vget_high_u16() {
5862+
let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
5863+
let e = u16x4::new(5, 6, 7, 8);
5864+
let r: u16x4 = transmute(vget_high_u16(transmute(a)));
5865+
assert_eq!(r, e);
5866+
}
5867+
5868+
// Exercises `vget_high_u32`: the upper two lanes (3, 4) of a four-lane u32
// vector must be returned in order. The call must name the unsigned
// intrinsic explicitly, because `transmute` would let any same-sized
// intrinsic type-check here.
#[simd_test(enable = "neon")]
5869+
unsafe fn test_vget_high_u32() {
5870+
let a = u32x4::new(1, 2, 3, 4);
5871+
let e = u32x2::new(3, 4);
5872+
let r: u32x2 = transmute(vget_high_u32(transmute(a)));
5873+
assert_eq!(r, e);
5874+
}
5875+
5876+
// Exercises `vget_high_u64`: the upper lane (2) of a two-lane u64 vector must
// come back as the only lane. The call must name the unsigned intrinsic
// explicitly, because `transmute` would let any same-sized intrinsic
// type-check here.
#[simd_test(enable = "neon")]
5877+
unsafe fn test_vget_high_u64() {
5878+
let a = u64x2::new(1, 2);
5879+
let e = u64x1::new(2);
5880+
let r: u64x1 = transmute(vget_high_u64(transmute(a)));
5881+
assert_eq!(r, e);
5882+
}
5883+
5884+
// The upper eight lanes (9..=16) of a sixteen-lane poly8 vector must be returned in order.
#[simd_test(enable = "neon")]
5885+
unsafe fn test_vget_high_p8() {
5886+
let expected = u8x8::new(9, 10, 11, 12, 13, 14, 15, 16);
5887+
let input = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
5888+
let actual: u8x8 = transmute(vget_high_p8(transmute(input)));
5889+
assert_eq!(actual, expected);
5890+
}
5891+
5892+
// The upper four lanes (5..=8) of an eight-lane poly16 vector must be returned in order.
#[simd_test(enable = "neon")]
5893+
unsafe fn test_vget_high_p16() {
5894+
let expected = u16x4::new(5, 6, 7, 8);
5895+
let input = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
5896+
let actual: u16x4 = transmute(vget_high_p16(transmute(input)));
5897+
assert_eq!(actual, expected);
5898+
}
5899+
5900+
// The upper lane (2) of a two-lane poly64 vector must come back as the only lane.
#[simd_test(enable = "neon")]
5901+
unsafe fn test_vget_high_p64() {
5902+
let expected = u64x1::new(2);
5903+
let input = u64x2::new(1, 2);
5904+
let actual: u64x1 = transmute(vget_high_p64(transmute(input)));
5905+
assert_eq!(actual, expected);
5906+
}
5907+
5908+
// The upper two lanes (3.0, 4.0) of a four-lane f32 vector must be returned in order.
#[simd_test(enable = "neon")]
5909+
unsafe fn test_vget_high_f32() {
5910+
let expected = f32x2::new(3.0, 4.0);
5911+
let input = f32x4::new(1.0, 2.0, 3.0, 4.0);
5912+
let actual: f32x2 = transmute(vget_high_f32(transmute(input)));
5913+
assert_eq!(actual, expected);
5914+
}
5915+
57005916
#[simd_test(enable = "neon")]
57015917
unsafe fn test_vdupq_n_s8() {
57025918
let v: i8 = 42;

0 commit comments

Comments
 (0)