Skip to content

Commit e9c60cb

Browse files
authored
add neon instruction vget_low_* and fix vget_high_* (rust-lang#1082)
1 parent 86c8d56 commit e9c60cb

File tree

2 files changed

+244
-16
lines changed

2 files changed

+244
-16
lines changed

crates/core_arch/src/aarch64/neon/mod.rs

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1520,6 +1520,30 @@ pub unsafe fn vget_high_f64(a: float64x2_t) -> float64x1_t {
15201520
float64x1_t(simd_extract(a, 1))
15211521
}
15221522

1523+
/// Returns the upper half of `a`: lane 1 of the two-lane polynomial vector,
/// repackaged as a one-lane `poly64x1_t`.
///
/// The lane is read with `simd_extract`, wrapped in a `u64x1`, and then
/// reinterpreted as `poly64x1_t` through `transmute`.
1524+
#[inline]
1525+
#[target_feature(enable = "neon")]
1526+
// NOTE(review): `ldr` may only match the argument load in the generated test
// shim rather than an instruction specific to this intrinsic - confirm.
#[cfg_attr(test, assert_instr(ldr))]
1527+
pub unsafe fn vget_high_p64(a: poly64x2_t) -> poly64x1_t {
1528+
transmute(u64x1::new(simd_extract(a, 1)))
1529+
}
1530+
1531+
/// Returns the lower half of `a`: lane 0 of the two-lane `f64` vector,
/// wrapped in a one-lane `float64x1_t`.
1532+
#[inline]
1533+
#[target_feature(enable = "neon")]
1534+
#[cfg_attr(test, assert_instr(ldr))]
1535+
pub unsafe fn vget_low_f64(a: float64x2_t) -> float64x1_t {
1536+
float64x1_t(simd_extract(a, 0))
1537+
}
1538+
1539+
/// Returns the lower half of `a`: lane 0 of the two-lane polynomial vector,
/// repackaged as a one-lane `poly64x1_t`.
///
/// The lane is read with `simd_extract`, wrapped in a `u64x1`, and then
/// reinterpreted as `poly64x1_t` through `transmute`.
1540+
#[inline]
1541+
#[target_feature(enable = "neon")]
1542+
#[cfg_attr(test, assert_instr(ldr))]
1543+
pub unsafe fn vget_low_p64(a: poly64x2_t) -> poly64x1_t {
1544+
transmute(u64x1::new(simd_extract(a, 0)))
1545+
}
1546+
15231547
/* FIXME: 16-bit float
15241548
/// Vector combine
15251549
#[inline]
@@ -3483,6 +3507,30 @@ mod tests {
34833507
assert_eq!(r, e);
34843508
}
34853509

3510+
// `vget_high_p64` on the lanes (1, 2) must yield the second lane, 2.
// Inputs and outputs are moved through `u64` helper vectors via `transmute`.
#[simd_test(enable = "neon")]
3511+
unsafe fn test_vget_high_p64() {
3512+
let a = u64x2::new(1, 2);
3513+
let e = u64x1::new(2);
3514+
let r: u64x1 = transmute(vget_high_p64(transmute(a)));
3515+
assert_eq!(r, e);
3516+
}
3517+
3518+
// `vget_low_f64` on the lanes (1.0, 2.0) must yield the first lane, 1.0.
#[simd_test(enable = "neon")]
3519+
unsafe fn test_vget_low_f64() {
3520+
let a = f64x2::new(1.0, 2.0);
3521+
let e = f64x1::new(1.0);
3522+
let r: f64x1 = transmute(vget_low_f64(transmute(a)));
3523+
assert_eq!(r, e);
3524+
}
3525+
3526+
// `vget_low_p64` on the lanes (1, 2) must yield the first lane, 1.
// Inputs and outputs are moved through `u64` helper vectors via `transmute`.
#[simd_test(enable = "neon")]
3527+
unsafe fn test_vget_low_p64() {
3528+
let a = u64x2::new(1, 2);
3529+
let e = u64x1::new(1);
3530+
let r: u64x1 = transmute(vget_low_p64(transmute(a)));
3531+
assert_eq!(r, e);
3532+
}
3533+
34863534
#[simd_test(enable = "neon")]
34873535
unsafe fn test_vceq_u64() {
34883536
test_cmp_u64(

crates/core_arch/src/arm/neon/mod.rs

Lines changed: 196 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3848,18 +3848,118 @@ pub unsafe fn vget_high_p16(a: poly16x8_t) -> poly16x4_t {
38483848
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
38493849
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
38503850
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))]
3851-
pub unsafe fn vget_high_p64(a: poly64x2_t) -> poly64x1_t {
3852-
poly64x1_t(simd_extract(a, 1))
3851+
pub unsafe fn vget_high_f32(a: float32x4_t) -> float32x2_t {
3852+
simd_shuffle2(a, a, [2, 3])
38533853
}
38543854

38553855
/// Returns the lower half of `a`: lanes 0 through 7 of the sixteen-lane
/// `i8` vector, as an eight-lane `int8x8_t`.
///
/// Implemented as a shuffle of `a` with itself using indices `0..=7`.
38563856
#[inline]
38573857
#[target_feature(enable = "neon")]
38583858
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3859-
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
3860-
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))]
3861-
pub unsafe fn vget_high_f32(a: float32x4_t) -> float32x2_t {
3862-
simd_shuffle2(a, a, [2, 3])
3859+
// NOTE(review): `ldr` may only match the argument load in the generated test
// shim rather than an instruction specific to this intrinsic - confirm.
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
3860+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
3861+
pub unsafe fn vget_low_s8(a: int8x16_t) -> int8x8_t {
3862+
simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
3863+
}
3864+
3865+
/// Returns the lower half of `a`: lanes 0 through 3 of the eight-lane
/// `i16` vector, as a four-lane `int16x4_t`.
///
/// Implemented as a shuffle of `a` with itself using indices `0..=3`.
3866+
#[inline]
3867+
#[target_feature(enable = "neon")]
3868+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3869+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
3870+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
3871+
pub unsafe fn vget_low_s16(a: int16x8_t) -> int16x4_t {
3872+
simd_shuffle4(a, a, [0, 1, 2, 3])
3873+
}
3874+
3875+
/// Returns the lower half of `a`: lanes 0 and 1 of the four-lane `i32`
/// vector, as a two-lane `int32x2_t`.
///
/// Implemented as a shuffle of `a` with itself using indices `[0, 1]`.
3876+
#[inline]
3877+
#[target_feature(enable = "neon")]
3878+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3879+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
3880+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
3881+
pub unsafe fn vget_low_s32(a: int32x4_t) -> int32x2_t {
3882+
simd_shuffle2(a, a, [0, 1])
3883+
}
3884+
3885+
/// Returns the lower half of `a`: lane 0 of the two-lane `i64` vector,
/// wrapped in a one-lane `int64x1_t`.
3886+
#[inline]
3887+
#[target_feature(enable = "neon")]
3888+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3889+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
3890+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
3891+
pub unsafe fn vget_low_s64(a: int64x2_t) -> int64x1_t {
3892+
int64x1_t(simd_extract(a, 0))
3893+
}
3894+
3895+
/// Returns the lower half of `a`: lanes 0 through 7 of the sixteen-lane
/// `u8` vector, as an eight-lane `uint8x8_t`.
///
/// Implemented as a shuffle of `a` with itself using indices `0..=7`.
3896+
#[inline]
3897+
#[target_feature(enable = "neon")]
3898+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3899+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
3900+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
3901+
pub unsafe fn vget_low_u8(a: uint8x16_t) -> uint8x8_t {
3902+
simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
3903+
}
3904+
3905+
/// Returns the lower half of `a`: lanes 0 through 3 of the eight-lane
/// `u16` vector, as a four-lane `uint16x4_t`.
///
/// Implemented as a shuffle of `a` with itself using indices `0..=3`.
3906+
#[inline]
3907+
#[target_feature(enable = "neon")]
3908+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3909+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
3910+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
3911+
pub unsafe fn vget_low_u16(a: uint16x8_t) -> uint16x4_t {
3912+
simd_shuffle4(a, a, [0, 1, 2, 3])
3913+
}
3914+
3915+
/// Returns the lower half of `a`: lanes 0 and 1 of the four-lane `u32`
/// vector, as a two-lane `uint32x2_t`.
///
/// Implemented as a shuffle of `a` with itself using indices `[0, 1]`.
3916+
#[inline]
3917+
#[target_feature(enable = "neon")]
3918+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3919+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
3920+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
3921+
pub unsafe fn vget_low_u32(a: uint32x4_t) -> uint32x2_t {
3922+
simd_shuffle2(a, a, [0, 1])
3923+
}
3924+
3925+
/// Returns the lower half of `a`: lane 0 of the two-lane `u64` vector,
/// wrapped in a one-lane `uint64x1_t`.
3926+
#[inline]
3927+
#[target_feature(enable = "neon")]
3928+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3929+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
3930+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
3931+
pub unsafe fn vget_low_u64(a: uint64x2_t) -> uint64x1_t {
3932+
uint64x1_t(simd_extract(a, 0))
3933+
}
3934+
3935+
/// Returns the lower half of `a`: lanes 0 through 7 of the sixteen-lane
/// polynomial vector, as an eight-lane `poly8x8_t`.
///
/// Implemented as a shuffle of `a` with itself using indices `0..=7`.
3936+
#[inline]
3937+
#[target_feature(enable = "neon")]
3938+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3939+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
3940+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
3941+
pub unsafe fn vget_low_p8(a: poly8x16_t) -> poly8x8_t {
3942+
simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
3943+
}
3944+
3945+
/// Returns the lower half of `a`: lanes 0 through 3 of the eight-lane
/// polynomial vector, as a four-lane `poly16x4_t`.
///
/// Implemented as a shuffle of `a` with itself using indices `0..=3`.
3946+
#[inline]
3947+
#[target_feature(enable = "neon")]
3948+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3949+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
3950+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
3951+
pub unsafe fn vget_low_p16(a: poly16x8_t) -> poly16x4_t {
3952+
simd_shuffle4(a, a, [0, 1, 2, 3])
3953+
}
3954+
3955+
/// Returns the lower half of `a`: lanes 0 and 1 of the four-lane `f32`
/// vector, as a two-lane `float32x2_t`.
///
/// Implemented as a shuffle of `a` with itself using indices `[0, 1]`.
3956+
#[inline]
3957+
#[target_feature(enable = "neon")]
3958+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
3959+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
3960+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
3961+
pub unsafe fn vget_low_f32(a: float32x4_t) -> float32x2_t {
3962+
simd_shuffle2(a, a, [0, 1])
38633963
}
38643964

38653965
/// Duplicate vector element to vector or scalar
@@ -5853,31 +5953,31 @@ mod tests {
58535953
// `vget_high_u8` on lanes 1..=16 must yield lanes 9..=16.
// The diff replaces the earlier call to `vget_high_s8` so that the unsigned
// intrinsic named by this test is the one exercised.
unsafe fn test_vget_high_u8() {
58545954
let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
58555955
let e = u8x8::new(9, 10, 11, 12, 13, 14, 15, 16);
5856-
let r: u8x8 = transmute(vget_high_s8(transmute(a)));
5956+
let r: u8x8 = transmute(vget_high_u8(transmute(a)));
58575957
assert_eq!(r, e);
58585958
}
58595959

58605960
// `vget_high_u16` on lanes 1..=8 must yield lanes 5..=8.
// The diff replaces the earlier call to `vget_high_s16` so that the unsigned
// intrinsic named by this test is the one exercised.
#[simd_test(enable = "neon")]
58615961
unsafe fn test_vget_high_u16() {
58625962
let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
58635963
let e = u16x4::new(5, 6, 7, 8);
5864-
let r: u16x4 = transmute(vget_high_s16(transmute(a)));
5964+
let r: u16x4 = transmute(vget_high_u16(transmute(a)));
58655965
assert_eq!(r, e);
58665966
}
58675967

58685968
// `vget_high_u32` on lanes (1, 2, 3, 4) must yield (3, 4).
// The diff replaces the earlier call to `vget_high_s32` so that the unsigned
// intrinsic named by this test is the one exercised.
#[simd_test(enable = "neon")]
58695969
unsafe fn test_vget_high_u32() {
58705970
let a = u32x4::new(1, 2, 3, 4);
58715971
let e = u32x2::new(3, 4);
5872-
let r: u32x2 = transmute(vget_high_s32(transmute(a)));
5972+
let r: u32x2 = transmute(vget_high_u32(transmute(a)));
58735973
assert_eq!(r, e);
58745974
}
58755975

58765976
// `vget_high_u64` on lanes (1, 2) must yield the second lane, 2.
// The diff replaces the earlier call to `vget_high_s64` so that the unsigned
// intrinsic named by this test is the one exercised.
#[simd_test(enable = "neon")]
58775977
unsafe fn test_vget_high_u64() {
58785978
let a = u64x2::new(1, 2);
58795979
let e = u64x1::new(2);
5880-
let r: u64x1 = transmute(vget_high_s64(transmute(a)));
5980+
let r: u64x1 = transmute(vget_high_u64(transmute(a)));
58815981
assert_eq!(r, e);
58825982
}
58835983

@@ -5898,18 +5998,98 @@ mod tests {
58985998
}
58995999

59006000
// `vget_high_f32` on lanes (1.0, 2.0, 3.0, 4.0) must yield (3.0, 4.0).
// In this hunk the removed `test_vget_high_p64` header gives way to this test.
#[simd_test(enable = "neon")]
5901-
unsafe fn test_vget_high_p64() {
6001+
unsafe fn test_vget_high_f32() {
6002+
let a = f32x4::new(1.0, 2.0, 3.0, 4.0);
6003+
let e = f32x2::new(3.0, 4.0);
6004+
let r: f32x2 = transmute(vget_high_f32(transmute(a)));
6005+
assert_eq!(r, e);
6006+
}
6007+
6008+
// `vget_low_s8` on lanes 1..=16 must yield lanes 1..=8.
#[simd_test(enable = "neon")]
6009+
unsafe fn test_vget_low_s8() {
6010+
let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
6011+
let e = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
6012+
let r: i8x8 = transmute(vget_low_s8(transmute(a)));
6013+
assert_eq!(r, e);
6014+
}
6015+
6016+
// `vget_low_s16` on lanes 1..=8 must yield lanes 1..=4.
#[simd_test(enable = "neon")]
6017+
unsafe fn test_vget_low_s16() {
6018+
let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
6019+
let e = i16x4::new(1, 2, 3, 4);
6020+
let r: i16x4 = transmute(vget_low_s16(transmute(a)));
6021+
assert_eq!(r, e);
6022+
}
6023+
6024+
// `vget_low_s32` on lanes (1, 2, 3, 4) must yield (1, 2).
#[simd_test(enable = "neon")]
6025+
unsafe fn test_vget_low_s32() {
6026+
let a = i32x4::new(1, 2, 3, 4);
6027+
let e = i32x2::new(1, 2);
6028+
let r: i32x2 = transmute(vget_low_s32(transmute(a)));
6029+
assert_eq!(r, e);
6030+
}
6031+
6032+
// `vget_low_s64` on lanes (1, 2) must yield the first lane, 1.
#[simd_test(enable = "neon")]
6033+
unsafe fn test_vget_low_s64() {
6034+
let a = i64x2::new(1, 2);
6035+
let e = i64x1::new(1);
6036+
let r: i64x1 = transmute(vget_low_s64(transmute(a)));
6037+
assert_eq!(r, e);
6038+
}
6039+
6040+
// `vget_low_u8` on lanes 1..=16 must yield lanes 1..=8.
#[simd_test(enable = "neon")]
6041+
unsafe fn test_vget_low_u8() {
6042+
let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
6043+
let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
6044+
let r: u8x8 = transmute(vget_low_u8(transmute(a)));
6045+
assert_eq!(r, e);
6046+
}
6047+
6048+
// `vget_low_u16` on lanes 1..=8 must yield lanes 1..=4.
#[simd_test(enable = "neon")]
6049+
unsafe fn test_vget_low_u16() {
6050+
let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
6051+
let e = u16x4::new(1, 2, 3, 4);
6052+
let r: u16x4 = transmute(vget_low_u16(transmute(a)));
6053+
assert_eq!(r, e);
6054+
}
6055+
6056+
// `vget_low_u32` on lanes (1, 2, 3, 4) must yield (1, 2).
#[simd_test(enable = "neon")]
6057+
unsafe fn test_vget_low_u32() {
6058+
let a = u32x4::new(1, 2, 3, 4);
6059+
let e = u32x2::new(1, 2);
6060+
let r: u32x2 = transmute(vget_low_u32(transmute(a)));
6061+
assert_eq!(r, e);
6062+
}
6063+
6064+
// `vget_low_u64` on lanes (1, 2) must yield the first lane, 1.
// This hunk reuses context lines from the relocated `test_vget_high_p64`;
// the removed lines below are that test's old expectation and call.
#[simd_test(enable = "neon")]
6065+
unsafe fn test_vget_low_u64() {
59026066
let a = u64x2::new(1, 2);
5903-
let e = u64x1::new(2);
5904-
let r: u64x1 = transmute(vget_high_p64(transmute(a)));
6067+
let e = u64x1::new(1);
6068+
let r: u64x1 = transmute(vget_low_u64(transmute(a)));
59056069
assert_eq!(r, e);
59066070
}
59076071

59086072
// `vget_low_p8` on lanes 1..=16 must yield lanes 1..=8.
// Inputs and outputs are moved through `u8` helper vectors via `transmute`.
#[simd_test(enable = "neon")]
5909-
unsafe fn test_vget_high_f32() {
6073+
unsafe fn test_vget_low_p8() {
6074+
let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
6075+
let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
6076+
let r: u8x8 = transmute(vget_low_p8(transmute(a)));
6077+
assert_eq!(r, e);
6078+
}
6079+
6080+
// `vget_low_p16` on lanes 1..=8 must yield lanes 1..=4.
// Inputs and outputs are moved through `u16` helper vectors via `transmute`.
#[simd_test(enable = "neon")]
6081+
unsafe fn test_vget_low_p16() {
6082+
let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
6083+
let e = u16x4::new(1, 2, 3, 4);
6084+
let r: u16x4 = transmute(vget_low_p16(transmute(a)));
6085+
assert_eq!(r, e);
6086+
}
6087+
6088+
// `vget_low_f32` on lanes (1.0, 2.0, 3.0, 4.0) must yield (1.0, 2.0).
// This hunk reuses context lines from the relocated `test_vget_high_f32`;
// the removed lines below are that test's old expectation and call.
#[simd_test(enable = "neon")]
6089+
unsafe fn test_vget_low_f32() {
59106090
let a = f32x4::new(1.0, 2.0, 3.0, 4.0);
5911-
let e = f32x2::new(3.0, 4.0);
5912-
let r: f32x2 = transmute(vget_high_f32(transmute(a)));
6091+
let e = f32x2::new(1.0, 2.0);
6092+
let r: f32x2 = transmute(vget_low_f32(transmute(a)));
59136093
assert_eq!(r, e);
59146094
}
59156095

0 commit comments

Comments
 (0)