Skip to content

Commit c07038c

Browse files
authored
add vcls, vclz, vcagt, vcage, vcalt, vcale neon instructions (#1072)
1 parent 3f6864a commit c07038c

File tree

5 files changed

+975
-20
lines changed

5 files changed

+975
-20
lines changed

crates/core_arch/src/aarch64/neon/generated.rs

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1015,6 +1015,90 @@ pub unsafe fn vcltzq_f64(a: float64x2_t) -> uint64x2_t {
10151015
simd_lt(a, transmute(b))
10161016
}
10171017

1018+
/// Floating-point absolute compare greater than
///
/// Passes `a` and `b` straight through to the LLVM intrinsic
/// `llvm.aarch64.neon.facgt.v1i64.v1f64` and returns its result unchanged.
/// `test_vcagt_f64` expects an all-ones lane for `a = -1.2`, `b = -1.1`,
/// so the comparison is on magnitudes rather than signed values.
1019+
#[inline]
1020+
#[target_feature(enable = "neon")]
1021+
#[cfg_attr(test, assert_instr(facgt))]
1022+
pub unsafe fn vcagt_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
1023+
#[allow(improper_ctypes)]
1024+
extern "C" {
1025+
// The intrinsic name below is only attached when compiling for aarch64.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facgt.v1i64.v1f64")]
1026+
fn vcagt_f64_(a: float64x1_t, b: float64x1_t) -> uint64x1_t;
1027+
}
1028+
vcagt_f64_(a, b)
1029+
}
1030+
1031+
/// Floating-point absolute compare greater than
///
/// Two-lane variant: passes `a` and `b` straight through to the LLVM intrinsic
/// `llvm.aarch64.neon.facgt.v2i64.v2f64` and returns its result unchanged.
/// `test_vcagtq_f64` expects all ones in the lane where the magnitude of `a`
/// is larger and zero in the lane where both inputs are `0.0`.
1032+
#[inline]
1033+
#[target_feature(enable = "neon")]
1034+
#[cfg_attr(test, assert_instr(facgt))]
1035+
pub unsafe fn vcagtq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
1036+
#[allow(improper_ctypes)]
1037+
extern "C" {
1038+
// The intrinsic name below is only attached when compiling for aarch64.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facgt.v2i64.v2f64")]
1039+
fn vcagtq_f64_(a: float64x2_t, b: float64x2_t) -> uint64x2_t;
1040+
}
1041+
vcagtq_f64_(a, b)
1042+
}
1043+
1044+
/// Floating-point absolute compare greater than or equal
///
/// Passes `a` and `b` straight through to the LLVM intrinsic
/// `llvm.aarch64.neon.facge.v1i64.v1f64` and returns its result unchanged.
/// `test_vcage_f64` expects an all-ones lane for `a = -1.2`, `b = -1.1`,
/// so the comparison is on magnitudes rather than signed values.
1045+
#[inline]
1046+
#[target_feature(enable = "neon")]
1047+
#[cfg_attr(test, assert_instr(facge))]
1048+
pub unsafe fn vcage_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
1049+
#[allow(improper_ctypes)]
1050+
extern "C" {
1051+
// The intrinsic name below is only attached when compiling for aarch64.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facge.v1i64.v1f64")]
1052+
fn vcage_f64_(a: float64x1_t, b: float64x1_t) -> uint64x1_t;
1053+
}
1054+
vcage_f64_(a, b)
1055+
}
1056+
1057+
/// Floating-point absolute compare greater than or equal
///
/// Two-lane variant: passes `a` and `b` straight through to the LLVM intrinsic
/// `llvm.aarch64.neon.facge.v2i64.v2f64` and returns its result unchanged.
/// `test_vcageq_f64` expects all ones in both lanes for a larger-magnitude
/// lane and for an equal (`0.0` vs `0.0`) lane.
1058+
#[inline]
1059+
#[target_feature(enable = "neon")]
1060+
#[cfg_attr(test, assert_instr(facge))]
1061+
pub unsafe fn vcageq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
1062+
#[allow(improper_ctypes)]
1063+
extern "C" {
1064+
// The intrinsic name below is only attached when compiling for aarch64.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facge.v2i64.v2f64")]
1065+
fn vcageq_f64_(a: float64x2_t, b: float64x2_t) -> uint64x2_t;
1066+
}
1067+
vcageq_f64_(a, b)
1068+
}
1069+
1070+
/// Floating-point absolute compare less than
///
/// Computed as `vcagt_f64(b, a)`: "less than" is expressed as "greater than"
/// with the operands swapped, which is why the asserted instruction is `facgt`.
1071+
#[inline]
1072+
#[target_feature(enable = "neon")]
1073+
#[cfg_attr(test, assert_instr(facgt))]
1074+
pub unsafe fn vcalt_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
1075+
// Operands are deliberately reversed.
vcagt_f64(b, a)
1076+
}
1077+
1078+
/// Floating-point absolute compare less than
///
/// Two-lane variant, computed as `vcagtq_f64(b, a)`: "less than" is expressed
/// as "greater than" with the operands swapped, which is why the asserted
/// instruction is `facgt`.
1079+
#[inline]
1080+
#[target_feature(enable = "neon")]
1081+
#[cfg_attr(test, assert_instr(facgt))]
1082+
pub unsafe fn vcaltq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
1083+
// Operands are deliberately reversed.
vcagtq_f64(b, a)
1084+
}
1085+
1086+
/// Floating-point absolute compare less than or equal
///
/// Computed as `vcage_f64(b, a)`: "less than or equal" is expressed as
/// "greater than or equal" with the operands swapped, which is why the
/// asserted instruction is `facge`.
1087+
#[inline]
1088+
#[target_feature(enable = "neon")]
1089+
#[cfg_attr(test, assert_instr(facge))]
1090+
pub unsafe fn vcale_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
1091+
// Operands are deliberately reversed.
vcage_f64(b, a)
1092+
}
1093+
1094+
/// Floating-point absolute compare less than or equal
///
/// Two-lane variant, computed as `vcageq_f64(b, a)`: "less than or equal" is
/// expressed as "greater than or equal" with the operands swapped, which is
/// why the asserted instruction is `facge`.
1095+
#[inline]
1096+
#[target_feature(enable = "neon")]
1097+
#[cfg_attr(test, assert_instr(facge))]
1098+
pub unsafe fn vcaleq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
1099+
// Operands are deliberately reversed.
vcageq_f64(b, a)
1100+
}
1101+
10181102
/// Multiply
10191103
#[inline]
10201104
#[target_feature(enable = "neon")]
@@ -2210,6 +2294,78 @@ mod test {
22102294
assert_eq!(r, e);
22112295
}
22122296

2297+
// Both inputs are negative so a signed compare (-1.2 > -1.1) would be false;
// the expected all-ones lane reflects 1.2 > 1.1 on magnitudes.
#[simd_test(enable = "neon")]
2298+
unsafe fn test_vcagt_f64() {
2299+
let a: f64 = -1.2;
2300+
let b: f64 = -1.1;
2301+
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
2302+
let r: u64x1 = transmute(vcagt_f64(transmute(a), transmute(b)));
2303+
assert_eq!(r, e);
2304+
}
2305+
2306+
// Lane 0: 1.2 > 1.1 on magnitudes, so all ones. Lane 1: 0.0 is not strictly
// greater than 0.0, so zero.
#[simd_test(enable = "neon")]
2307+
unsafe fn test_vcagtq_f64() {
2308+
let a: f64x2 = f64x2::new(-1.2, 0.0);
2309+
let b: f64x2 = f64x2::new(-1.1, 0.0);
2310+
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0);
2311+
let r: u64x2 = transmute(vcagtq_f64(transmute(a), transmute(b)));
2312+
assert_eq!(r, e);
2313+
}
2314+
2315+
// Both inputs are negative so a signed compare (-1.2 >= -1.1) would be false;
// the expected all-ones lane reflects 1.2 >= 1.1 on magnitudes.
#[simd_test(enable = "neon")]
2316+
unsafe fn test_vcage_f64() {
2317+
let a: f64 = -1.2;
2318+
let b: f64 = -1.1;
2319+
let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
2320+
let r: u64x1 = transmute(vcage_f64(transmute(a), transmute(b)));
2321+
assert_eq!(r, e);
2322+
}
2323+
2324+
// Lane 0: 1.2 >= 1.1 on magnitudes. Lane 1: 0.0 >= 0.0 (the equality case).
// Both lanes are therefore expected to be all ones.
#[simd_test(enable = "neon")]
2325+
unsafe fn test_vcageq_f64() {
2326+
let a: f64x2 = f64x2::new(-1.2, 0.0);
2327+
let b: f64x2 = f64x2::new(-1.1, 0.0);
2328+
let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
2329+
let r: u64x2 = transmute(vcageq_f64(transmute(a), transmute(b)));
2330+
assert_eq!(r, e);
2331+
}
2332+
2333+
// A signed compare (-1.2 < -1.1) would be true; the expected zero lane
// reflects that 1.2 < 1.1 is false on magnitudes.
#[simd_test(enable = "neon")]
2334+
unsafe fn test_vcalt_f64() {
2335+
let a: f64 = -1.2;
2336+
let b: f64 = -1.1;
2337+
let e: u64x1 = u64x1::new(0);
2338+
let r: u64x1 = transmute(vcalt_f64(transmute(a), transmute(b)));
2339+
assert_eq!(r, e);
2340+
}
2341+
2342+
// Lane 0: 1.2 < 1.1 is false on magnitudes. Lane 1: 0.0 is not strictly less
// than 0.0. Both lanes are therefore expected to be zero.
#[simd_test(enable = "neon")]
2343+
unsafe fn test_vcaltq_f64() {
2344+
let a: f64x2 = f64x2::new(-1.2, 0.0);
2345+
let b: f64x2 = f64x2::new(-1.1, 0.0);
2346+
let e: u64x2 = u64x2::new(0, 0);
2347+
let r: u64x2 = transmute(vcaltq_f64(transmute(a), transmute(b)));
2348+
assert_eq!(r, e);
2349+
}
2350+
2351+
// A signed compare (-1.2 <= -1.1) would be true; the expected zero lane
// reflects that 1.2 <= 1.1 is false on magnitudes.
#[simd_test(enable = "neon")]
2352+
unsafe fn test_vcale_f64() {
2353+
let a: f64 = -1.2;
2354+
let b: f64 = -1.1;
2355+
let e: u64x1 = u64x1::new(0);
2356+
let r: u64x1 = transmute(vcale_f64(transmute(a), transmute(b)));
2357+
assert_eq!(r, e);
2358+
}
2359+
2360+
// Lane 0: 1.2 <= 1.1 is false on magnitudes, so zero. Lane 1: 0.0 <= 0.0
// (the equality case), so all ones.
#[simd_test(enable = "neon")]
2361+
unsafe fn test_vcaleq_f64() {
2362+
let a: f64x2 = f64x2::new(-1.2, 0.0);
2363+
let b: f64x2 = f64x2::new(-1.1, 0.0);
2364+
let e: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF);
2365+
let r: u64x2 = transmute(vcaleq_f64(transmute(a), transmute(b)));
2366+
assert_eq!(r, e);
2367+
}
2368+
22132369
#[simd_test(enable = "neon")]
22142370
unsafe fn test_vmul_f64() {
22152371
let a: f64 = 1.0;

0 commit comments

Comments
 (0)