Skip to content

Commit 753271c

Browse files
authored
aarch64 neon intrinsics: vmaxq_f32, vminq_f32, vaddvq_f32, vrndnq_f32 (rust-lang#1533)
1 parent 5349365 commit 753271c

File tree

2 files changed

+81
-0
lines changed

2 files changed

+81
-0
lines changed

example/neon.rs

+43
Original file line number | Diff line number | Diff line change
@@ -202,6 +202,44 @@ unsafe fn test_vqadd_u8() {
202202
assert_eq!(r, e);
203203
}
204204

205+
/// Checks that `vmaxq_f32` yields the lane-wise maximum of two four-lane
/// `f32` vectors.
#[cfg(target_arch = "aarch64")]
unsafe fn test_vmaxq_f32() {
    // Exercises the AArch64 LLVM intrinsic `llvm.aarch64.neon.fmax.v4f32`.
    let lhs = f32x4::from([0., -1., 2., -3.]);
    let rhs = f32x4::from([-4., 5., -6., 7.]);
    // Hand-computed: the larger value of each lane pair.
    let expected = f32x4::from([0., 5., 2., 7.]);

    let actual = transmute::<_, f32x4>(vmaxq_f32(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}
214+
215+
/// Checks that `vminq_f32` yields the lane-wise minimum of two four-lane
/// `f32` vectors.
#[cfg(target_arch = "aarch64")]
unsafe fn test_vminq_f32() {
    // Exercises the AArch64 LLVM intrinsic `llvm.aarch64.neon.fmin.v4f32`.
    let lhs = f32x4::from([0., -1., 2., -3.]);
    let rhs = f32x4::from([-4., 5., -6., 7.]);
    // Hand-computed: the smaller value of each lane pair.
    let expected = f32x4::from([-4., -1., -6., -3.]);

    let actual = transmute::<_, f32x4>(vminq_f32(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}
224+
225+
/// Checks that `vaddvq_f32` sums all four lanes of an `f32` vector into a
/// single scalar.
#[cfg(target_arch = "aarch64")]
unsafe fn test_vaddvq_f32() {
    // Exercises the AArch64 LLVM intrinsic `llvm.aarch64.neon.faddv.f32.v4f32`.
    let lanes = f32x4::from([0., 1., 2., 3.]);
    // 0 + 1 + 2 + 3
    let expected = 6f32;

    let actual = vaddvq_f32(transmute(lanes));
    assert_eq!(actual, expected);
}
233+
234+
/// Checks that `vrndnq_f32` rounds every lane to the nearest integral value,
/// including two halfway inputs (4.5 and 5.5).
#[cfg(target_arch = "aarch64")]
unsafe fn test_vrndnq_f32() {
    // Exercises the AArch64 LLVM intrinsic `llvm.aarch64.neon.frintn.v4f32`.
    let input = f32x4::from([0.1, -1.9, 4.5, 5.5]);
    // The halfway cases are expected to land on the even neighbour: 4.5 -> 4, 5.5 -> 6.
    let expected = f32x4::from([0., -2., 4., 6.]);

    let actual = transmute::<_, f32x4>(vrndnq_f32(transmute(input)));
    assert_eq!(actual, expected);
}
242+
205243
#[cfg(target_arch = "aarch64")]
206244
fn main() {
207245
unsafe {
@@ -229,6 +267,11 @@ fn main() {
229267

230268
test_vqsub_u8();
231269
test_vqadd_u8();
270+
271+
test_vmaxq_f32();
272+
test_vminq_f32();
273+
test_vaddvq_f32();
274+
test_vrndnq_f32();
232275
}
233276
}
234277

src/intrinsics/llvm_aarch64.rs

+38
Original file line number | Diff line number | Diff line change
@@ -91,6 +91,44 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
9191
);
9292
}
9393

94+
_ if intrinsic.starts_with("llvm.aarch64.neon.fmax.v") => {
95+
intrinsic_args!(fx, args => (x, y); intrinsic);
96+
97+
simd_pair_for_each_lane(
98+
fx,
99+
x,
100+
y,
101+
ret,
102+
&|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| fx.bcx.ins().fmax(x_lane, y_lane),
103+
);
104+
}
105+
106+
_ if intrinsic.starts_with("llvm.aarch64.neon.fmin.v") => {
107+
intrinsic_args!(fx, args => (x, y); intrinsic);
108+
109+
simd_pair_for_each_lane(
110+
fx,
111+
x,
112+
y,
113+
ret,
114+
&|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| fx.bcx.ins().fmin(x_lane, y_lane),
115+
);
116+
}
117+
118+
_ if intrinsic.starts_with("llvm.aarch64.neon.faddv.f32.v") => {
119+
intrinsic_args!(fx, args => (v); intrinsic);
120+
121+
simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| fx.bcx.ins().fadd(a, b));
122+
}
123+
124+
_ if intrinsic.starts_with("llvm.aarch64.neon.frintn.v") => {
125+
intrinsic_args!(fx, args => (v); intrinsic);
126+
127+
simd_for_each_lane(fx, v, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
128+
fx.bcx.ins().nearest(lane)
129+
});
130+
}
131+
94132
_ if intrinsic.starts_with("llvm.aarch64.neon.smaxv.i") => {
95133
intrinsic_args!(fx, args => (v); intrinsic);
96134

0 commit comments

Comments
 (0)