Skip to content

Commit aae8eb3

Browse files
mrkajetanp authored and Amanieu committed
core-arch: Add NEON fp16 intrinsics
1 parent 0907585 commit aae8eb3

File tree

7 files changed

+16588
-676
lines changed

7 files changed

+16588
-676
lines changed

Diff for: crates/core_arch/src/aarch64/neon/generated.rs

+4,242-108
Large diffs are not rendered by default.

Diff for: crates/core_arch/src/arm_shared/neon/generated.rs

+6,208-272
Large diffs are not rendered by default.

Diff for: crates/core_arch/src/arm_shared/neon/mod.rs

+36
Original file line numberDiff line numberDiff line change
@@ -4783,6 +4783,24 @@ pub unsafe fn vbsl_u64(a: uint64x1_t, b: uint64x1_t, c: uint64x1_t) -> uint64x1_
47834783
simd_or(simd_and(a, b), simd_and(simd_xor(a, transmute(not)), c))
47844784
}
47854785

4786+
/// Bitwise Select.
///
/// Assembles the result bit by bit using `a` as a mask: wherever a bit of `a`
/// is set the corresponding bit of `b` is taken, otherwise the corresponding
/// bit of `c` is taken, i.e. `(a & b) | (!a & c)` on the raw bits.
/// The `float16x4_t` operands are only reinterpreted with `transmute`; no
/// numeric conversion of the half-precision values is performed here.
///
/// NOTE(review): the function is `unsafe` and compiled with the `neon,fp16`
/// target features enabled; presumably the caller must ensure the running
/// CPU supports them — confirm against the crate's safety conventions.
4787+
#[inline]
4788+
#[target_feature(enable = "neon,fp16")]
4789+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
4790+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
4791+
#[cfg_attr(
4792+
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
4793+
assert_instr(bsl)
4794+
)]
4795+
#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
4796+
pub unsafe fn vbsl_f16(a: uint16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t {
4797+
// Every lane is -1, i.e. all bits set; XOR-ing the mask with this below
// yields the bitwise complement of `a`.
let not = int16x4_t::splat(-1);
4798+
transmute(simd_or(
4799+
simd_and(a, transmute(b)),
4800+
simd_and(simd_xor(a, transmute(not)), transmute(c)),
4801+
))
4802+
}
4803+
47864804
/// Bitwise Select.
47874805
#[inline]
47884806
#[target_feature(enable = "neon")]
@@ -5096,6 +5114,24 @@ pub unsafe fn vbslq_p16(a: uint16x8_t, b: poly16x8_t, c: poly16x8_t) -> poly16x8
50965114
))
50975115
}
50985116

5117+
/// Bitwise Select.
///
/// Eight-lane (`float16x8_t`) counterpart of the bitwise select: using `a` as
/// a mask, each result bit comes from `b` where the mask bit is set and from
/// `c` where it is clear, i.e. `(a & b) | (!a & c)` on the raw bits.
/// The `float16x8_t` operands are only reinterpreted with `transmute`; no
/// numeric conversion of the half-precision values is performed here.
///
/// NOTE(review): the function is `unsafe` and compiled with the `neon,fp16`
/// target features enabled; presumably the caller must ensure the running
/// CPU supports them — confirm against the crate's safety conventions.
5118+
#[inline]
5119+
#[target_feature(enable = "neon,fp16")]
5120+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
5121+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))]
5122+
#[cfg_attr(
5123+
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
5124+
assert_instr(bsl)
5125+
)]
5126+
#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
5127+
pub unsafe fn vbslq_f16(a: uint16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t {
5128+
// Every lane is -1, i.e. all bits set; XOR-ing the mask with this below
// yields the bitwise complement of `a`.
let not = int16x8_t::splat(-1);
5129+
transmute(simd_or(
5130+
simd_and(a, transmute(b)),
5131+
simd_and(simd_xor(a, transmute(not)), transmute(c)),
5132+
))
5133+
}
5134+
50995135
/// Bitwise Select. (128-bit)
51005136
#[inline]
51015137
#[target_feature(enable = "neon")]

Diff for: crates/intrinsic-test/missing_arm.txt

+136-1
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,74 @@ vsri_n_p64
108108
vsriq_n_p64
109109
vtst_p64
110110
vtstq_p64
111+
vaddh_f16
112+
vsubh_f16
113+
vabsh_f16
114+
vdivh_f16
115+
vmulh_f16
116+
vfmsh_f16
117+
vfmah_f16
118+
vminnmh_f16
119+
vmaxnmh_f16
120+
vrndh_f16
121+
vrndnh_f16
122+
vrndih_f16
123+
vrndah_f16
124+
vrndph_f16
125+
vrndmh_f16
126+
vrndxh_f16
127+
vsqrth_f16
128+
vnegh_f16
129+
vcvth_f16_s32
130+
vcvth_s32_f16
131+
vcvth_n_f16_s32
132+
vcvth_n_s32_f16
133+
vcvth_f16_u32
134+
vcvth_u32_f16
135+
vcvth_n_f16_u32
136+
vcvth_n_u32_f16
137+
vcvtah_s32_f16
138+
vcvtah_u32_f16
139+
vcvtmh_s32_f16
140+
vcvtmh_u32_f16
141+
vcvtpq_s16_f16
142+
vcvtpq_u16_f16
143+
vcvtp_s16_f16
144+
vcvtp_u16_f16
145+
vcvtph_s32_f16
146+
vcvtph_u32_f16
147+
vcvtnh_u32_f16
148+
vcvtnh_s32_f16
149+
vfmlsl_low_f16
150+
vfmlslq_low_f16
151+
vfmlsl_high_f16
152+
vfmlslq_high_f16
153+
vfmlsl_lane_high_f16
154+
vfmlsl_laneq_high_f16
155+
vfmlslq_lane_high_f16
156+
vfmlslq_laneq_high_f16
157+
vfmlsl_lane_low_f16
158+
vfmlsl_laneq_low_f16
159+
vfmlslq_lane_low_f16
160+
vfmlslq_laneq_low_f16
161+
vfmlal_low_f16
162+
vfmlalq_low_f16
163+
vfmlal_high_f16
164+
vfmlalq_high_f16
165+
vfmlal_lane_low_f16
166+
vfmlal_laneq_low_f16
167+
vfmlalq_lane_low_f16
168+
vfmlalq_laneq_low_f16
169+
vfmlal_lane_high_f16
170+
vfmlal_laneq_high_f16
171+
vfmlalq_lane_high_f16
172+
vfmlalq_laneq_high_f16
173+
vreinterpret_f16_p64
174+
vreinterpretq_f16_p64
175+
vreinterpret_p64_f16
176+
vreinterpretq_p64_f16
177+
vreinterpret_p128_f16
178+
vreinterpretq_p128_f16
111179

112180
# Present in Clang header but triggers an ICE due to lack of backend support.
113181
vcmla_f32
@@ -134,6 +202,31 @@ vcmlaq_rot270_laneq_f32
134202
vcmlaq_rot90_f32
135203
vcmlaq_rot90_lane_f32
136204
vcmlaq_rot90_laneq_f32
205+
vcmla_f16
206+
vcmlaq_f16
207+
vcmla_laneq_f16
208+
vcmla_lane_f16
209+
vcmla_laneq_f16
210+
vcmlaq_lane_f16
211+
vcmlaq_laneq_f16
212+
vcmla_rot90_f16
213+
vcmlaq_rot90_f16
214+
vcmla_rot180_f16
215+
vcmlaq_rot180_f16
216+
vcmla_rot270_f16
217+
vcmlaq_rot270_f16
218+
vcmla_rot90_lane_f16
219+
vcmla_rot90_laneq_f16
220+
vcmlaq_rot90_lane_f16
221+
vcmlaq_rot90_laneq_f16
222+
vcmla_rot180_lane_f16
223+
vcmla_rot180_laneq_f16
224+
vcmlaq_rot180_lane_f16
225+
vcmlaq_rot180_laneq_f16
226+
vcmla_rot270_lane_f16
227+
vcmla_rot270_laneq_f16
228+
vcmlaq_rot270_lane_f16
229+
vcmlaq_rot270_laneq_f16
137230

138231
# Implemented in stdarch for A64 only, Clang support both A32/A64
139232
vadd_s64
@@ -182,4 +275,46 @@ vrndpq_f32
182275
vrndq_f32
183276
vrndq_f32
184277
vrndx_f32
185-
vrndxq_f32
278+
vrndxq_f32
279+
vrnda_f16
280+
vrnda_f16
281+
vrndaq_f16
282+
vrndaq_f16
283+
vrnd_f16
284+
vrnd_f16
285+
vrndi_f16
286+
vrndi_f16
287+
vrndiq_f16
288+
vrndiq_f16
289+
vrndm_f16
290+
vrndm_f16
291+
vrndmq_f16
292+
vrndmq_f16
293+
vrndns_f16
294+
vrndp_f16
295+
vrndpq_f16
296+
vrndq_f16
297+
vrndx_f16
298+
vrndxq_f16
299+
vpmin_f16
300+
vpmax_f16
301+
vcaddq_rot270_f16
302+
vcaddq_rot90_f16
303+
vcadd_rot270_f16
304+
vcadd_rot90_f16
305+
vcvtm_s16_f16
306+
vcvtmq_s16_f16
307+
vcvtm_u16_f16
308+
vcvtmq_u16_f16
309+
vcvtaq_s16_f16
310+
vcvtaq_u16_f16
311+
vcvtnq_s16_f16
312+
vcvtnq_u16_f16
313+
vcvtn_s16_f16
314+
vcvtn_u16_f16
315+
vcvtaq_s16_f16
316+
vcvtaq_u16_f16
317+
vcvta_s16_f16
318+
vcvta_u16_f16
319+
vceqz_f16
320+
vceqzq_f16

Diff for: crates/intrinsic-test/src/main.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ fn generate_rust_program(notices: &str, intrinsic: &Intrinsic, target: &str) ->
194194
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sha3))]
195195
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sm4))]
196196
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_ftts))]
197-
#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_f16))]
197+
#![feature(stdarch_neon_f16)]
198198
#![allow(non_upper_case_globals)]
199199
use core_arch::arch::{target_arch}::*;
200200

0 commit comments

Comments
 (0)