Skip to content

Commit abd44fc

Browse files
committed
Add f16 and f128 inline ASM support for aarch64
1 parent ec67cdf commit abd44fc

File tree

5 files changed

+130
-39
lines changed

5 files changed

+130
-39
lines changed

compiler/rustc_codegen_llvm/src/asm.rs

+11-3
Original file line numberDiff line numberDiff line change
@@ -913,8 +913,10 @@ fn llvm_asm_scalar_type<'ll>(cx: &CodegenCx<'ll, '_>, scalar: Scalar) -> &'ll Ty
913913
Primitive::Int(Integer::I16, _) => cx.type_i16(),
914914
Primitive::Int(Integer::I32, _) => cx.type_i32(),
915915
Primitive::Int(Integer::I64, _) => cx.type_i64(),
916+
Primitive::Float(Float::F16) => cx.type_f16(),
916917
Primitive::Float(Float::F32) => cx.type_f32(),
917918
Primitive::Float(Float::F64) => cx.type_f64(),
919+
Primitive::Float(Float::F128) => cx.type_f128(),
918920
// FIXME(erikdesjardins): handle non-default addrspace ptr sizes
919921
Primitive::Pointer(_) => cx.type_from_integer(dl.ptr_sized_integer()),
920922
_ => unreachable!(),
@@ -948,7 +950,9 @@ fn llvm_fixup_input<'ll, 'tcx>(
948950
value
949951
}
950952
}
951-
(InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s)) => {
953+
(InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s))
954+
if s.primitive() != Primitive::Float(Float::F128) =>
955+
{
952956
let elem_ty = llvm_asm_scalar_type(bx.cx, s);
953957
let count = 16 / layout.size.bytes();
954958
let vec_ty = bx.cx.type_vector(elem_ty, count);
@@ -1090,7 +1094,9 @@ fn llvm_fixup_output<'ll, 'tcx>(
10901094
value
10911095
}
10921096
}
1093-
(InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s)) => {
1097+
(InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s))
1098+
if s.primitive() != Primitive::Float(Float::F128) =>
1099+
{
10941100
value = bx.extract_element(value, bx.const_i32(0));
10951101
if let Primitive::Pointer(_) = s.primitive() {
10961102
value = bx.inttoptr(value, layout.llvm_type(bx.cx));
@@ -1222,7 +1228,9 @@ fn llvm_fixup_output_type<'ll, 'tcx>(
12221228
layout.llvm_type(cx)
12231229
}
12241230
}
1225-
(InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s)) => {
1231+
(InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s))
1232+
if s.primitive() != Primitive::Float(Float::F128) =>
1233+
{
12261234
let elem_ty = llvm_asm_scalar_type(cx, s);
12271235
let count = 16 / layout.size.bytes();
12281236
cx.type_vector(elem_ty, count)

compiler/rustc_target/src/asm/aarch64.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,9 @@ impl AArch64InlineAsmRegClass {
6161
match self {
6262
Self::reg => types! { _: I8, I16, I32, I64, F16, F32, F64; },
6363
Self::vreg | Self::vreg_low16 => types! {
64-
neon: I8, I16, I32, I64, F16, F32, F64,
65-
VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF32(2), VecF64(1),
66-
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(4),VecF16(8), VecF32(4), VecF64(2);
64+
neon: I8, I16, I32, I64, F16, F32, F64, F128,
65+
VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF16(4), VecF32(2), VecF64(1),
66+
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(8), VecF32(4), VecF64(2);
6767
},
6868
Self::preg => &[],
6969
}

tests/assembly/asm/aarch64-types.rs

+106-27
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,12 @@
55
//@ [arm64ec] compile-flags: --target arm64ec-pc-windows-msvc
66
//@ [arm64ec] needs-llvm-components: aarch64
77

8-
#![feature(no_core, lang_items, rustc_attrs, repr_simd, asm_experimental_arch, f16)]
8+
#![feature(no_core, lang_items, rustc_attrs, repr_simd, asm_experimental_arch, f16, f128)]
99
#![crate_type = "rlib"]
1010
#![no_core]
1111
#![allow(asm_sub_register, non_camel_case_types)]
12+
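// FIXME(f16_f128): `auto_traits` is only needed to declare the `Freeze` and `Unpin` lang items below, which exist solely for the workaround described in the FIXME in `check!` and `check_reg!`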
13+
#![feature(auto_traits)]
1214

1315
#[rustc_builtin_macro]
1416
macro_rules! asm {
@@ -41,8 +43,6 @@ pub struct i64x1(i64);
4143
#[repr(simd)]
4244
pub struct f16x4(f16, f16, f16, f16);
4345
#[repr(simd)]
44-
pub struct f16x8(f16, f16, f16, f16, f16, f16, f16, f16);
45-
#[repr(simd)]
4646
pub struct f32x2(f32, f32);
4747
#[repr(simd)]
4848
pub struct f64x1(f64);
@@ -55,6 +55,8 @@ pub struct i32x4(i32, i32, i32, i32);
5555
#[repr(simd)]
5656
pub struct i64x2(i64, i64);
5757
#[repr(simd)]
58+
pub struct f16x8(f16, f16, f16, f16, f16, f16, f16, f16);
59+
#[repr(simd)]
5860
pub struct f32x4(f32, f32, f32, f32);
5961
#[repr(simd)]
6062
pub struct f64x2(f64, f64);
@@ -66,13 +68,14 @@ impl Copy for i32 {}
6668
impl Copy for f32 {}
6769
impl Copy for i64 {}
6870
impl Copy for f64 {}
71+
impl Copy for f128 {}
6972
impl Copy for ptr {}
7073
impl Copy for i8x8 {}
7174
impl Copy for i16x4 {}
7275
impl Copy for i32x2 {}
7376
impl Copy for i64x1 {}
74-
impl Copy for f32x2 {}
7577
impl Copy for f16x4 {}
78+
impl Copy for f32x2 {}
7679
impl Copy for f64x1 {}
7780
impl Copy for i8x16 {}
7881
impl Copy for i16x8 {}
@@ -82,6 +85,12 @@ impl Copy for f16x8 {}
8285
impl Copy for f32x4 {}
8386
impl Copy for f64x2 {}
8487

88+
// FIXME(f16_f128): The `Freeze` and `Unpin` lang items are only needed for the workaround described in the FIXME in `check!` and `check_reg!`
89+
#[lang = "freeze"]
90+
unsafe auto trait Freeze {}
91+
#[lang = "unpin"]
92+
auto trait Unpin {}
93+
8594
extern "C" {
8695
fn extern_func();
8796
static extern_static: u8;
@@ -118,38 +127,44 @@ pub unsafe fn issue_75761() {
118127

119128
macro_rules! check {
120129
($func:ident $ty:ident $class:ident $mov:literal $modifier:literal) => {
130+
// FIXME(f16_f128): Change back to `$func(x: $ty) -> $ty` once arm64ec can pass and return
131+
// `f16` and `f128` without LLVM erroring.
132+
// LLVM issue: <https://github.com/llvm/llvm-project/issues/94434>
121133
#[no_mangle]
122-
pub unsafe fn $func(x: $ty) -> $ty {
134+
pub unsafe fn $func(inp: &$ty, out: &mut $ty) {
123135
// Hack to avoid function merging: pass this function's own name to an extern function so each generated body is distinct
124136
extern "Rust" {
125137
fn dont_merge(s: &str);
126138
}
127139
dont_merge(stringify!($func));
128140

141+
let x = *inp;
129142
let y;
130143
asm!(
131144
concat!($mov, " {:", $modifier, "}, {:", $modifier, "}"),
132145
out($class) y,
133146
in($class) x
134147
);
135-
y
148+
*out = y;
136149
}
137150
};
138151
}
139152

140153
macro_rules! check_reg {
141154
($func:ident $ty:ident $reg:tt $mov:literal) => {
155+
// FIXME(f16_f128): See FIXME in `check!`
142156
#[no_mangle]
143-
pub unsafe fn $func(x: $ty) -> $ty {
157+
pub unsafe fn $func(inp: &$ty, out: &mut $ty) {
144158
// Hack to avoid function merging: pass this function's own name to an extern function so each generated body is distinct
145159
extern "Rust" {
146160
fn dont_merge(s: &str);
147161
}
148162
dont_merge(stringify!($func));
149163

164+
let x = *inp;
150165
let y;
151166
asm!(concat!($mov, " ", $reg, ", ", $reg), lateout($reg) y, in($reg) x);
152-
y
167+
*out = y;
153168
}
154169
};
155170
}
@@ -166,18 +181,18 @@ check!(reg_i8 i8 reg "mov" "");
166181
// CHECK: //NO_APP
167182
check!(reg_i16 i16 reg "mov" "");
168183

184+
// CHECK-LABEL: {{("#)?}}reg_f16{{"?}}
185+
// CHECK: //APP
186+
// CHECK: mov x{{[0-9]+}}, x{{[0-9]+}}
187+
// CHECK: //NO_APP
188+
check!(reg_f16 f16 reg "mov" "");
189+
169190
// CHECK-LABEL: {{("#)?}}reg_i32{{"?}}
170191
// CHECK: //APP
171192
// CHECK: mov x{{[0-9]+}}, x{{[0-9]+}}
172193
// CHECK: //NO_APP
173194
check!(reg_i32 i32 reg "mov" "");
174195

175-
// CHECK-LABEL: reg_f16:
176-
// CHECK: @APP
177-
// CHECK: mov {{[a-z0-9]+}}, {{[a-z0-9]+}}
178-
// CHECK: @NO_APP
179-
check!(reg_f16 f16 reg "mov");
180-
181196
// CHECK-LABEL: {{("#)?}}reg_f32{{"?}}
182197
// CHECK: //APP
183198
// CHECK: mov x{{[0-9]+}}, x{{[0-9]+}}
@@ -214,6 +229,12 @@ check!(vreg_i8 i8 vreg "fmov" "s");
214229
// CHECK: //NO_APP
215230
check!(vreg_i16 i16 vreg "fmov" "s");
216231

232+
// CHECK-LABEL: {{("#)?}}vreg_f16{{"?}}
233+
// CHECK: //APP
234+
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
235+
// CHECK: //NO_APP
236+
check!(vreg_f16 f16 vreg "fmov" "s");
237+
217238
// CHECK-LABEL: {{("#)?}}vreg_i32{{"?}}
218239
// CHECK: //APP
219240
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
@@ -238,6 +259,12 @@ check!(vreg_i64 i64 vreg "fmov" "s");
238259
// CHECK: //NO_APP
239260
check!(vreg_f64 f64 vreg "fmov" "s");
240261

262+
// CHECK-LABEL: {{("#)?}}vreg_f128{{"?}}
263+
// CHECK: //APP
264+
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
265+
// CHECK: //NO_APP
266+
check!(vreg_f128 f128 vreg "fmov" "s");
267+
241268
// CHECK-LABEL: {{("#)?}}vreg_ptr{{"?}}
242269
// CHECK: //APP
243270
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
@@ -268,19 +295,11 @@ check!(vreg_i32x2 i32x2 vreg "fmov" "s");
268295
// CHECK: //NO_APP
269296
check!(vreg_i64x1 i64x1 vreg "fmov" "s");
270297

271-
// neon-LABEL: vreg_f16x4:
272-
// neon: @APP
273-
// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
274-
// neon: @NO_APP
275-
#[cfg(neon)]
276-
check!(vreg_f16x4 f16x4 vreg "vmov.f64");
277-
278-
// neon-LABEL: vreg_f16x8:
279-
// neon: @APP
280-
// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
281-
// neon: @NO_APP
282-
#[cfg(neon)]
283-
check!(vreg_f16x8 f16x8 vreg "vmov");
298+
// CHECK-LABEL: {{("#)?}}vreg_f16x4{{"?}}
299+
// CHECK: //APP
300+
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
301+
// CHECK: //NO_APP
302+
check!(vreg_f16x4 f16x4 vreg "fmov" "s");
284303

285304
// CHECK-LABEL: {{("#)?}}vreg_f32x2{{"?}}
286305
// CHECK: //APP
@@ -318,6 +337,12 @@ check!(vreg_i32x4 i32x4 vreg "fmov" "s");
318337
// CHECK: //NO_APP
319338
check!(vreg_i64x2 i64x2 vreg "fmov" "s");
320339

340+
// CHECK-LABEL: {{("#)?}}vreg_f16x8{{"?}}
341+
// CHECK: //APP
342+
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
343+
// CHECK: //NO_APP
344+
check!(vreg_f16x8 f16x8 vreg "fmov" "s");
345+
321346
// CHECK-LABEL: {{("#)?}}vreg_f32x4{{"?}}
322347
// CHECK: //APP
323348
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
@@ -342,6 +367,12 @@ check!(vreg_low16_i8 i8 vreg_low16 "fmov" "s");
342367
// CHECK: //NO_APP
343368
check!(vreg_low16_i16 i16 vreg_low16 "fmov" "s");
344369

370+
// CHECK-LABEL: {{("#)?}}vreg_low16_f16{{"?}}
371+
// CHECK: //APP
372+
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
373+
// CHECK: //NO_APP
374+
check!(vreg_low16_f16 f16 vreg_low16 "fmov" "s");
375+
345376
// CHECK-LABEL: {{("#)?}}vreg_low16_f32{{"?}}
346377
// CHECK: //APP
347378
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
@@ -360,6 +391,12 @@ check!(vreg_low16_i64 i64 vreg_low16 "fmov" "s");
360391
// CHECK: //NO_APP
361392
check!(vreg_low16_f64 f64 vreg_low16 "fmov" "s");
362393

394+
// CHECK-LABEL: {{("#)?}}vreg_low16_f128{{"?}}
395+
// CHECK: //APP
396+
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
397+
// CHECK: //NO_APP
398+
check!(vreg_low16_f128 f128 vreg_low16 "fmov" "s");
399+
363400
// CHECK-LABEL: {{("#)?}}vreg_low16_ptr{{"?}}
364401
// CHECK: //APP
365402
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
@@ -390,6 +427,12 @@ check!(vreg_low16_i32x2 i32x2 vreg_low16 "fmov" "s");
390427
// CHECK: //NO_APP
391428
check!(vreg_low16_i64x1 i64x1 vreg_low16 "fmov" "s");
392429

430+
// CHECK-LABEL: {{("#)?}}vreg_low16_f16x4{{"?}}
431+
// CHECK: //APP
432+
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
433+
// CHECK: //NO_APP
434+
check!(vreg_low16_f16x4 f16x4 vreg_low16 "fmov" "s");
435+
393436
// CHECK-LABEL: {{("#)?}}vreg_low16_f32x2{{"?}}
394437
// CHECK: //APP
395438
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
@@ -426,6 +469,12 @@ check!(vreg_low16_i32x4 i32x4 vreg_low16 "fmov" "s");
426469
// CHECK: //NO_APP
427470
check!(vreg_low16_i64x2 i64x2 vreg_low16 "fmov" "s");
428471

472+
// CHECK-LABEL: {{("#)?}}vreg_low16_f16x8{{"?}}
473+
// CHECK: //APP
474+
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
475+
// CHECK: //NO_APP
476+
check!(vreg_low16_f16x8 f16x8 vreg_low16 "fmov" "s");
477+
429478
// CHECK-LABEL: {{("#)?}}vreg_low16_f32x4{{"?}}
430479
// CHECK: //APP
431480
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
@@ -450,6 +499,12 @@ check_reg!(x0_i8 i8 "x0" "mov");
450499
// CHECK: //NO_APP
451500
check_reg!(x0_i16 i16 "x0" "mov");
452501

502+
// CHECK-LABEL: {{("#)?}}x0_f16{{"?}}
503+
// CHECK: //APP
504+
// CHECK: mov x{{[0-9]+}}, x{{[0-9]+}}
505+
// CHECK: //NO_APP
506+
check_reg!(x0_f16 f16 "x0" "mov");
507+
453508
// CHECK-LABEL: {{("#)?}}x0_i32{{"?}}
454509
// CHECK: //APP
455510
// CHECK: mov x{{[0-9]+}}, x{{[0-9]+}}
@@ -492,6 +547,12 @@ check_reg!(v0_i8 i8 "s0" "fmov");
492547
// CHECK: //NO_APP
493548
check_reg!(v0_i16 i16 "s0" "fmov");
494549

550+
// CHECK-LABEL: {{("#)?}}v0_f16{{"?}}
551+
// CHECK: //APP
552+
// CHECK: fmov s0, s0
553+
// CHECK: //NO_APP
554+
check_reg!(v0_f16 f16 "s0" "fmov");
555+
495556
// CHECK-LABEL: {{("#)?}}v0_i32{{"?}}
496557
// CHECK: //APP
497558
// CHECK: fmov s0, s0
@@ -516,6 +577,12 @@ check_reg!(v0_i64 i64 "s0" "fmov");
516577
// CHECK: //NO_APP
517578
check_reg!(v0_f64 f64 "s0" "fmov");
518579

580+
// CHECK-LABEL: {{("#)?}}v0_f128{{"?}}
581+
// CHECK: //APP
582+
// CHECK: fmov s0, s0
583+
// CHECK: //NO_APP
584+
check_reg!(v0_f128 f128 "s0" "fmov");
585+
519586
// CHECK-LABEL: {{("#)?}}v0_ptr{{"?}}
520587
// CHECK: //APP
521588
// CHECK: fmov s0, s0
@@ -546,6 +613,12 @@ check_reg!(v0_i32x2 i32x2 "s0" "fmov");
546613
// CHECK: //NO_APP
547614
check_reg!(v0_i64x1 i64x1 "s0" "fmov");
548615

616+
// CHECK-LABEL: {{("#)?}}v0_f16x4{{"?}}
617+
// CHECK: //APP
618+
// CHECK: fmov s0, s0
619+
// CHECK: //NO_APP
620+
check_reg!(v0_f16x4 f16x4 "s0" "fmov");
621+
549622
// CHECK-LABEL: {{("#)?}}v0_f32x2{{"?}}
550623
// CHECK: //APP
551624
// CHECK: fmov s0, s0
@@ -582,6 +655,12 @@ check_reg!(v0_i32x4 i32x4 "s0" "fmov");
582655
// CHECK: //NO_APP
583656
check_reg!(v0_i64x2 i64x2 "s0" "fmov");
584657

658+
// CHECK-LABEL: {{("#)?}}v0_f16x8{{"?}}
659+
// CHECK: //APP
660+
// CHECK: fmov s0, s0
661+
// CHECK: //NO_APP
662+
check_reg!(v0_f16x8 f16x8 "s0" "fmov");
663+
585664
// CHECK-LABEL: {{("#)?}}v0_f32x4{{"?}}
586665
// CHECK: //APP
587666
// CHECK: fmov s0, s0

tests/ui/asm/aarch64/type-check-3.stderr

+1-1
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ error: type `Simd256bit` cannot be used with this register class
111111
LL | asm!("{}", in(vreg) f64x4);
112112
| ^^^^^
113113
|
114-
= note: register class `vreg` supports these types: i8, i16, i32, i64, f16, f32, f64, i8x8, i16x4, i32x2, i64x1, f32x2, f64x1, i8x16, i16x8, i32x4, i64x2, f16x4, f16x8, f32x4, f64x2
114+
= note: register class `vreg` supports these types: i8, i16, i32, i64, f16, f32, f64, f128, i8x8, i16x4, i32x2, i64x1, f16x4, f32x2, f64x1, i8x16, i16x8, i32x4, i64x2, f16x8, f32x4, f64x2
115115

116116
error: incompatible types for asm inout argument
117117
--> $DIR/type-check-3.rs:88:33

0 commit comments

Comments
 (0)