Skip to content

Commit 9ed21c4

Browse files
committed
Add __extendhfdf2 and add __truncdfhf2 test
LLVM doesn't seem to emit this intrinsic, but it probably should: in some cases it lowers f16->f64 conversions as f16->f32->f64 with two libcalls [1]. GCC provides this intrinsic, so it is good to have anyway. Additionally, add a test for f64->f16, which was missing. [1]: https://rust.godbolt.org/z/xezM9PEnz
1 parent f6a6911 commit 9ed21c4

File tree

6 files changed

+47
-2
lines changed

6 files changed

+47
-2
lines changed

src/float/extend.rs

+8
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,14 @@ intrinsics! {
9696
extend(a)
9797
}
9898

99+
// Extends an `f16` value to `f64` in a single call by delegating to the
// `extend` helper. Only compiled when the `f16_enabled` cfg is set.
// Per the commit message, GCC provides this symbol while LLVM may instead
// lower f16->f64 as two libcalls (f16->f32->f64).
#[avr_skip]
100+
#[aapcs_on_arm]
101+
#[apple_f16_arg_abi]
102+
#[cfg(f16_enabled)]
103+
pub extern "C" fn __extendhfdf2(a: f16) -> f64 {
104+
extend(a)
105+
}
106+
99107
#[avr_skip]
100108
#[aapcs_on_arm]
101109
#[ppc_alias = __extendhfkf2]

testcrate/Cargo.toml

+2-1
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,9 @@ no-sys-f128 = ["no-sys-f128-int-convert", "no-sys-f16-f128-convert"]
4343
# Some platforms have some f128 functions but everything except integer conversions
4444
no-sys-f128-int-convert = []
4545
no-sys-f16-f128-convert = []
46+
no-sys-f16-f64-convert = []
4647
# Skip tests that rely on f16 symbols being available on the system
47-
no-sys-f16 = []
48+
no-sys-f16 = ["no-sys-f16-f64-convert"]
4849

4950
# Enable report generation without bringing in more dependencies by default
5051
benchmarking-reports = ["criterion/plotters", "criterion/html_reports"]

testcrate/benches/float_extend.rs

+23
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,28 @@ float_bench! {
2828
],
2929
}
3030

31+
// Benchmark for the `f16` -> `f64` extension, only built when the
// `f16_enabled` cfg is set. It pairs the crate's `extend::__extendhfdf2` with
// the system `__extendhfdf2` symbol, which is declared available only when the
// `no-sys-f16-f64-convert` feature is off. On aarch64 it also supplies an
// inline-assembly variant that issues a single `fcvt` from the input vector
// register to the output vector register.
#[cfg(f16_enabled)]
32+
float_bench! {
33+
name: extend_f16_f64,
34+
sig: (a: f16) -> f64,
35+
crate_fn: extend::__extendhfdf2,
36+
sys_fn: __extendhfdf2,
37+
sys_available: not(feature = "no-sys-f16-f64-convert"),
38+
asm: [
39+
#[cfg(target_arch = "aarch64")] {
40+
let ret: f64;
41+
asm!(
42+
"fcvt {ret:d}, {a:h}",
43+
a = in(vreg) a,
44+
ret = lateout(vreg) ret,
45+
options(nomem, nostack, pure),
46+
);
47+
48+
ret
49+
};
50+
],
51+
}
52+
3153
#[cfg(all(f16_enabled, f128_enabled))]
3254
float_bench! {
3355
name: extend_f16_f128,
@@ -93,6 +115,7 @@ pub fn float_extend() {
93115
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
94116
{
95117
extend_f16_f32(&mut criterion);
118+
extend_f16_f64(&mut criterion);
96119

97120
#[cfg(f128_enabled)]
98121
extend_f16_f128(&mut criterion);

testcrate/benches/float_trunc.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ float_bench! {
3333
sig: (a: f64) -> f16,
3434
crate_fn: trunc::__truncdfhf2,
3535
sys_fn: __truncdfhf2,
36-
sys_available: not(feature = "no-sys-f16"),
36+
sys_available: not(feature = "no-sys-f16-f64-convert"),
3737
asm: [
3838
#[cfg(target_arch = "aarch64")] {
3939
let ret: f16;

testcrate/build.rs

+11
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ enum Feature {
66
NoSysF128,
77
NoSysF128IntConvert,
88
NoSysF16,
9+
NoSysF16F64Convert,
910
NoSysF16F128Convert,
1011
}
1112

@@ -66,16 +67,26 @@ fn main() {
6667
|| target.arch == "wasm64"
6768
{
6869
features.insert(Feature::NoSysF16);
70+
features.insert(Feature::NoSysF16F64Convert);
6971
features.insert(Feature::NoSysF16F128Convert);
7072
}
7173

74+
// These platforms are missing either `__extendhfdf2` or `__truncdfhf2`.
75+
if target.vendor == "apple" || target.os == "windows" {
76+
features.insert(Feature::NoSysF16F64Convert);
77+
}
78+
7279
for feature in features {
7380
let (name, warning) = match feature {
7481
Feature::NoSysF128 => ("no-sys-f128", "using apfloat fallback for f128"),
7582
Feature::NoSysF128IntConvert => (
7683
"no-sys-f128-int-convert",
7784
"using apfloat fallback for f128 <-> int conversions",
7885
),
86+
Feature::NoSysF16F64Convert => (
87+
"no-sys-f16-f64-convert",
88+
"using apfloat fallback for f16 <-> f64 conversions",
89+
),
7990
Feature::NoSysF16F128Convert => (
8091
"no-sys-f16-f128-convert",
8192
"using apfloat fallback for f16 <-> f128 conversions",

testcrate/tests/conv.rs

+2
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,7 @@ mod extend {
311311
extend,
312312
f16 => f32, Half => Single, __extendhfsf2, not(feature = "no-sys-f16");
313313
f16 => f32, Half => Single, __gnu_h2f_ieee, not(feature = "no-sys-f16");
314+
f16 => f64, Half => Double, __extendhfdf2, not(feature = "no-sys-f16-f64-convert");
314315
f16 => f128, Half => Quad, __extendhftf2, not(feature = "no-sys-f16-f128-convert");
315316
f32 => f128, Single => Quad, __extendsftf2, not(feature = "no-sys-f128");
316317
f64 => f128, Double => Quad, __extenddftf2, not(feature = "no-sys-f128");
@@ -340,6 +341,7 @@ mod trunc {
340341
trunc,
341342
f32 => f16, Single => Half, __truncsfhf2, not(feature = "no-sys-f16");
342343
f32 => f16, Single => Half, __gnu_f2h_ieee, not(feature = "no-sys-f16");
344+
f64 => f16, Double => Half, __truncdfhf2, not(feature = "no-sys-f16-f64-convert");
343345
f128 => f16, Quad => Half, __trunctfhf2, not(feature = "no-sys-f16-f128-convert");
344346
f128 => f32, Quad => Single, __trunctfsf2, not(feature = "no-sys-f128");
345347
f128 => f64, Quad => Double, __trunctfdf2, not(feature = "no-sys-f128");

0 commit comments

Comments
 (0)