Add __extendhfdf2 and add __truncdfhf2 test

tgross35 · tgross35 · commit 9ed21c4d56d2 · 2025-03-04T17:36:33.000-05:00
LLVM doesn't seem to emit this intrinsic, but it probably should: in some cases it lowers f16->f64 conversions as f16->f32->f64 with two libcalls [1]. GCC provides this intrinsic, so it is good to have anyway. Additionally, add a test for f64->f16, which was missing. [1]: https://rust.godbolt.org/z/xezM9PEnz
diff --git a/src/float/extend.rs b/src/float/extend.rs
@@ -96,6 +96,14 @@ intrinsics! {
         extend(a)
     }
 
+    #[avr_skip]
+    #[aapcs_on_arm]
+    #[apple_f16_arg_abi]
+    #[cfg(f16_enabled)]
+    pub extern "C" fn __extendhfdf2(a: f16) -> f64 {
+        extend(a)
+    }
+
     #[avr_skip]
     #[aapcs_on_arm]
     #[ppc_alias = __extendhfkf2]
diff --git a/testcrate/Cargo.toml b/testcrate/Cargo.toml
@@ -43,8 +43,9 @@ no-sys-f128 = ["no-sys-f128-int-convert", "no-sys-f16-f128-convert"]
 # Some platforms have some f128 functions but everything except integer conversions
 no-sys-f128-int-convert = []
 no-sys-f16-f128-convert = []
+no-sys-f16-f64-convert = []
 # Skip tests that rely on f16 symbols being available on the system
-no-sys-f16 = []
+no-sys-f16 = ["no-sys-f16-f64-convert"]
 
 # Enable report generation without bringing in more dependencies by default
 benchmarking-reports = ["criterion/plotters", "criterion/html_reports"]
diff --git a/testcrate/benches/float_extend.rs b/testcrate/benches/float_extend.rs
@@ -28,6 +28,28 @@ float_bench! {
     ],
 }
 
+#[cfg(f16_enabled)]
+float_bench! {
+    name: extend_f16_f64,
+    sig: (a: f16) -> f64,
+    crate_fn: extend::__extendhfdf2,
+    sys_fn: __extendhfdf2,
+    sys_available: not(feature = "no-sys-f16-f64-convert"),
+    asm: [
+        #[cfg(target_arch = "aarch64")] {
+            let ret: f64;
+            asm!(
+                "fcvt    {ret:d}, {a:h}",
+                a = in(vreg) a,
+                ret = lateout(vreg) ret,
+                options(nomem, nostack, pure),
+            );
+
+            ret
+        };
+    ],
+}
+
 #[cfg(all(f16_enabled, f128_enabled))]
 float_bench! {
     name: extend_f16_f128,
@@ -93,6 +115,7 @@ pub fn float_extend() {
     #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
     {
         extend_f16_f32(&mut criterion);
+        extend_f16_f64(&mut criterion);
 
         #[cfg(f128_enabled)]
         extend_f16_f128(&mut criterion);
diff --git a/testcrate/benches/float_trunc.rs b/testcrate/benches/float_trunc.rs
@@ -33,7 +33,7 @@ float_bench! {
     sig: (a: f64) -> f16,
     crate_fn: trunc::__truncdfhf2,
     sys_fn: __truncdfhf2,
-    sys_available: not(feature = "no-sys-f16"),
+    sys_available: not(feature = "no-sys-f16-f64-convert"),
     asm: [
         #[cfg(target_arch = "aarch64")] {
             let ret: f16;
diff --git a/testcrate/build.rs b/testcrate/build.rs
@@ -6,6 +6,7 @@ enum Feature {
     NoSysF128,
     NoSysF128IntConvert,
     NoSysF16,
+    NoSysF16F64Convert,
     NoSysF16F128Convert,
 }
 
@@ -66,16 +67,26 @@ fn main() {
         || target.arch == "wasm64"
     {
         features.insert(Feature::NoSysF16);
+        features.insert(Feature::NoSysF16F64Convert);
         features.insert(Feature::NoSysF16F128Convert);
     }
 
+    // These platforms are missing either `__extendhfdf2` or `__truncdfhf2`.
+    if target.vendor == "apple" || target.os == "windows" {
+        features.insert(Feature::NoSysF16F64Convert);
+    }
+
     for feature in features {
         let (name, warning) = match feature {
             Feature::NoSysF128 => ("no-sys-f128", "using apfloat fallback for f128"),
             Feature::NoSysF128IntConvert => (
                 "no-sys-f128-int-convert",
                 "using apfloat fallback for f128 <-> int conversions",
             ),
+            Feature::NoSysF16F64Convert => (
+                "no-sys-f16-f64-convert",
+                "using apfloat fallback for f16 <-> f64 conversions",
+            ),
             Feature::NoSysF16F128Convert => (
                 "no-sys-f16-f128-convert",
                 "using apfloat fallback for f16 <-> f128 conversions",
diff --git a/testcrate/tests/conv.rs b/testcrate/tests/conv.rs
@@ -311,6 +311,7 @@ mod extend {
         extend,
         f16 => f32, Half => Single, __extendhfsf2, not(feature = "no-sys-f16");
         f16 => f32, Half => Single, __gnu_h2f_ieee, not(feature = "no-sys-f16");
+        f16 => f64, Half => Double, __extendhfdf2, not(feature = "no-sys-f16-f64-convert");
         f16 => f128, Half => Quad, __extendhftf2, not(feature = "no-sys-f16-f128-convert");
         f32 => f128, Single => Quad, __extendsftf2, not(feature = "no-sys-f128");
         f64 => f128, Double => Quad, __extenddftf2, not(feature = "no-sys-f128");
@@ -340,6 +341,7 @@ mod trunc {
         trunc,
         f32 => f16, Single => Half, __truncsfhf2, not(feature = "no-sys-f16");
         f32 => f16, Single => Half, __gnu_f2h_ieee, not(feature = "no-sys-f16");
+        f64 => f16, Double => Half, __truncdfhf2, not(feature = "no-sys-f16-f64-convert");
         f128 => f16, Quad => Half, __trunctfhf2, not(feature = "no-sys-f16-f128-convert");
         f128 => f32, Quad => Single, __trunctfsf2, not(feature = "no-sys-f128");
         f128 => f64, Quad => Double, __trunctfdf2, not(feature = "no-sys-f128");