1
+ /*
2
+
3
+ Solving for `a / b`, which is `res = (m_a * 2^p_a) / (m_b * 2^p_b)`.
4
+
5
+ - Separate the exponent and significand
6
+ `res = (m_a / m_b) * 2^(p_a - p_b)`
7
+ - Check for early exits
8
+ - If `a` or `b` are subnormal, normalize by shifting the mantissa and adjusting the exponent.
9
+ - Shift the significand (with implicit bit) fully left so that arithmetic can happen with greater
10
+ precision.
11
+ - Calculate the reciprocal of `b`, `r`
12
+ - Multiply: `res = m_a * r_b * 2^(p_a - p_b)`
13
+
14
+ The most complicated part of this process is calculating the reciprocal.
15
+
16
+ Note that variables named e.g. `uq0` refer to Q notation. E.g. Q1.31 refers to a fixed-point
17
+ number that has 1 bit of integer and 31 bits of fraction.
18
+
19
+ */
20
+
1
21
use crate :: float:: Float ;
2
22
use crate :: int:: { CastFrom , CastInto , DInt , HInt , Int , MinInt } ;
3
23
@@ -8,8 +28,9 @@ trait FloatDivision: Float
8
28
where
9
29
Self :: Int : DInt ,
10
30
{
11
- /// Iterations that are done at half of the float's width
31
+ /// Iterations that are done at half of the float's width, done for optimization.
12
32
const HALF_ITERATIONS : usize ;
33
+
13
34
/// Iterations that are done at the full float's width. Must be at least one.
14
35
const FULL_ITERATIONS : usize ;
15
36
51
72
}
52
73
}
53
74
75
+ if Self :: FULL_ITERATIONS < 1 {
76
+ panic ! ( "Must have at least one full iteration" ) ;
77
+ }
78
+
54
79
if Self :: BITS == 32 && Self :: HALF_ITERATIONS == 2 && Self :: FULL_ITERATIONS == 1 {
55
80
74u16
56
81
} else if Self :: BITS == 32 && Self :: HALF_ITERATIONS == 0 && Self :: FULL_ITERATIONS == 3 {
@@ -84,6 +109,18 @@ impl FloatDivision for f64 {
84
109
const C_HW : HalfRep < Self > = 0x7504F333 << ( HalfRep :: < Self > :: BITS - 32 ) ;
85
110
}
86
111
112
+ #[ cfg( not( feature = "no-f16-f128" ) ) ]
113
+ impl FloatDivision for f128 {
114
+ const HALF_ITERATIONS : usize = 4 ;
115
+ const FULL_ITERATIONS : usize = 1 ;
116
+
117
+ const C_HW : HalfRep < Self > = 0x7504F333 << ( HalfRep :: < Self > :: BITS - 32 ) ;
118
+ }
119
+
120
+ extern crate std;
121
+ #[ allow( unused) ]
122
+ use std:: { dbg, fmt, println} ;
123
+
87
124
fn div < F > ( a : F , b : F ) -> F
88
125
where
89
126
F : FloatDivision ,
@@ -108,6 +145,11 @@ where
108
145
u64 : CastInto < F :: Int > ,
109
146
u64 : CastInto < HalfRep < F > > ,
110
147
u128 : CastInto < F :: Int > ,
148
+
149
+ // debugging
150
+ F :: Int : fmt:: LowerHex ,
151
+ F :: Int : fmt:: Display ,
152
+ F :: SignedInt : fmt:: Display ,
111
153
{
112
154
let one = F :: Int :: ONE ;
113
155
let zero = F :: Int :: ZERO ;
@@ -131,16 +173,17 @@ where
131
173
let a_rep = a. repr ( ) ;
132
174
let b_rep = b. repr ( ) ;
133
175
134
- // FIXME(tgross35): use u32/i32 and not `Int` to store exponents, since that is enough for up to
135
- // `f256`. This should make f128 div faster.
136
176
// Exponent numeric representation, not accounting for bias
137
177
let a_exponent = ( a_rep >> significand_bits) & exponent_sat;
138
178
let b_exponent = ( b_rep >> significand_bits) & exponent_sat;
139
179
let quotient_sign = ( a_rep ^ b_rep) & sign_bit;
140
180
141
181
let mut a_significand = a_rep & significand_mask;
142
182
let mut b_significand = b_rep & significand_mask;
143
- let mut scale = 0 ;
183
+
184
+ // The exponent of our final result in its encoded form
185
+ let mut res_exponent: i32 =
186
+ i32:: cast_from ( a_exponent) - i32:: cast_from ( b_exponent) + ( exponent_bias as i32 ) ;
144
187
145
188
// Detect if a or b is zero, denormal, infinity, or NaN.
146
189
if a_exponent. wrapping_sub ( one) >= ( exponent_sat - one)
@@ -193,33 +236,35 @@ where
193
236
// adjustment.
194
237
if a_abs < implicit_bit {
195
238
let ( exponent, significand) = F :: normalize ( a_significand) ;
196
- scale += exponent;
239
+ res_exponent += exponent;
197
240
a_significand = significand;
198
241
}
199
242
200
243
// b is denormal. Renormalize it and set the scale to include the necessary exponent
201
244
// adjustment.
202
245
if b_abs < implicit_bit {
203
246
let ( exponent, significand) = F :: normalize ( b_significand) ;
204
- scale -= exponent;
247
+ res_exponent -= exponent;
205
248
b_significand = significand;
206
249
}
207
250
}
208
251
209
- // Set the implicit significand bit. If we fell through from the
252
+ // Set the implicit significand bit. If we fell through from the
210
253
// denormal path it was already set by normalize( ), but setting it twice
211
254
// won't hurt anything.
212
255
a_significand |= implicit_bit;
213
256
b_significand |= implicit_bit;
214
257
215
- let mut written_exponent: F :: SignedInt = F :: SignedInt :: from_unsigned (
216
- ( a_exponent
217
- . wrapping_sub ( b_exponent)
218
- . wrapping_add ( scale. cast ( ) ) )
219
- . wrapping_add ( exponent_bias. cast ( ) ) ,
258
+ println ! ( "a sig: {:#034x}\n b sig: {:#034x}\n a exp: {a_exponent}, b exp: {b_exponent}, written: {res_exponent}" ,
259
+ a_significand,
260
+ b_significand,
220
261
) ;
262
+
263
+ // Transform to a fixed-point representation
221
264
let b_uq1 = b_significand << ( F :: BITS - significand_bits - 1 ) ;
222
265
266
+ println ! ( "b_uq1: {:#034x}" , b_uq1) ;
267
+
223
268
// Align the significand of b as a UQ1.(n-1) fixed-point number in the range
224
269
// [1.0, 2.0) and get a UQ0.n approximate reciprocal using a small minimax
225
270
// polynomial approximation: x0 = 3/4 + 1/sqrt(2) - b/2.
@@ -257,7 +302,9 @@ where
257
302
// mode into account!
258
303
let mut x_uq0 = if F :: HALF_ITERATIONS > 0 {
259
304
// Starting with (n-1) half-width iterations
260
- let b_uq1_hw: HalfRep < F > = ( b_significand >> ( significand_bits + 1 - hw) ) . cast ( ) ;
305
+ let b_uq1_hw: HalfRep < F > = b_uq1. hi ( ) ;
306
+
307
+ // (b_significand >> (significand_bits + 1 - hw)).cast();
261
308
262
309
// C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW
263
310
// with W0 being either 16 or 32 and W0 <= HW.
@@ -446,7 +493,7 @@ where
446
493
// effectively doubling its value as well as its error estimation.
447
494
let residual_lo = ( a_significand << ( significand_bits + 1 ) )
448
495
. wrapping_sub ( quotient_uq1. wrapping_mul ( b_significand) ) ;
449
- written_exponent -= F :: SignedInt :: ONE ;
496
+ res_exponent -= 1 ;
450
497
a_significand <<= 1 ;
451
498
residual_lo
452
499
} else {
@@ -484,29 +531,30 @@ where
484
531
// For f128: 4096 * 3 < 13922 < 4096 * 5 (three NextAfter() are required)
485
532
//
486
533
// If we have overflowed the exponent, return infinity
487
- if written_exponent >= F :: SignedInt :: cast_from ( exponent_sat) {
534
+ if res_exponent >= i32 :: cast_from ( exponent_sat) {
488
535
return F :: from_repr ( inf_rep | quotient_sign) ;
489
536
}
490
537
491
538
// Now, quotient <= the correctly-rounded result
492
539
// and may need taking NextAfter() up to 3 times (see error estimates above)
493
540
// r = a - b * q
494
- let mut abs_result = if written_exponent > F :: SignedInt :: ZERO {
541
+ let mut abs_result = if res_exponent > 0 {
495
542
let mut ret = quotient & significand_mask;
496
- ret |= written_exponent . unsigned ( ) << significand_bits;
543
+ ret |= F :: Int :: from ( res_exponent as u32 ) << significand_bits;
497
544
residual_lo <<= 1 ;
498
545
ret
499
546
} else {
500
- if ( F :: SignedInt :: cast_from ( significand_bits) + written_exponent ) < F :: SignedInt :: ZERO {
547
+ if ( ( significand_bits as i32 ) + res_exponent ) < 0 {
501
548
return F :: from_repr ( quotient_sign) ;
502
549
}
503
550
504
- let ret = quotient. wrapping_shr ( u32:: cast_from ( written_exponent . wrapping_neg ( ) ) + 1 ) ;
551
+ let ret = quotient. wrapping_shr ( u32:: cast_from ( res_exponent . wrapping_neg ( ) ) + 1 ) ;
505
552
residual_lo = a_significand
506
- . wrapping_shl ( significand_bits. wrapping_add ( CastInto :: < u32 > :: cast ( written_exponent ) ) )
553
+ . wrapping_shl ( significand_bits. wrapping_add ( CastInto :: < u32 > :: cast ( res_exponent ) ) )
507
554
. wrapping_sub ( ret. wrapping_mul ( b_significand) << 1 ) ;
508
555
ret
509
556
} ;
557
+ dbg ! ( res_exponent) ;
510
558
511
559
residual_lo += abs_result & one; // tie to even
512
560
// conditionally turns the below LT comparison into LTE
@@ -539,6 +587,13 @@ intrinsics! {
539
587
div( a, b)
540
588
}
541
589
590
+ #[ avr_skip]
591
+ #[ ppc_alias = __divkf3]
592
+ #[ cfg( not( feature = "no-f16-f128" ) ) ]
593
+ pub extern "C" fn __divtf3( a: f128, b: f128) -> f128 {
594
+ div( a, b)
595
+ }
596
+
542
597
#[ cfg( target_arch = "arm" ) ]
543
598
pub extern "C" fn __divsf3vfp( a: f32 , b: f32 ) -> f32 {
544
599
a / b
0 commit comments