Skip to content

Commit b400920

Browse files
committed
Refactoring
1 parent d75ee25 commit b400920

File tree

3 files changed

+149
-22
lines changed

3 files changed

+149
-22
lines changed

src/float/div.rs

+75-20
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,23 @@
1+
/*
2+
3+
Solving for `a / b`, which is `res = (m_a*2^p_a) / (m_b*2^p_b)`.
4+
5+
- Separate the exponent and significand
6+
`res = (m_a / m_b) * 2^(p_a - p_b)`
7+
- Check for early exits
8+
- If `a` or `b` are subnormal, normalize by shifting the mantissa and adjusting the exponent.
9+
- Shift the significand (with implicit bit) fully left so that arithmetic can happen with greater
10+
precision.
11+
- Calculate the reciprocal of `m_b`, `r_b`
12+
- Multiply: `res = m_a * r_b * 2^(p_a - p_b)`
13+
14+
The most complicated part of this process is calculating the reciprocal.
15+
16+
Note that variables named e.g. `uq0` refer to Q notation. E.g. Q1.31 refers to a fixed-point
17+
number that has 1 integer bit and 31 fractional bits.
18+
19+
*/
20+
121
use crate::float::Float;
222
use crate::int::{CastFrom, CastInto, DInt, HInt, Int, MinInt};
323

@@ -8,8 +28,9 @@ trait FloatDivision: Float
828
where
929
Self::Int: DInt,
1030
{
11-
/// Iterations that are done at half of the float's width
31+
/// Iterations that are done at half of the float's width, as an optimization.
1232
const HALF_ITERATIONS: usize;
33+
1334
/// Iterations that are done at the full float's width. Must be at least one.
1435
const FULL_ITERATIONS: usize;
1536

@@ -51,6 +72,10 @@ where
5172
}
5273
}
5374

75+
if Self::FULL_ITERATIONS < 1 {
76+
panic!("Must have at least one full iteration");
77+
}
78+
5479
if Self::BITS == 32 && Self::HALF_ITERATIONS == 2 && Self::FULL_ITERATIONS == 1 {
5580
74u16
5681
} else if Self::BITS == 32 && Self::HALF_ITERATIONS == 0 && Self::FULL_ITERATIONS == 3 {
@@ -84,6 +109,18 @@ impl FloatDivision for f64 {
84109
const C_HW: HalfRep<Self> = 0x7504F333 << (HalfRep::<Self>::BITS - 32);
85110
}
86111

112+
#[cfg(not(feature = "no-f16-f128"))]
113+
impl FloatDivision for f128 {
114+
const HALF_ITERATIONS: usize = 4;
115+
const FULL_ITERATIONS: usize = 1;
116+
117+
const C_HW: HalfRep<Self> = 0x7504F333 << (HalfRep::<Self>::BITS - 32);
118+
}
119+
120+
extern crate std;
121+
#[allow(unused)]
122+
use std::{dbg, fmt, println};
123+
87124
fn div<F>(a: F, b: F) -> F
88125
where
89126
F: FloatDivision,
@@ -108,6 +145,11 @@ where
108145
u64: CastInto<F::Int>,
109146
u64: CastInto<HalfRep<F>>,
110147
u128: CastInto<F::Int>,
148+
149+
// debugging
150+
F::Int: fmt::LowerHex,
151+
F::Int: fmt::Display,
152+
F::SignedInt: fmt::Display,
111153
{
112154
let one = F::Int::ONE;
113155
let zero = F::Int::ZERO;
@@ -131,16 +173,17 @@ where
131173
let a_rep = a.repr();
132174
let b_rep = b.repr();
133175

134-
// FIXME(tgross35): use u32/i32 and not `Int` to store exponents, since that is enough for up to
135-
// `f256`. This should make f128 div faster.
136176
// Exponent numeric representation, not accounting for bias
137177
let a_exponent = (a_rep >> significand_bits) & exponent_sat;
138178
let b_exponent = (b_rep >> significand_bits) & exponent_sat;
139179
let quotient_sign = (a_rep ^ b_rep) & sign_bit;
140180

141181
let mut a_significand = a_rep & significand_mask;
142182
let mut b_significand = b_rep & significand_mask;
143-
let mut scale = 0;
183+
184+
// The exponent of our final result in its encoded form
185+
let mut res_exponent: i32 =
186+
i32::cast_from(a_exponent) - i32::cast_from(b_exponent) + (exponent_bias as i32);
144187

145188
// Detect if a or b is zero, denormal, infinity, or NaN.
146189
if a_exponent.wrapping_sub(one) >= (exponent_sat - one)
@@ -193,33 +236,35 @@ where
193236
// adjustment.
194237
if a_abs < implicit_bit {
195238
let (exponent, significand) = F::normalize(a_significand);
196-
scale += exponent;
239+
res_exponent += exponent;
197240
a_significand = significand;
198241
}
199242

200243
// b is denormal. Renormalize it and update `res_exponent` to include the necessary exponent
201244
// adjustment.
202245
if b_abs < implicit_bit {
203246
let (exponent, significand) = F::normalize(b_significand);
204-
scale -= exponent;
247+
res_exponent -= exponent;
205248
b_significand = significand;
206249
}
207250
}
208251

209-
// Set the implicit significand bit. If we fell through from the
252+
// Set the implicit significand bit. If we fell through from the
210253
// denormal path it was already set by normalize( ), but setting it twice
211254
// won't hurt anything.
212255
a_significand |= implicit_bit;
213256
b_significand |= implicit_bit;
214257

215-
let mut written_exponent: F::SignedInt = F::SignedInt::from_unsigned(
216-
(a_exponent
217-
.wrapping_sub(b_exponent)
218-
.wrapping_add(scale.cast()))
219-
.wrapping_add(exponent_bias.cast()),
258+
println!("a sig: {:#034x}\nb sig: {:#034x}\na exp: {a_exponent}, b exp: {b_exponent}, written: {res_exponent}",
259+
a_significand,
260+
b_significand,
220261
);
262+
263+
// Transform to a fixed-point representation
221264
let b_uq1 = b_significand << (F::BITS - significand_bits - 1);
222265

266+
println!("b_uq1: {:#034x}", b_uq1);
267+
223268
// Align the significand of b as a UQ1.(n-1) fixed-point number in the range
224269
// [1.0, 2.0) and get a UQ0.n approximate reciprocal using a small minimax
225270
// polynomial approximation: x0 = 3/4 + 1/sqrt(2) - b/2.
@@ -257,7 +302,9 @@ where
257302
// mode into account!
258303
let mut x_uq0 = if F::HALF_ITERATIONS > 0 {
259304
// Starting with (n-1) half-width iterations
260-
let b_uq1_hw: HalfRep<F> = (b_significand >> (significand_bits + 1 - hw)).cast();
305+
let b_uq1_hw: HalfRep<F> = b_uq1.hi();
306+
307+
// (b_significand >> (significand_bits + 1 - hw)).cast();
261308

262309
// C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW
263310
// with W0 being either 16 or 32 and W0 <= HW.
@@ -446,7 +493,7 @@ where
446493
// effectively doubling its value as well as its error estimation.
447494
let residual_lo = (a_significand << (significand_bits + 1))
448495
.wrapping_sub(quotient_uq1.wrapping_mul(b_significand));
449-
written_exponent -= F::SignedInt::ONE;
496+
res_exponent -= 1;
450497
a_significand <<= 1;
451498
residual_lo
452499
} else {
@@ -484,29 +531,30 @@ where
484531
// For f128: 4096 * 3 < 13922 < 4096 * 5 (three NextAfter() are required)
485532
//
486533
// If we have overflowed the exponent, return infinity
487-
if written_exponent >= F::SignedInt::cast_from(exponent_sat) {
534+
if res_exponent >= i32::cast_from(exponent_sat) {
488535
return F::from_repr(inf_rep | quotient_sign);
489536
}
490537

491538
// Now, quotient <= the correctly-rounded result
492539
// and may need taking NextAfter() up to 3 times (see error estimates above)
493540
// r = a - b * q
494-
let mut abs_result = if written_exponent > F::SignedInt::ZERO {
541+
let mut abs_result = if res_exponent > 0 {
495542
let mut ret = quotient & significand_mask;
496-
ret |= written_exponent.unsigned() << significand_bits;
543+
ret |= F::Int::from(res_exponent as u32) << significand_bits;
497544
residual_lo <<= 1;
498545
ret
499546
} else {
500-
if (F::SignedInt::cast_from(significand_bits) + written_exponent) < F::SignedInt::ZERO {
547+
if ((significand_bits as i32) + res_exponent) < 0 {
501548
return F::from_repr(quotient_sign);
502549
}
503550

504-
let ret = quotient.wrapping_shr(u32::cast_from(written_exponent.wrapping_neg()) + 1);
551+
let ret = quotient.wrapping_shr(u32::cast_from(res_exponent.wrapping_neg()) + 1);
505552
residual_lo = a_significand
506-
.wrapping_shl(significand_bits.wrapping_add(CastInto::<u32>::cast(written_exponent)))
553+
.wrapping_shl(significand_bits.wrapping_add(CastInto::<u32>::cast(res_exponent)))
507554
.wrapping_sub(ret.wrapping_mul(b_significand) << 1);
508555
ret
509556
};
557+
dbg!(res_exponent);
510558

511559
residual_lo += abs_result & one; // tie to even
512560
// conditionally turns the below LT comparison into LTE
@@ -539,6 +587,13 @@ intrinsics! {
539587
div(a, b)
540588
}
541589

590+
#[avr_skip]
591+
#[ppc_alias = __divkf3]
592+
#[cfg(not(feature = "no-f16-f128"))]
593+
pub extern "C" fn __divtf3(a: f128, b: f128) -> f128 {
594+
div(a, b)
595+
}
596+
542597
#[cfg(target_arch = "arm")]
543598
pub extern "C" fn __divsf3vfp(a: f32, b: f32) -> f32 {
544599
a / b

src/float/mod.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ pub(crate) trait Float:
5151
/// The bitwidth of the exponent
5252
const EXPONENT_BITS: u32 = Self::BITS - Self::SIGNIFICAND_BITS - 1;
5353

54-
/// The saturated value of the exponent (infinite representation)
54+
/// The saturated value of the exponent (infinite representation), in the rightmost position.
5555
const EXPONENT_MAX: u32 = (1 << Self::EXPONENT_BITS) - 1;
5656

5757
/// The exponent bias value
@@ -175,7 +175,7 @@ macro_rules! float_impl {
175175
fn normalize(significand: Self::Int) -> (i32, Self::Int) {
176176
let shift = significand
177177
.leading_zeros()
178-
.wrapping_sub((Self::Int::ONE << Self::SIGNIFICAND_BITS).leading_zeros());
178+
.wrapping_sub(Self::EXPONENT_BITS);
179179
(
180180
1i32.wrapping_sub(shift as i32),
181181
significand << shift as Self::Int,

testcrate/tests/div_rem.rs

+72
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#![feature(f128)]
12
#![allow(unused_macros)]
23

34
use compiler_builtins::int::sdiv::{__divmoddi4, __divmodsi4, __divmodti4};
@@ -157,3 +158,74 @@ mod float_div_arm {
157158
f64, __divdf3vfp, Double, all();
158159
}
159160
}
161+
162+
#[cfg(not(feature = "no-f16-f128"))]
163+
#[cfg(not(all(target_arch = "x86", not(target_feature = "sse"))))]
164+
mod float_div_f128 {
165+
use super::*;
166+
167+
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
168+
float! {
169+
f128, __divtf3, Quad, not(feature = "no-sys-f128");
170+
}
171+
172+
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
173+
float! {
174+
f128, __divkf3, Quad, not(feature = "no-sys-f128");
175+
}
176+
}
177+
178+
#[test]
179+
fn problem_f128() {
180+
use compiler_builtins::float::div::__divtf3;
181+
182+
let a = f128::from_bits(0x00000000000000000000000000000001);
183+
let b = f128::from_bits(0x0001FFFFFFFFFFFFFFFFFFFFFFFFFFFF);
184+
let res = __divtf3(a, b);
185+
println!(
186+
"{:#036x} / {:#036x} = {:#036x}",
187+
a.to_bits(),
188+
b.to_bits(),
189+
res.to_bits()
190+
);
191+
// got 0x3f8f0000000000000000000000000001
192+
// exp 0x3f8e0000000000000000000000000001
193+
assert_eq!(res.to_bits(), 0x3F8E0000000000000000000000000001);
194+
panic!();
195+
}
196+
197+
#[test]
198+
fn not_problem_f64() {
199+
use compiler_builtins::float::div::__divdf3;
200+
201+
let a = f64::from_bits(0x0000000000000001);
202+
let b = f64::from_bits(0x001FFFFFFFFFFFFF);
203+
let res = __divdf3(a, b);
204+
println!(
205+
"{:#018x} / {:#018x} = {:#018x}",
206+
a.to_bits(),
207+
b.to_bits(),
208+
res.to_bits()
209+
);
210+
// 0x3CA0000000000001
211+
assert_eq!(res.to_bits(), 0x3CA0000000000001);
212+
panic!();
213+
}
214+
215+
#[test]
216+
fn not_problem_f32() {
217+
use compiler_builtins::float::div::__divsf3;
218+
219+
let a = f32::from_bits(0x00000001);
220+
let b = f32::from_bits(0x00FFFFFF);
221+
let res = __divsf3(a, b);
222+
println!(
223+
"{:#010x} / {:#010x} = {:#010x}",
224+
a.to_bits(),
225+
b.to_bits(),
226+
res.to_bits()
227+
);
228+
// 0x33800001
229+
assert_eq!(res.to_bits(), 0x33800001);
230+
panic!();
231+
}

0 commit comments

Comments
 (0)