1
+ /*
2
+
3
+ Solving for `a / b`, which is `res = (m_a * 2^p_a) / (m_b * 2^p_b)`.
4
+
5
+ - Separate the exponent and significand
6
+ `res = (m_a / m_b) * 2^(p_a - p_b)`
7
+ - Check for early exits
8
+ - If `a` or `b` are subnormal, normalize by shifting the mantissa and adjusting the exponent.
9
+ - Shift the significand (with implicit bit) fully left so that arithmetic can happen with greater
10
+ precision.
11
+ - Calculate the reciprocal of `b`, `r`
12
+ - Multiply: `res = m_a * r_b * 2^(p_a - p_b)`
13
+
14
+ The most complicated part of this process is calculating the reciprocal.
15
+
16
+ Note that variables named e.g. `uq0` refer to Q notation. E.g. Q1.31 refers to a fixed-point
17
+ number that has 1 bit of integer and 31 bits of fraction.
18
+
19
+ */
20
+
1
21
use crate :: float:: Float ;
2
22
use crate :: int:: { CastFrom , CastInto , DInt , HInt , Int , MinInt } ;
3
23
@@ -8,8 +28,9 @@ trait FloatDivision: Float
8
28
where
9
29
Self :: Int : DInt ,
10
30
{
11
- /// Iterations that are done at half of the float's width
31
+ /// Iterations that are done at half of the float's width, done for optimization.
12
32
const HALF_ITERATIONS : usize ;
33
+
13
34
/// Iterations that are done at the full float's width. Must be at least one.
14
35
const FULL_ITERATIONS : usize ;
15
36
51
72
}
52
73
}
53
74
75
+ if Self :: FULL_ITERATIONS < 1 {
76
+ panic ! ( "Must have at least one full iteration" ) ;
77
+ }
78
+
54
79
if Self :: BITS == 32 && Self :: HALF_ITERATIONS == 2 && Self :: FULL_ITERATIONS == 1 {
55
80
74u16
56
81
} else if Self :: BITS == 32 && Self :: HALF_ITERATIONS == 0 && Self :: FULL_ITERATIONS == 3 {
@@ -84,6 +109,18 @@ impl FloatDivision for f64 {
84
109
const C_HW : HalfRep < Self > = 0x7504F333 << ( HalfRep :: < Self > :: BITS - 32 ) ;
85
110
}
86
111
112
+ #[ cfg( not( feature = "no-f16-f128" ) ) ]
113
+ impl FloatDivision for f128 {
114
+ const HALF_ITERATIONS : usize = 4 ;
115
+ const FULL_ITERATIONS : usize = 1 ;
116
+
117
+ const C_HW : HalfRep < Self > = 0x7504F333 << ( HalfRep :: < Self > :: BITS - 32 ) ;
118
+ }
119
+
120
+ extern crate std;
121
+ #[ allow( unused) ]
122
+ use std:: { dbg, fmt, println} ;
123
+
87
124
fn div < F > ( a : F , b : F ) -> F
88
125
where
89
126
F : FloatDivision ,
@@ -108,6 +145,11 @@ where
108
145
u64 : CastInto < F :: Int > ,
109
146
u64 : CastInto < HalfRep < F > > ,
110
147
u128 : CastInto < F :: Int > ,
148
+
149
+ // debugging
150
+ F :: Int : fmt:: LowerHex ,
151
+ F :: Int : fmt:: Display ,
152
+ F :: SignedInt : fmt:: Display ,
111
153
{
112
154
let one = F :: Int :: ONE ;
113
155
let zero = F :: Int :: ZERO ;
@@ -131,16 +173,17 @@ where
131
173
let a_rep = a. repr ( ) ;
132
174
let b_rep = b. repr ( ) ;
133
175
134
- // FIXME(tgross35): use u32/i32 and not `Int` to store exponents, since that is enough for up to
135
- // `f256`. This should make f128 div faster.
136
176
// Exponent numeric representation, not accounting for bias
137
177
let a_exponent = ( a_rep >> significand_bits) & exponent_sat;
138
178
let b_exponent = ( b_rep >> significand_bits) & exponent_sat;
139
179
let quotient_sign = ( a_rep ^ b_rep) & sign_bit;
140
180
141
181
let mut a_significand = a_rep & significand_mask;
142
182
let mut b_significand = b_rep & significand_mask;
143
- let mut scale = 0 ;
183
+
184
+ // The exponent of our final result in its encoded form
185
+ let mut res_exponent: i32 =
186
+ i32:: cast_from ( a_exponent) - i32:: cast_from ( b_exponent) + ( exponent_bias as i32 ) ;
144
187
145
188
// Detect if a or b is zero, denormal, infinity, or NaN.
146
189
if a_exponent. wrapping_sub ( one) >= ( exponent_sat - one)
@@ -193,33 +236,35 @@ where
193
236
// adjustment.
194
237
if a_abs < implicit_bit {
195
238
let ( exponent, significand) = F :: normalize ( a_significand) ;
196
- scale += exponent;
239
+ res_exponent += exponent;
197
240
a_significand = significand;
198
241
}
199
242
200
243
// b is denormal. Renormalize it and set the scale to include the necessary exponent
201
244
// adjustment.
202
245
if b_abs < implicit_bit {
203
246
let ( exponent, significand) = F :: normalize ( b_significand) ;
204
- scale -= exponent;
247
+ res_exponent -= exponent;
205
248
b_significand = significand;
206
249
}
207
250
}
208
251
209
- // Set the implicit significand bit. If we fell through from the
252
+ // Set the implicit significand bit. If we fell through from the
210
253
// denormal path it was already set by normalize( ), but setting it twice
211
254
// won't hurt anything.
212
255
a_significand |= implicit_bit;
213
256
b_significand |= implicit_bit;
214
257
215
- let mut written_exponent: F :: SignedInt = F :: SignedInt :: from_unsigned (
216
- ( a_exponent
217
- . wrapping_sub ( b_exponent)
218
- . wrapping_add ( scale. cast ( ) ) )
219
- . wrapping_add ( exponent_bias. cast ( ) ) ,
258
+ println ! ( "a sig: {:#034x}\n b sig: {:#034x}\n a exp: {a_exponent}, b exp: {b_exponent}, written: {res_exponent}" ,
259
+ a_significand,
260
+ b_significand,
220
261
) ;
262
+
263
+ // Transform to a fixed-point representation
221
264
let b_uq1 = b_significand << ( F :: BITS - significand_bits - 1 ) ;
222
265
266
+ println ! ( "b_uq1: {:#034x}" , b_uq1) ;
267
+
223
268
// Align the significand of b as a UQ1.(n-1) fixed-point number in the range
224
269
// [1.0, 2.0) and get a UQ0.n approximate reciprocal using a small minimax
225
270
// polynomial approximation: x0 = 3/4 + 1/sqrt(2) - b/2.
@@ -257,7 +302,9 @@ where
257
302
// mode into account!
258
303
let mut x_uq0 = if F :: HALF_ITERATIONS > 0 {
259
304
// Starting with (n-1) half-width iterations
260
- let b_uq1_hw: HalfRep < F > = ( b_significand >> ( significand_bits + 1 - hw) ) . cast ( ) ;
305
+ let b_uq1_hw: HalfRep < F > = b_uq1. hi ( ) ;
306
+
307
+ // (b_significand >> (significand_bits + 1 - hw)).cast();
261
308
262
309
// C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW
263
310
// with W0 being either 16 or 32 and W0 <= HW.
@@ -446,7 +493,7 @@ where
446
493
// effectively doubling its value as well as its error estimation.
447
494
let residual_lo = ( a_significand << ( significand_bits + 1 ) )
448
495
. wrapping_sub ( quotient_uq1. wrapping_mul ( b_significand) ) ;
449
- written_exponent -= F :: SignedInt :: ONE ;
496
+ res_exponent -= 1 ;
450
497
a_significand <<= 1 ;
451
498
residual_lo
452
499
} else {
@@ -484,29 +531,30 @@ where
484
531
// For f128: 4096 * 3 < 13922 < 4096 * 5 (three NextAfter() are required)
485
532
//
486
533
// If we have overflowed the exponent, return infinity
487
- if written_exponent >= F :: SignedInt :: cast_from ( exponent_sat) {
534
+ if res_exponent >= i32 :: cast_from ( exponent_sat) {
488
535
return F :: from_repr ( inf_rep | quotient_sign) ;
489
536
}
490
537
491
538
// Now, quotient <= the correctly-rounded result
492
539
// and may need taking NextAfter() up to 3 times (see error estimates above)
493
540
// r = a - b * q
494
- let mut abs_result = if written_exponent > F :: SignedInt :: ZERO {
541
+ let mut abs_result = if res_exponent > 0 {
495
542
let mut ret = quotient & significand_mask;
496
- ret |= written_exponent . unsigned ( ) << significand_bits;
543
+ ret |= F :: Int :: from ( res_exponent as u32 ) << significand_bits;
497
544
residual_lo <<= 1 ;
498
545
ret
499
546
} else {
500
- if ( F :: SignedInt :: cast_from ( significand_bits) + written_exponent ) < F :: SignedInt :: ZERO {
547
+ if ( ( significand_bits as i32 ) + res_exponent ) < 0 {
501
548
return F :: from_repr ( quotient_sign) ;
502
549
}
503
550
504
- let ret = quotient. wrapping_shr ( u32:: cast_from ( written_exponent . wrapping_neg ( ) ) + 1 ) ;
551
+ let ret = quotient. wrapping_shr ( u32:: cast_from ( res_exponent . wrapping_neg ( ) ) + 1 ) ;
505
552
residual_lo = a_significand
506
- . wrapping_shl ( significand_bits. wrapping_add ( CastInto :: < u32 > :: cast ( written_exponent ) ) )
553
+ . wrapping_shl ( significand_bits. wrapping_add ( CastInto :: < u32 > :: cast ( res_exponent ) ) )
507
554
. wrapping_sub ( ret. wrapping_mul ( b_significand) << 1 ) ;
508
555
ret
509
556
} ;
557
+ dbg ! ( res_exponent) ;
510
558
511
559
residual_lo += abs_result & one; // tie to even
512
560
// conditionally turns the below LT comparison into LTE
@@ -539,6 +587,13 @@ intrinsics! {
539
587
div( a, b)
540
588
}
541
589
590
+ #[ avr_skip]
591
+ #[ ppc_alias = __divkf3]
592
+ #[ cfg( not( feature = "no-f16-f128" ) ) ]
593
+ pub extern "C" fn __divtf3( a: f128, b: f128) -> f128 {
594
+ div( a, b)
595
+ }
596
+
542
597
#[ cfg( target_arch = "arm" ) ]
543
598
pub extern "C" fn __divsf3vfp( a: f32 , b: f32 ) -> f32 {
544
599
a / b
0 commit comments