@@ -88,47 +88,11 @@ trait FloatDivision: Float
88
88
where
89
89
Self :: Int : DInt ,
90
90
{
91
- // /// Iterations that are done at half of the float's width, done for optimization.
92
- // const HALF_ITERATIONS: usize;
93
-
94
- // /// Iterations that are done at the full float's width. Must be at least one.
95
- // const FULL_ITERATIONS: usize = 1;
96
-
97
- // const USE_NATIVE_FULL_ITERATIONS: bool = size_of::<Self>() < size_of::<*const ()>();
98
-
99
91
/// C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW
100
92
/// with W0 being either 16 or 32 and W0 <= HW.
101
93
/// That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from which
102
94
/// b/2 is subtracted to obtain x0) wrapped to [0, 1) range.
103
95
const C_HW : HalfRep < Self > ;
104
-
105
- // const RECIPROCAL_PRECISION: u16 = {
106
- // // Do some related configuration validation
107
- // if !Self::USE_NATIVE_FULL_ITERATIONS {
108
- // if Self::FULL_ITERATIONS != 1 {
109
- // panic!("Only a single emulated full iteration is supported");
110
- // }
111
- // if !(Self::HALF_ITERATIONS > 0) {
112
- // panic!("Invalid number of half iterations");
113
- // }
114
- // }
115
-
116
- // if Self::FULL_ITERATIONS < 1 {
117
- // panic!("Must have at least one full iteration");
118
- // }
119
-
120
- // if Self::BITS == 32 && Self::HALF_ITERATIONS == 2 && Self::FULL_ITERATIONS == 1 {
121
- // 74u16
122
- // } else if Self::BITS == 32 && Self::HALF_ITERATIONS == 0 && Self::FULL_ITERATIONS == 3 {
123
- // 10
124
- // } else if Self::BITS == 64 && Self::HALF_ITERATIONS == 3 && Self::FULL_ITERATIONS == 1 {
125
- // 220
126
- // } else if Self::BITS == 128 && Self::HALF_ITERATIONS == 4 && Self::FULL_ITERATIONS == 1 {
127
- // 13922
128
- // } else {
129
- // panic!("Invalid number of iterations")
130
- // }
131
- // };
132
96
}
133
97
134
98
/// Calculate the number of iterations required to get needed precision of a float type.
@@ -144,8 +108,9 @@ const fn calc_iterations<F: Float>() -> (usize, usize) {
144
108
// Precision doubles with each iteration
145
109
let total_iterations = F :: BITS . ilog2 ( ) as usize - 2 ;
146
110
147
- if size_of :: < F > ( ) < size_of :: < * const ( ) > ( ) {
148
- // No need to use half iterations if math at the half
111
+ // If widening multiply will be efficient (uses word-sized integers), there is no reason
112
+ // to use half-sized iterations.
113
+ if 2 * size_of :: < F > ( ) <= size_of :: < * const ( ) > ( ) {
149
114
( 0 , total_iterations)
150
115
} else {
151
116
( total_iterations - 1 , 1 )
@@ -201,9 +166,6 @@ const fn reciprocal_precision<F: Float>() -> u16 {
201
166
}
202
167
203
168
impl FloatDivision for f32 {
204
- // const HALF_ITERATIONS: usize = 0;
205
- // const FULL_ITERATIONS: usize = 3;
206
-
207
169
/// Use 16-bit initial estimation in case we are using half-width iterations
208
170
/// for float32 division. This is expected to be useful for some 16-bit
209
171
/// targets. Not used by default as it requires performing more work during
@@ -573,15 +535,9 @@ where
573
535
x_uq0
574
536
} ;
575
537
576
- if full_iterations > 1 {
577
- // Need to use concrete types since `F::Int::D` might not support math. So, restrict to
578
- // one type.
579
- // assert!(F::BITS == 32, "native full iterations only supports f32");
580
-
581
- for _ in 0 ..full_iterations {
582
- let corr_uq1: F :: Int = zero. wrapping_sub ( x_uq0. widen_mul ( b_uq1) . hi ( ) ) ;
583
- x_uq0 = ( x_uq0. widen_mul ( corr_uq1) >> ( F :: BITS - 1 ) ) . lo ( ) ;
584
- }
538
+ for _ in 0 ..full_iterations {
539
+ let corr_uq1: F :: Int = zero. wrapping_sub ( x_uq0. widen_mul ( b_uq1) . hi ( ) ) ;
540
+ x_uq0 = ( x_uq0. widen_mul ( corr_uq1) >> ( F :: BITS - 1 ) ) . lo ( ) ;
585
541
}
586
542
587
543
// Finally, account for possible overflow, as explained above.
0 commit comments