@@ -200,6 +200,7 @@ impl FloatDivision for f128 {
 }
 
 extern crate std;
+use core::ops;
 #[allow(unused)]
 use std::{dbg, fmt, println};
 
@@ -217,6 +218,7 @@ where
     F::Int: CastInto<u64>,
     F::Int: CastInto<i64>,
     F::Int: HInt + DInt,
+    <F::Int as HInt>::D: ops::Shr<u32, Output = <F::Int as HInt>::D>,
     F::SignedInt: CastFrom<u32>,
     F::SignedInt: CastInto<u32>,
     F::SignedInt: CastFrom<F::Int>,
@@ -237,6 +239,8 @@ where
 {
     let one = F::Int::ONE;
     let zero = F::Int::ZERO;
+    let one_hw = HalfRep::<F>::ONE;
+    let zero_hw = HalfRep::<F>::ZERO;
     let hw = F::BITS / 2;
     let lo_mask = F::Int::MAX >> hw;
 
@@ -399,9 +403,7 @@ where
         let c_hw = F::C_HW;
 
         // Check that the top bit is set, i.e. value is within `[1, 2)`.
-        debug_assert!(
-            b_uq1_hw & HalfRep::<F>::ONE << (HalfRep::<F>::BITS - 1) > HalfRep::<F>::ZERO
-        );
+        debug_assert!(b_uq1_hw & one_hw << (HalfRep::<F>::BITS - 1) > zero_hw);
 
         // b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572,
         // so x0 fits to UQ0.HW without wrapping.
@@ -437,8 +439,7 @@ where
         // no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is
         // expected to be strictly positive because b_UQ1_hw has its highest bit set
         // and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1).
-        let corr_uq1_hw: HalfRep<F> =
-            HalfRep::<F>::ZERO.wrapping_sub(x_uq0_hw.widen_mul(b_uq1_hw).hi());
+        let corr_uq1_hw: HalfRep<F> = zero_hw.wrapping_sub(x_uq0_hw.widen_mul(b_uq1_hw).hi());
 
         // Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally
         // obtaining an UQ1.(HW-1) number and proving its highest bit could be
@@ -483,7 +484,7 @@ where
         // be not below that value (see g(x) above), so it is safe to decrement just
         // once after the final iteration. On the other hand, an effective value of
         // divisor changes after this point (from b_hw to b), so adjust here.
-        x_uq0_hw = x_uq0_hw.wrapping_sub(HalfRep::<F>::ONE);
+        x_uq0_hw = x_uq0_hw.wrapping_sub(one_hw);
 
         // Error estimations for full-precision iterations are calculated just
         // as above, but with U := 2^-W and taking extra decrementing into account.
@@ -548,13 +549,8 @@ where
         assert!(F::BITS == 32, "native full iterations only supports f32");
 
         for _ in 0..full_iterations {
-            let corr_uq1: u32 = 0.wrapping_sub(
-                ((CastInto::<u32>::cast(x_uq0) as u64) * (CastInto::<u32>::cast(b_uq1) as u64))
-                    >> F::BITS,
-            ) as u32;
-            x_uq0 = ((((CastInto::<u32>::cast(x_uq0) as u64) * (corr_uq1 as u64)) >> (F::BITS - 1))
-                as u32)
-                .cast();
+            let corr_uq1: F::Int = zero.wrapping_sub(x_uq0.widen_mul(b_uq1).hi());
+            x_uq0 = (x_uq0.widen_mul(corr_uq1) >> (F::BITS - 1)).lo();
         }
     }
 
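For reference, here is a minimal standalone sketch (not the crate's actual `HInt`/`DInt` machinery; `reciprocal_step_u32` is a hypothetical name) of what the rewritten full-width iteration computes when `F = f32`, using plain `u32`/`u64` arithmetic: `widen_mul(..).hi()` corresponds to the upper half of the double-width product, and `.lo()` to the lower half after the shift. The new `ops::Shr<u32>` bound on `<F::Int as HInt>::D` is what lets the generic code shift that double-width value by `F::BITS - 1` directly.

```rust
// Hypothetical standalone sketch; not part of the crate.
// One Newton-Raphson step for 1/b, x' = x * (2 - b * x), in fixed point
// (x is UQ0.32, b is UQ1.31), mirroring the generic pair
//   corr = zero.wrapping_sub(x_uq0.widen_mul(b_uq1).hi());
//   x_uq0 = (x_uq0.widen_mul(corr) >> (F::BITS - 1)).lo();
fn reciprocal_step_u32(x_uq0: u32, b_uq1: u32) -> u32 {
    // corr = 0 - hi(x * b), i.e. (2 - x*b) as UQ1.31 thanks to wrapping.
    let corr_uq1 = 0u32.wrapping_sub(((x_uq0 as u64 * b_uq1 as u64) >> 32) as u32);
    // x' = low half of (x * corr) >> 31, back in UQ0.32.
    ((x_uq0 as u64 * corr_uq1 as u64) >> 31) as u32
}

fn main() {
    let b_uq1: u32 = 0xC000_0000; // b = 1.5 in UQ1.31; 1/b = 2/3 ≈ 0xAAAA_AAAA in UQ0.32
    let mut x_uq0: u32 = 0x8000_0000; // rough initial guess of 0.5
    for i in 0..4 {
        x_uq0 = reciprocal_step_u32(x_uq0, b_uq1);
        println!("iter {i}: x = {x_uq0:#010x} (~{})", x_uq0 as f64 / 2f64.powi(32));
    }
}
```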