@@ -25,20 +25,8 @@ macro_rules! impl_asymmetric {
25
25
#[ $unsigned_attr]
26
26
) *
27
27
pub fn $unsigned_name( duo: $uD, div: $uD) -> ( $uD, $uD) {
28
- fn carrying_mul( lhs: $uX, rhs: $uX) -> ( $uX, $uX) {
29
- let tmp = ( lhs as $uD) . wrapping_mul( rhs as $uD) ;
30
- ( tmp as $uX, ( tmp >> ( $n_h * 2 ) ) as $uX)
31
- }
32
- fn carrying_mul_add( lhs: $uX, mul: $uX, add: $uX) -> ( $uX, $uX) {
33
- let tmp = ( lhs as $uD) . wrapping_mul( mul as $uD) . wrapping_add( add as $uD) ;
34
- ( tmp as $uX, ( tmp >> ( $n_h * 2 ) ) as $uX)
35
- }
36
-
37
28
let n: u32 = $n_h * 2 ;
38
29
39
- // Many of these subalgorithms are taken from trifecta.rs, see that for better
40
- // documentation.
41
-
42
30
let duo_lo = duo as $uX;
43
31
let duo_hi = ( duo >> n) as $uX;
44
32
let div_lo = div as $uX;
@@ -51,30 +39,6 @@ macro_rules! impl_asymmetric {
51
39
// `$uD` by `$uX` division with a quotient that will fit into a `$uX`
52
40
let ( quo, rem) = unsafe { $asymmetric_division( duo, div_lo) } ;
53
41
return ( quo as $uD, rem as $uD)
54
- } else if ( div_lo >> $n_h) == 0 {
55
- // Short division of $uD by a $uH.
56
-
57
- // Some x86_64 CPUs have bad division implementations that make specializing
58
- // this case faster.
59
- let div_0 = div_lo as $uH as $uX;
60
- let ( quo_hi, rem_3) = $half_division( duo_hi, div_0) ;
61
-
62
- let duo_mid =
63
- ( ( duo >> $n_h) as $uH as $uX)
64
- | ( rem_3 << $n_h) ;
65
- let ( quo_1, rem_2) = $half_division( duo_mid, div_0) ;
66
-
67
- let duo_lo =
68
- ( duo as $uH as $uX)
69
- | ( rem_2 << $n_h) ;
70
- let ( quo_0, rem_1) = $half_division( duo_lo, div_0) ;
71
-
72
- return (
73
- ( quo_0 as $uD)
74
- | ( ( quo_1 as $uD) << $n_h)
75
- | ( ( quo_hi as $uD) << n) ,
76
- rem_1 as $uD
77
- )
78
42
} else {
79
43
// Short division using the $uD by $uX division
80
44
let ( quo_hi, rem_hi) = $half_division( duo_hi, div_lo) ;
@@ -85,59 +49,30 @@ macro_rules! impl_asymmetric {
85
49
}
86
50
}
87
51
88
- let duo_lz = duo_hi. leading_zeros( ) ;
52
+ // This has been adapted from
53
+ // https://www.codeproject.com/tips/785014/uint-division-modulus which was in turn
54
+ // adapted from Hacker's Delight. This is similar to the two possibility algorithm
55
+ // in that it uses only more significant parts of `duo` and `div` to divide a large
56
+ // integer with a smaller division instruction.
89
57
let div_lz = div_hi. leading_zeros( ) ;
90
- let rel_leading_sb = div_lz. wrapping_sub( duo_lz) ;
91
- if rel_leading_sb < $n_h {
92
- // Some x86_64 CPUs have bad hardware division implementations that make putting
93
- // a two possibility algorithm here beneficial. We also avoid a full `$uD`
94
- // multiplication.
95
- let shift = n - duo_lz;
96
- let duo_sig_n = ( duo >> shift) as $uX;
97
- let div_sig_n = ( div >> shift) as $uX;
98
- let quo = $half_division( duo_sig_n, div_sig_n) . 0 ;
99
- let div_lo = div as $uX;
100
- let div_hi = ( div >> n) as $uX;
101
- let ( tmp_lo, carry) = carrying_mul( quo, div_lo) ;
102
- let ( tmp_hi, overflow) = carrying_mul_add( quo, div_hi, carry) ;
103
- let tmp = ( tmp_lo as $uD) | ( ( tmp_hi as $uD) << n) ;
104
- if ( overflow != 0 ) || ( duo < tmp) {
105
- return (
106
- ( quo - 1 ) as $uD,
107
- duo. wrapping_add( div) . wrapping_sub( tmp)
108
- )
109
- } else {
110
- return (
111
- quo as $uD,
112
- duo - tmp
113
- )
114
- }
115
- } else {
116
- // This has been adapted from
117
- // https://www.codeproject.com/tips/785014/uint-division-modulus which was in turn
118
- // adapted from Hacker's Delight. This is similar to the two possibility algorithm
119
- // in that it uses only more significant parts of `duo` and `div` to divide a large
120
- // integer with a smaller division instruction.
121
-
122
- let div_extra = n - div_lz;
123
- let div_sig_n = ( div >> div_extra) as $uX;
124
- let tmp = unsafe {
125
- $asymmetric_division( duo >> 1 , div_sig_n)
126
- } ;
58
+ let div_extra = n - div_lz;
59
+ let div_sig_n = ( div >> div_extra) as $uX;
60
+ let tmp = unsafe {
61
+ $asymmetric_division( duo >> 1 , div_sig_n)
62
+ } ;
127
63
128
- let mut quo = tmp. 0 >> ( ( n - 1 ) - div_lz) ;
129
- if quo != 0 {
130
- quo -= 1 ;
131
- }
64
+ let mut quo = tmp. 0 >> ( ( n - 1 ) - div_lz) ;
65
+ if quo != 0 {
66
+ quo -= 1 ;
67
+ }
132
68
133
- // Note that this is a full `$uD` multiplication being used here
134
- let mut rem = duo - ( quo as $uD) . wrapping_mul( div) ;
135
- if div <= rem {
136
- quo += 1 ;
137
- rem -= div;
138
- }
139
- return ( quo as $uD, rem)
69
+ // Note that this is a full `$uD` multiplication being used here
70
+ let mut rem = duo - ( quo as $uD) . wrapping_mul( div) ;
71
+ if div <= rem {
72
+ quo += 1 ;
73
+ rem -= div;
140
74
}
75
+ return ( quo as $uD, rem)
141
76
}
142
77
143
78
/// Computes the quotient and remainder of `duo` divided by `div` and returns them as a
0 commit comments