Skip to content

Commit e9688c6

Browse files
committed
Remove unneeded code from asymmetric.rs
Rebenchmarking showed that removing this code only makes performance worse on really low-end CPUs.
1 parent eff506c commit e9688c6

File tree

1 file changed

+20
-85
lines changed

1 file changed

+20
-85
lines changed

src/int/specialized_div_rem/asymmetric.rs

Lines changed: 20 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -25,20 +25,8 @@ macro_rules! impl_asymmetric {
2525
#[$unsigned_attr]
2626
)*
2727
pub fn $unsigned_name(duo: $uD, div: $uD) -> ($uD,$uD) {
28-
fn carrying_mul(lhs: $uX, rhs: $uX) -> ($uX, $uX) {
29-
let tmp = (lhs as $uD).wrapping_mul(rhs as $uD);
30-
(tmp as $uX, (tmp >> ($n_h * 2)) as $uX)
31-
}
32-
fn carrying_mul_add(lhs: $uX, mul: $uX, add: $uX) -> ($uX, $uX) {
33-
let tmp = (lhs as $uD).wrapping_mul(mul as $uD).wrapping_add(add as $uD);
34-
(tmp as $uX, (tmp >> ($n_h * 2)) as $uX)
35-
}
36-
3728
let n: u32 = $n_h * 2;
3829

39-
// Many of these subalgorithms are taken from trifecta.rs, see that for better
40-
// documentation.
41-
4230
let duo_lo = duo as $uX;
4331
let duo_hi = (duo >> n) as $uX;
4432
let div_lo = div as $uX;
@@ -51,30 +39,6 @@ macro_rules! impl_asymmetric {
5139
// `$uD` by `$uX` division with a quotient that will fit into a `$uX`
5240
let (quo, rem) = unsafe { $asymmetric_division(duo, div_lo) };
5341
return (quo as $uD, rem as $uD)
54-
} else if (div_lo >> $n_h) == 0 {
55-
// Short division of $uD by a $uH.
56-
57-
// Some x86_64 CPUs have bad division implementations that make specializing
58-
// this case faster.
59-
let div_0 = div_lo as $uH as $uX;
60-
let (quo_hi, rem_3) = $half_division(duo_hi, div_0);
61-
62-
let duo_mid =
63-
((duo >> $n_h) as $uH as $uX)
64-
| (rem_3 << $n_h);
65-
let (quo_1, rem_2) = $half_division(duo_mid, div_0);
66-
67-
let duo_lo =
68-
(duo as $uH as $uX)
69-
| (rem_2 << $n_h);
70-
let (quo_0, rem_1) = $half_division(duo_lo, div_0);
71-
72-
return (
73-
(quo_0 as $uD)
74-
| ((quo_1 as $uD) << $n_h)
75-
| ((quo_hi as $uD) << n),
76-
rem_1 as $uD
77-
)
7842
} else {
7943
// Short division using the $uD by $uX division
8044
let (quo_hi, rem_hi) = $half_division(duo_hi, div_lo);
@@ -85,59 +49,30 @@ macro_rules! impl_asymmetric {
8549
}
8650
}
8751

88-
let duo_lz = duo_hi.leading_zeros();
52+
// This has been adapted from
53+
// https://www.codeproject.com/tips/785014/uint-division-modulus which was in turn
54+
// adapted from Hacker's Delight. This is similar to the two possibility algorithm
55+
// in that it uses only more significant parts of `duo` and `div` to divide a large
56+
// integer with a smaller division instruction.
8957
let div_lz = div_hi.leading_zeros();
90-
let rel_leading_sb = div_lz.wrapping_sub(duo_lz);
91-
if rel_leading_sb < $n_h {
92-
// Some x86_64 CPUs have bad hardware division implementations that make putting
93-
// a two possibility algorithm here beneficial. We also avoid a full `$uD`
94-
// multiplication.
95-
let shift = n - duo_lz;
96-
let duo_sig_n = (duo >> shift) as $uX;
97-
let div_sig_n = (div >> shift) as $uX;
98-
let quo = $half_division(duo_sig_n, div_sig_n).0;
99-
let div_lo = div as $uX;
100-
let div_hi = (div >> n) as $uX;
101-
let (tmp_lo, carry) = carrying_mul(quo, div_lo);
102-
let (tmp_hi, overflow) = carrying_mul_add(quo, div_hi, carry);
103-
let tmp = (tmp_lo as $uD) | ((tmp_hi as $uD) << n);
104-
if (overflow != 0) || (duo < tmp) {
105-
return (
106-
(quo - 1) as $uD,
107-
duo.wrapping_add(div).wrapping_sub(tmp)
108-
)
109-
} else {
110-
return (
111-
quo as $uD,
112-
duo - tmp
113-
)
114-
}
115-
} else {
116-
// This has been adapted from
117-
// https://www.codeproject.com/tips/785014/uint-division-modulus which was in turn
118-
// adapted from Hacker's Delight. This is similar to the two possibility algorithm
119-
// in that it uses only more significant parts of `duo` and `div` to divide a large
120-
// integer with a smaller division instruction.
121-
122-
let div_extra = n - div_lz;
123-
let div_sig_n = (div >> div_extra) as $uX;
124-
let tmp = unsafe {
125-
$asymmetric_division(duo >> 1, div_sig_n)
126-
};
58+
let div_extra = n - div_lz;
59+
let div_sig_n = (div >> div_extra) as $uX;
60+
let tmp = unsafe {
61+
$asymmetric_division(duo >> 1, div_sig_n)
62+
};
12763

128-
let mut quo = tmp.0 >> ((n - 1) - div_lz);
129-
if quo != 0 {
130-
quo -= 1;
131-
}
64+
let mut quo = tmp.0 >> ((n - 1) - div_lz);
65+
if quo != 0 {
66+
quo -= 1;
67+
}
13268

133-
// Note that this is a full `$uD` multiplication being used here
134-
let mut rem = duo - (quo as $uD).wrapping_mul(div);
135-
if div <= rem {
136-
quo += 1;
137-
rem -= div;
138-
}
139-
return (quo as $uD, rem)
69+
// Note that this is a full `$uD` multiplication being used here
70+
let mut rem = duo - (quo as $uD).wrapping_mul(div);
71+
if div <= rem {
72+
quo += 1;
73+
rem -= div;
14074
}
75+
return (quo as $uD, rem)
14176
}
14277

14378
/// Computes the quotient and remainder of `duo` divided by `div` and returns them as a

0 commit comments

Comments
 (0)