Skip to content

Commit 4cfe24a

Browse files
committed
Optimize integer pow by removing exit branch
The branch at the end of the `pow` implementations is redundant with multiplication code already present in the loop. By rotating the exit check into the loop body, this trailing branch can be largely eliminated, reducing code size and improving instruction cache locality.
1 parent a18fbd0 commit 4cfe24a

File tree

2 files changed

+52
-73
lines changed

2 files changed

+52
-73
lines changed

Diff for: core/src/num/int_macros.rs

+26-35
Original file line numberDiff line numberDiff line change
@@ -1495,18 +1495,17 @@ macro_rules! int_impl {
14951495
let mut base = self;
14961496
let mut acc: Self = 1;
14971497

1498-
while exp > 1 {
1498+
loop {
14991499
if (exp & 1) == 1 {
15001500
acc = try_opt!(acc.checked_mul(base));
1501+
// since exp!=0, finally the exp must be 1.
1502+
if exp == 1 {
1503+
return Some(acc);
1504+
}
15011505
}
15021506
exp /= 2;
15031507
base = try_opt!(base.checked_mul(base));
15041508
}
1505-
// since exp!=0, finally the exp must be 1.
1506-
// Deal with the final bit of the exponent separately, since
1507-
// squaring the base afterwards is not necessary and may cause a
1508-
// needless overflow.
1509-
acc.checked_mul(base)
15101509
}
15111510

15121511
/// Strict exponentiation. Computes `self.pow(exp)`, panicking if
@@ -1546,18 +1545,17 @@ macro_rules! int_impl {
15461545
let mut base = self;
15471546
let mut acc: Self = 1;
15481547

1549-
while exp > 1 {
1548+
loop {
15501549
if (exp & 1) == 1 {
15511550
acc = acc.strict_mul(base);
1551+
// since exp!=0, finally the exp must be 1.
1552+
if exp == 1 {
1553+
return acc;
1554+
}
15521555
}
15531556
exp /= 2;
15541557
base = base.strict_mul(base);
15551558
}
1556-
// since exp!=0, finally the exp must be 1.
1557-
// Deal with the final bit of the exponent separately, since
1558-
// squaring the base afterwards is not necessary and may cause a
1559-
// needless overflow.
1560-
acc.strict_mul(base)
15611559
}
15621560

15631561
/// Returns the square root of the number, rounded down.
@@ -2181,19 +2179,17 @@ macro_rules! int_impl {
21812179
let mut base = self;
21822180
let mut acc: Self = 1;
21832181

2184-
while exp > 1 {
2182+
loop {
21852183
if (exp & 1) == 1 {
21862184
acc = acc.wrapping_mul(base);
2185+
// since exp!=0, finally the exp must be 1.
2186+
if exp == 1 {
2187+
return acc;
2188+
}
21872189
}
21882190
exp /= 2;
21892191
base = base.wrapping_mul(base);
21902192
}
2191-
2192-
// since exp!=0, finally the exp must be 1.
2193-
// Deal with the final bit of the exponent separately, since
2194-
// squaring the base afterwards is not necessary and may cause a
2195-
// needless overflow.
2196-
acc.wrapping_mul(base)
21972193
}
21982194

21992195
/// Calculates `self` + `rhs`
@@ -2687,9 +2683,14 @@ macro_rules! int_impl {
26872683
// Scratch space for storing results of overflowing_mul.
26882684
let mut r;
26892685

2690-
while exp > 1 {
2686+
loop {
26912687
if (exp & 1) == 1 {
26922688
r = acc.overflowing_mul(base);
2689+
// since exp!=0, finally the exp must be 1.
2690+
if exp == 1 {
2691+
r.1 |= overflown;
2692+
return r;
2693+
}
26932694
acc = r.0;
26942695
overflown |= r.1;
26952696
}
@@ -2698,14 +2699,6 @@ macro_rules! int_impl {
26982699
base = r.0;
26992700
overflown |= r.1;
27002701
}
2701-
2702-
// since exp!=0, finally the exp must be 1.
2703-
// Deal with the final bit of the exponent separately, since
2704-
// squaring the base afterwards is not necessary and may cause a
2705-
// needless overflow.
2706-
r = acc.overflowing_mul(base);
2707-
r.1 |= overflown;
2708-
r
27092702
}
27102703

27112704
/// Raises self to the power of `exp`, using exponentiation by squaring.
@@ -2732,19 +2725,17 @@ macro_rules! int_impl {
27322725
let mut base = self;
27332726
let mut acc = 1;
27342727

2735-
while exp > 1 {
2728+
loop {
27362729
if (exp & 1) == 1 {
27372730
acc = acc * base;
2731+
// since exp!=0, finally the exp must be 1.
2732+
if exp == 1 {
2733+
return acc;
2734+
}
27382735
}
27392736
exp /= 2;
27402737
base = base * base;
27412738
}
2742-
2743-
// since exp!=0, finally the exp must be 1.
2744-
// Deal with the final bit of the exponent separately, since
2745-
// squaring the base afterwards is not necessary and may cause a
2746-
// needless overflow.
2747-
acc * base
27482739
}
27492740

27502741
/// Returns the square root of the number, rounded down.

Diff for: core/src/num/uint_macros.rs

+26-38
Original file line numberDiff line numberDiff line change
@@ -1534,20 +1534,17 @@ macro_rules! uint_impl {
15341534
let mut base = self;
15351535
let mut acc: Self = 1;
15361536

1537-
while exp > 1 {
1537+
loop {
15381538
if (exp & 1) == 1 {
15391539
acc = try_opt!(acc.checked_mul(base));
1540+
// since exp!=0, finally the exp must be 1.
1541+
if exp == 1 {
1542+
return Some(acc);
1543+
}
15401544
}
15411545
exp /= 2;
15421546
base = try_opt!(base.checked_mul(base));
15431547
}
1544-
1545-
// since exp!=0, finally the exp must be 1.
1546-
// Deal with the final bit of the exponent separately, since
1547-
// squaring the base afterwards is not necessary and may cause a
1548-
// needless overflow.
1549-
1550-
acc.checked_mul(base)
15511548
}
15521549

15531550
/// Strict exponentiation. Computes `self.pow(exp)`, panicking if
@@ -1587,18 +1584,17 @@ macro_rules! uint_impl {
15871584
let mut base = self;
15881585
let mut acc: Self = 1;
15891586

1590-
while exp > 1 {
1587+
loop {
15911588
if (exp & 1) == 1 {
15921589
acc = acc.strict_mul(base);
1590+
// since exp!=0, finally the exp must be 1.
1591+
if exp == 1 {
1592+
return acc;
1593+
}
15931594
}
15941595
exp /= 2;
15951596
base = base.strict_mul(base);
15961597
}
1597-
// since exp!=0, finally the exp must be 1.
1598-
// Deal with the final bit of the exponent separately, since
1599-
// squaring the base afterwards is not necessary and may cause a
1600-
// needless overflow.
1601-
acc.strict_mul(base)
16021598
}
16031599

16041600
/// Saturating integer addition. Computes `self + rhs`, saturating at
@@ -2059,19 +2055,17 @@ macro_rules! uint_impl {
20592055
let mut base = self;
20602056
let mut acc: Self = 1;
20612057

2062-
while exp > 1 {
2058+
loop {
20632059
if (exp & 1) == 1 {
20642060
acc = acc.wrapping_mul(base);
2061+
// since exp!=0, finally the exp must be 1.
2062+
if exp == 1 {
2063+
return acc;
2064+
}
20652065
}
20662066
exp /= 2;
20672067
base = base.wrapping_mul(base);
20682068
}
2069-
2070-
// since exp!=0, finally the exp must be 1.
2071-
// Deal with the final bit of the exponent separately, since
2072-
// squaring the base afterwards is not necessary and may cause a
2073-
// needless overflow.
2074-
acc.wrapping_mul(base)
20752069
}
20762070

20772071
/// Calculates `self` + `rhs`
@@ -2516,9 +2510,14 @@ macro_rules! uint_impl {
25162510
// Scratch space for storing results of overflowing_mul.
25172511
let mut r;
25182512

2519-
while exp > 1 {
2513+
loop {
25202514
if (exp & 1) == 1 {
25212515
r = acc.overflowing_mul(base);
2516+
// since exp!=0, finally the exp must be 1.
2517+
if exp == 1 {
2518+
r.1 |= overflown;
2519+
return r;
2520+
}
25222521
acc = r.0;
25232522
overflown |= r.1;
25242523
}
@@ -2527,15 +2526,6 @@ macro_rules! uint_impl {
25272526
base = r.0;
25282527
overflown |= r.1;
25292528
}
2530-
2531-
// since exp!=0, finally the exp must be 1.
2532-
// Deal with the final bit of the exponent separately, since
2533-
// squaring the base afterwards is not necessary and may cause a
2534-
// needless overflow.
2535-
r = acc.overflowing_mul(base);
2536-
r.1 |= overflown;
2537-
2538-
r
25392529
}
25402530

25412531
/// Raises self to the power of `exp`, using exponentiation by squaring.
@@ -2560,19 +2550,17 @@ macro_rules! uint_impl {
25602550
let mut base = self;
25612551
let mut acc = 1;
25622552

2563-
while exp > 1 {
2553+
loop {
25642554
if (exp & 1) == 1 {
25652555
acc = acc * base;
2556+
// since exp!=0, finally the exp must be 1.
2557+
if exp == 1 {
2558+
return acc;
2559+
}
25662560
}
25672561
exp /= 2;
25682562
base = base * base;
25692563
}
2570-
2571-
// since exp!=0, finally the exp must be 1.
2572-
// Deal with the final bit of the exponent separately, since
2573-
// squaring the base afterwards is not necessary and may cause a
2574-
// needless overflow.
2575-
acc * base
25762564
}
25772565

25782566
/// Returns the square root of the number, rounded down.

0 commit comments

Comments
 (0)