Skip to content

Commit 05ee322

Browse files
committed
Explicitly unroll integer pow for small exponents
The newly optimized loop has introduced a regression in the case when pow is called with a small constant exponent. LLVM is no longer able to unroll the loop, and the generated code is larger and slower than what's expected in tests. Match and handle small exponent values separately by branching out to an explicit multiplication sequence for that exponent. Most powers larger than 6 need more than three multiplications, so these cases are less likely to benefit from this optimization; such constant exponents are also less likely to be used in practice. For uses with a non-constant exponent, this might also provide a performance benefit if the exponent is small and does not vary between successive calls, so the same match arm tends to be taken as a predicted branch.
1 parent 4cfe24a commit 05ee322

File tree

2 files changed

+112
-12
lines changed

2 files changed

+112
-12
lines changed

Diff for: core/src/num/int_macros.rs

+56-6
Original file line numberDiff line numberDiff line change
@@ -2173,10 +2173,35 @@ macro_rules! int_impl {
21732173
without modifying the original"]
21742174
#[inline]
21752175
pub const fn wrapping_pow(self, mut exp: u32) -> Self {
2176-
if exp == 0 {
2177-
return 1;
2178-
}
21792176
let mut base = self;
2177+
2178+
// Unroll multiplications for small exponent values.
2179+
// This gives the optimizer a way to efficiently inline call sites
2180+
// for the most common use cases with constant exponents.
2181+
// Currently, LLVM is unable to unroll the loop below.
2182+
match exp {
2183+
0 => return 1,
2184+
1 => return base,
2185+
2 => return base.wrapping_mul(base),
2186+
3 => {
2187+
let squared = base.wrapping_mul(base);
2188+
return squared.wrapping_mul(base);
2189+
}
2190+
4 => {
2191+
let squared = base.wrapping_mul(base);
2192+
return squared.wrapping_mul(squared);
2193+
}
2194+
5 => {
2195+
let squared = base.wrapping_mul(base);
2196+
return squared.wrapping_mul(squared).wrapping_mul(base);
2197+
}
2198+
6 => {
2199+
let cubed = base.wrapping_mul(base).wrapping_mul(base);
2200+
return cubed.wrapping_mul(cubed);
2201+
}
2202+
_ => {}
2203+
}
2204+
21802205
let mut acc: Self = 1;
21812206

21822207
loop {
@@ -2719,10 +2744,35 @@ macro_rules! int_impl {
27192744
#[inline]
27202745
#[rustc_inherit_overflow_checks]
27212746
pub const fn pow(self, mut exp: u32) -> Self {
2722-
if exp == 0 {
2723-
return 1;
2724-
}
27252747
let mut base = self;
2748+
2749+
// Unroll multiplications for small exponent values.
2750+
// This gives the optimizer a way to efficiently inline call sites
2751+
// for the most common use cases with constant exponents.
2752+
// Currently, LLVM is unable to unroll the loop below.
2753+
match exp {
2754+
0 => return 1,
2755+
1 => return base,
2756+
2 => return base * base,
2757+
3 => {
2758+
let squared = base * base;
2759+
return squared * base;
2760+
}
2761+
4 => {
2762+
let squared = base * base;
2763+
return squared * squared;
2764+
}
2765+
5 => {
2766+
let squared = base * base;
2767+
return squared * squared * base;
2768+
}
2769+
6 => {
2770+
let cubed = base * base * base;
2771+
return cubed * cubed;
2772+
}
2773+
_ => {}
2774+
}
2775+
27262776
let mut acc = 1;
27272777

27282778
loop {

Diff for: core/src/num/uint_macros.rs

+56-6
Original file line numberDiff line numberDiff line change
@@ -2049,10 +2049,35 @@ macro_rules! uint_impl {
20492049
without modifying the original"]
20502050
#[inline]
20512051
pub const fn wrapping_pow(self, mut exp: u32) -> Self {
2052-
if exp == 0 {
2053-
return 1;
2054-
}
20552052
let mut base = self;
2053+
2054+
// Unroll multiplications for small exponent values.
2055+
// This gives the optimizer a way to efficiently inline call sites
2056+
// for the most common use cases with constant exponents.
2057+
// Currently, LLVM is unable to unroll the loop below.
2058+
match exp {
2059+
0 => return 1,
2060+
1 => return base,
2061+
2 => return base.wrapping_mul(base),
2062+
3 => {
2063+
let squared = base.wrapping_mul(base);
2064+
return squared.wrapping_mul(base);
2065+
}
2066+
4 => {
2067+
let squared = base.wrapping_mul(base);
2068+
return squared.wrapping_mul(squared);
2069+
}
2070+
5 => {
2071+
let squared = base.wrapping_mul(base);
2072+
return squared.wrapping_mul(squared).wrapping_mul(base);
2073+
}
2074+
6 => {
2075+
let cubed = base.wrapping_mul(base).wrapping_mul(base);
2076+
return cubed.wrapping_mul(cubed);
2077+
}
2078+
_ => {}
2079+
}
2080+
20562081
let mut acc: Self = 1;
20572082

20582083
loop {
@@ -2544,10 +2569,35 @@ macro_rules! uint_impl {
25442569
#[inline]
25452570
#[rustc_inherit_overflow_checks]
25462571
pub const fn pow(self, mut exp: u32) -> Self {
2547-
if exp == 0 {
2548-
return 1;
2549-
}
25502572
let mut base = self;
2573+
2574+
// Unroll multiplications for small exponent values.
2575+
// This gives the optimizer a way to efficiently inline call sites
2576+
// for the most common use cases with constant exponents.
2577+
// Currently, LLVM is unable to unroll the loop below.
2578+
match exp {
2579+
0 => return 1,
2580+
1 => return base,
2581+
2 => return base * base,
2582+
3 => {
2583+
let squared = base * base;
2584+
return squared * base;
2585+
}
2586+
4 => {
2587+
let squared = base * base;
2588+
return squared * squared;
2589+
}
2590+
5 => {
2591+
let squared = base * base;
2592+
return squared * squared * base;
2593+
}
2594+
6 => {
2595+
let cubed = base * base * base;
2596+
return cubed * cubed;
2597+
}
2598+
_ => {}
2599+
}
2600+
25512601
let mut acc = 1;
25522602

25532603
loop {

0 commit comments

Comments
 (0)