Explicitly unroll integer pow for small exponents

mzabaluev · mzabaluev · commit 05ee32298cbc · 2024-07-12T00:54:26.000+03:00
The newly optimized loop has introduced a regression in the case
when pow is called with a small constant exponent. LLVM is no longer
able to unroll the loop and the generated code is larger and slower
than what's expected in tests.

Match and handle small exponent values separately by branching out
to an explicit multiplication sequence for that exponent.
Powers larger than 6 need more than three multiplications, so these
cases are less likely to benefit from this optimization, also such
constant exponents are less likely to be used in practice.
For uses with a non-constant exponent, this might also provide
a performance benefit if the exponent is small and does not vary
between successive calls, so the same match arm tends to be taken as
a predicted branch.
diff --git a/core/src/num/int_macros.rs b/core/src/num/int_macros.rs
@@ -2173,10 +2173,35 @@ macro_rules! int_impl {
                       without modifying the original"]
         #[inline]
         pub const fn wrapping_pow(self, mut exp: u32) -> Self {
-            if exp == 0 {
-                return 1;
-            }
             let mut base = self;
+
+            // Unroll multiplications for small exponent values.
+            // This gives the optimizer a way to efficiently inline call sites
+            // for the most common use cases with constant exponents.
+            // Currently, LLVM is unable to unroll the loop below.
+            match exp {
+                0 => return 1,
+                1 => return base,
+                2 => return base.wrapping_mul(base),
+                3 => {
+                    let squared = base.wrapping_mul(base);
+                    return squared.wrapping_mul(base);
+                }
+                4 => {
+                    let squared = base.wrapping_mul(base);
+                    return squared.wrapping_mul(squared);
+                }
+                5 => {
+                    let squared = base.wrapping_mul(base);
+                    return squared.wrapping_mul(squared).wrapping_mul(base);
+                }
+                6 => {
+                    let cubed = base.wrapping_mul(base).wrapping_mul(base);
+                    return cubed.wrapping_mul(cubed);
+                }
+                _ => {}
+            }
+
             let mut acc: Self = 1;
 
             loop {
@@ -2719,10 +2744,35 @@ macro_rules! int_impl {
         #[inline]
         #[rustc_inherit_overflow_checks]
         pub const fn pow(self, mut exp: u32) -> Self {
-            if exp == 0 {
-                return 1;
-            }
             let mut base = self;
+
+            // Unroll multiplications for small exponent values.
+            // This gives the optimizer a way to efficiently inline call sites
+            // for the most common use cases with constant exponents.
+            // Currently, LLVM is unable to unroll the loop below.
+            match exp {
+                0 => return 1,
+                1 => return base,
+                2 => return base * base,
+                3 => {
+                    let squared = base * base;
+                    return squared * base;
+                }
+                4 => {
+                    let squared = base * base;
+                    return squared * squared;
+                }
+                5 => {
+                    let squared = base * base;
+                    return squared * squared * base;
+                }
+                6 => {
+                    let cubed = base * base * base;
+                    return cubed * cubed;
+                }
+                _ => {}
+            }
+
             let mut acc = 1;
 
             loop {
diff --git a/core/src/num/uint_macros.rs b/core/src/num/uint_macros.rs
@@ -2049,10 +2049,35 @@ macro_rules! uint_impl {
                       without modifying the original"]
         #[inline]
         pub const fn wrapping_pow(self, mut exp: u32) -> Self {
-            if exp == 0 {
-                return 1;
-            }
             let mut base = self;
+
+            // Unroll multiplications for small exponent values.
+            // This gives the optimizer a way to efficiently inline call sites
+            // for the most common use cases with constant exponents.
+            // Currently, LLVM is unable to unroll the loop below.
+            match exp {
+                0 => return 1,
+                1 => return base,
+                2 => return base.wrapping_mul(base),
+                3 => {
+                    let squared = base.wrapping_mul(base);
+                    return squared.wrapping_mul(base);
+                }
+                4 => {
+                    let squared = base.wrapping_mul(base);
+                    return squared.wrapping_mul(squared);
+                }
+                5 => {
+                    let squared = base.wrapping_mul(base);
+                    return squared.wrapping_mul(squared).wrapping_mul(base);
+                }
+                6 => {
+                    let cubed = base.wrapping_mul(base).wrapping_mul(base);
+                    return cubed.wrapping_mul(cubed);
+                }
+                _ => {}
+            }
+
             let mut acc: Self = 1;
 
             loop {
@@ -2544,10 +2569,35 @@ macro_rules! uint_impl {
         #[inline]
         #[rustc_inherit_overflow_checks]
         pub const fn pow(self, mut exp: u32) -> Self {
-            if exp == 0 {
-                return 1;
-            }
             let mut base = self;
+
+            // Unroll multiplications for small exponent values.
+            // This gives the optimizer a way to efficiently inline call sites
+            // for the most common use cases with constant exponents.
+            // Currently, LLVM is unable to unroll the loop below.
+            match exp {
+                0 => return 1,
+                1 => return base,
+                2 => return base * base,
+                3 => {
+                    let squared = base * base;
+                    return squared * base;
+                }
+                4 => {
+                    let squared = base * base;
+                    return squared * squared;
+                }
+                5 => {
+                    let squared = base * base;
+                    return squared * squared * base;
+                }
+                6 => {
+                    let cubed = base * base * base;
+                    return cubed * cubed;
+                }
+                _ => {}
+            }
+
             let mut acc = 1;
 
             loop {