Auto merge of #106343 - the8472:slice-iter-fold, r=scottmcm

bors · bors · commit 4996b56ba964 · 2023-06-15T09:38:53.000Z
optimize slice::Iter::fold Fixes 2 of 4 cases from #106288 ``` OLD: test slice::fold_to_last ... bench: 248 ns/iter (+/- 3) NEW: test slice::fold_to_last ... bench: 0 ns/iter (+/- 0) ```
diff --git a/library/core/benches/slice.rs b/library/core/benches/slice.rs
@@ -1,3 +1,4 @@
+use core::ptr::NonNull;
 use test::black_box;
 use test::Bencher;
 
@@ -162,3 +163,11 @@ fn fill_byte_sized(b: &mut Bencher) {
         black_box(slice.fill(black_box(NewType(42))));
     });
 }
+
+// Tests the ability of the compiler to recognize that only the last slice item is needed
+// based on issue #106288
+#[bench]
+fn fold_to_last(b: &mut Bencher) {
+    let slice: &[i32] = &[0; 1024];
+    b.iter(|| black_box(slice).iter().fold(None, |_, r| Some(NonNull::from(r))));
+}
diff --git a/library/core/src/slice/iter/macros.rs b/library/core/src/slice/iter/macros.rs
@@ -191,6 +191,39 @@ macro_rules! iterator {
                 self.next_back()
             }
 
+            #[inline]
+            fn fold<B, F>(self, init: B, mut f: F) -> B
+                where
+                    F: FnMut(B, Self::Item) -> B,
+            {
+                // this implementation consists of the following optimizations compared to the
+                // default implementation:
+                // - do-while loop, as is llvm's preferred loop shape,
+                //   see https://releases.llvm.org/16.0.0/docs/LoopTerminology.html#more-canonical-loops
+                // - bumps an index instead of a pointer since the latter case inhibits
+                //   some optimizations, see #111603
+                // - avoids Option wrapping/matching
+                if is_empty!(self) {
+                    return init;
+                }
+                let mut acc = init;
+                let mut i = 0;
+                let len = len!(self);
+                loop {
+                    // SAFETY: the loop iterates `i in 0..len`, which always is in bounds of
+                    // the slice allocation
+                    acc = f(acc, unsafe { & $( $mut_ )? *self.ptr.add(i).as_ptr() });
+                    // SAFETY: `i` can't overflow since it'll only reach usize::MAX if the
+                    // slice had that length, in which case we'll break out of the loop
+                    // after the increment
+                    i = unsafe { i.unchecked_add(1) };
+                    if i == len {
+                        break;
+                    }
+                }
+                acc
+            }
+
             // We override the default implementation, which uses `try_fold`,
             // because this simple implementation generates less LLVM IR and is
             // faster to compile.
diff --git a/tests/codegen/slice-iter-fold.rs b/tests/codegen/slice-iter-fold.rs
@@ -0,0 +1,14 @@
+// ignore-debug: the debug assertions get in the way
+// compile-flags: -O
+// min-llvm-version: 16
+#![crate_type = "lib"]
+
+// CHECK-LABEL: @slice_fold_to_last
+#[no_mangle]
+pub fn slice_fold_to_last(slice: &[i32]) -> Option<&i32> {
+    // CHECK-NOT: loop
+    // CHECK-NOT: br
+    // CHECK-NOT: call
+    // CHECK: ret
+    slice.iter().fold(None, |_, i| Some(i))
+}
diff --git a/tests/codegen/vec-shrink-panik.rs b/tests/codegen/vec-shrink-panik.rs
@@ -37,14 +37,6 @@ pub fn issue71861(vec: Vec<u32>) -> Box<[u32]> {
 // CHECK-LABEL: @issue75636
 #[no_mangle]
 pub fn issue75636<'a>(iter: &[&'a str]) -> Box<[&'a str]> {
-    // CHECK-NOT: panic
-
-    // Call to panic_cannot_unwind in case of double-panic is expected,
-    // on LLVM 16 and older, but other panics are not.
-    // old: filter
-    // old-NEXT: ; call core::panicking::panic_cannot_unwind
-    // old-NEXT: panic_cannot_unwind
-
     // CHECK-NOT: panic
     iter.iter().copied().collect()
 }