Skip to content

Commit 4996b56

Browse files
committed
Auto merge of #106343 - the8472:slice-iter-fold, r=scottmcm
optimize slice::Iter::fold

Fixes 2 of 4 cases from #106288.

```
OLD: test slice::fold_to_last ... bench: 248 ns/iter (+/- 3)
NEW: test slice::fold_to_last ... bench: 0 ns/iter (+/- 0)
```
2 parents 5a65be8 + d90508f commit 4996b56

File tree

4 files changed

+56
-8
lines changed

4 files changed

+56
-8
lines changed

library/core/benches/slice.rs

+9
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
use core::ptr::NonNull;
12
use test::black_box;
23
use test::Bencher;
34

@@ -162,3 +163,11 @@ fn fill_byte_sized(b: &mut Bencher) {
162163
black_box(slice.fill(black_box(NewType(42))));
163164
});
164165
}
166+
167+
// Tests the ability of the compiler to recognize that only the last slice item is needed,
168+
// based on issue #106288.
169+
#[bench]
170+
fn fold_to_last(b: &mut Bencher) {
// The slice is passed through `black_box` on every iteration, and the closure
// discards the accumulator (`_`) and keeps only a pointer to the current item,
// so the final result is determined by the last item alone.
171+
let slice: &[i32] = &[0; 1024];
172+
b.iter(|| black_box(slice).iter().fold(None, |_, r| Some(NonNull::from(r))));
173+
}

library/core/src/slice/iter/macros.rs

+33
Original file line number | Diff line number | Diff line change
@@ -191,6 +191,39 @@ macro_rules! iterator {
191191
self.next_back()
192192
}
193193

194+
// Folds the iterator's elements into `init`, front to back, by calling `f` once
// per element, and returns the final accumulator. An empty iterator returns
// `init` without calling `f`.
#[inline]
195+
fn fold<B, F>(self, init: B, mut f: F) -> B
196+
where
197+
F: FnMut(B, Self::Item) -> B,
198+
{
199+
// This implementation applies the following optimizations compared to the
200+
// default implementation:
201+
// - do-while loop, as is LLVM's preferred loop shape,
202+
// see https://releases.llvm.org/16.0.0/docs/LoopTerminology.html#more-canonical-loops
203+
// - bumps an index instead of a pointer since the latter case inhibits
204+
// some optimizations, see #111603
205+
// - avoids Option wrapping/matching
//
// The loop below checks its exit condition only after the body has run, so
// the empty case has to be handled before entering it.
206+
if is_empty!(self) {
207+
return init;
208+
}
209+
let mut acc = init;
210+
let mut i = 0;
211+
let len = len!(self);
212+
loop {
213+
// SAFETY: the loop iterates `i in 0..len` (the empty case returned above and the
214+
// loop breaks as soon as `i == len`), which always is in bounds of the slice allocation
215+
acc = f(acc, unsafe { & $( $mut_ )? *self.ptr.add(i).as_ptr() });
216+
// SAFETY: `i` can't overflow since it'll only reach usize::MAX if the
217+
// slice had that length, in which case we'll break out of the loop
218+
// right after the increment
219+
i = unsafe { i.unchecked_add(1) };
220+
if i == len {
221+
break;
222+
}
223+
}
224+
acc
225+
}
226+
194227
// We override the default implementation, which uses `try_fold`,
195228
// because this simple implementation generates less LLVM IR and is
196229
// faster to compile.

tests/codegen/slice-iter-fold.rs

+14
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,14 @@
1+
// ignore-debug: the debug assertions get in the way
2+
// compile-flags: -O
3+
// min-llvm-version: 16
4+
#![crate_type = "lib"]
5+
6+
// Codegen test for folding a slice iterator down to its last element: the closure
// ignores the accumulator and returns the current item, so only the final item
// matters. The directives inside the function require that none of the strings
// `loop`, `br` or `call` appears in the output before the `ret`.
// CHECK-LABEL: @slice_fold_to_last
7+
#[no_mangle]
8+
pub fn slice_fold_to_last(slice: &[i32]) -> Option<&i32> {
9+
// CHECK-NOT: loop
10+
// CHECK-NOT: br
11+
// CHECK-NOT: call
12+
// CHECK: ret
13+
slice.iter().fold(None, |_, i| Some(i))
14+
}

tests/codegen/vec-shrink-panik.rs

-8
Original file line number | Diff line number | Diff line change
@@ -37,14 +37,6 @@ pub fn issue71861(vec: Vec<u32>) -> Box<[u32]> {
3737
// Copies the `&str` items of a borrowed slice and collects them into a boxed
// slice; the directives require that `panic` does not appear in the output.
// CHECK-LABEL: @issue75636
3838
#[no_mangle]
3939
pub fn issue75636<'a>(iter: &[&'a str]) -> Box<[&'a str]> {
40-
// CHECK-NOT: panic
41-
42-
// Call to panic_cannot_unwind in case of double-panic is expected,
43-
// on LLVM 16 and older, but other panics are not.
44-
// old: filter
45-
// old-NEXT: ; call core::panicking::panic_cannot_unwind
46-
// old-NEXT: panic_cannot_unwind
47-
4840
// CHECK-NOT: panic
4941
iter.iter().copied().collect()
5042
}

0 commit comments

Comments
 (0)