Skip to content

Commit d6eb7bc

Browse files
authored
Rollup merge of #103166 - the8472:copied-next-chunk, r=m-ou-se
Optimize `slice_iter.copied().next_chunk()`

```
OLD: test iter::bench_copied_array_chunks ... bench: 371 ns/iter (+/- 7)
NEW: test iter::bench_copied_array_chunks ... bench: 31 ns/iter (+/- 0)
```

The default `next_chunk` implementation suffers from having to assemble the array element by element (byte by byte in the `u8` benchmark) via `next()`, checking the `Option<&T>` and then dereferencing `&T`. The specialization copies the chunk directly from the slice.
2 parents 2efc90e + 873a18e commit d6eb7bc

File tree

3 files changed

+95
-0
lines changed

3 files changed

+95
-0
lines changed

Diff for: library/core/benches/iter.rs

+20
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
use core::iter::*;
2+
use core::mem;
3+
use core::num::Wrapping;
24
use test::{black_box, Bencher};
35

46
#[bench]
@@ -398,3 +400,21 @@ fn bench_trusted_random_access_adapters(b: &mut Bencher) {
398400
acc
399401
})
400402
}
403+
404+
/// Exercises the iter::Copied specialization for slice::Iter
///
/// Walks a 1024-byte vector in `u64`-sized array chunks via
/// `iter().copied().array_chunks()`, turns every chunk into a `u64` and
/// sums the results with wrapping arithmetic.
405+
#[bench]
406+
fn bench_copied_array_chunks(b: &mut Bencher) {
407+
let v = vec![1u8; 1024];
408+
409+
b.iter(|| {
410+
// `black_box` keeps the optimizer from treating the input as a known constant.
black_box(&v)
411+
.iter()
412+
.copied()
413+
// Chunk width equals the byte size of `u64`, so each chunk feeds `from_ne_bytes` below.
.array_chunks::<{ mem::size_of::<u64>() }>()
414+
.map(|ary| {
415+
let d = u64::from_ne_bytes(ary);
416+
// `Wrapping` makes the final `sum` wrap on overflow.
Wrapping(d.rotate_left(7).wrapping_add(1))
417+
})
418+
.sum::<Wrapping<u64>>()
419+
})
420+
}

Diff for: library/core/benches/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#![feature(int_log)]
55
#![feature(test)]
66
#![feature(trusted_random_access)]
7+
#![feature(iter_array_chunks)]
78

89
extern crate test;
910

Diff for: library/core/src/iter/adapters/copied.rs

+74
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@ use crate::iter::adapters::{
22
zip::try_get_unchecked, TrustedRandomAccess, TrustedRandomAccessNoCoerce,
33
};
44
use crate::iter::{FusedIterator, TrustedLen};
5+
use crate::mem::MaybeUninit;
6+
use crate::mem::SizedTypeProperties;
57
use crate::ops::Try;
8+
use crate::{array, ptr};
69

710
/// An iterator that copies the elements of an underlying iterator.
811
///
@@ -44,6 +47,15 @@ where
4447
self.it.next().copied()
4548
}
4649

50+
// Pulls the next `N` items as an array by forwarding to
// `SpecNextChunk::spec_next_chunk` on the wrapped iterator. Going through
// that trait lets a specialized implementation (e.g. the one for
// `slice::Iter`) replace the generic element-by-element path.
//
// Returns `Ok` with a full array, or `Err` with an `array::IntoIter`
// holding the items that were available when fewer than `N` remained.
fn next_chunk<const N: usize>(
51+
&mut self,
52+
) -> Result<[Self::Item; N], array::IntoIter<Self::Item, N>>
53+
where
54+
Self: Sized,
55+
{
56+
<I as SpecNextChunk<'_, N, T>>::spec_next_chunk(&mut self.it)
57+
}
58+
4759
fn size_hint(&self) -> (usize, Option<usize>) {
4860
self.it.size_hint()
4961
}
@@ -166,3 +178,65 @@ where
166178
T: Copy,
167179
{
168180
}
181+
182+
/// Specialization helper behind `Copied::next_chunk`.
///
/// Implemented for every iterator over `&'a T` with `T: Copy`; the blanket
/// implementation is `default`, so individual iterator types can override
/// `spec_next_chunk` with a faster strategy.
trait SpecNextChunk<'a, const N: usize, T: 'a>: Iterator<Item = &'a T>
183+
where
184+
T: Copy,
185+
{
186+
/// Yields the next `N` items by value as `Ok([T; N])`, or `Err` with an
/// `array::IntoIter` over the items obtained when fewer than `N` are left.
fn spec_next_chunk(&mut self) -> Result<[T; N], array::IntoIter<T, N>>;
187+
}
188+
189+
// Generic fallback: applies to any iterator over `&'a T` that has no more
// specific implementation.
impl<'a, const N: usize, I, T: 'a> SpecNextChunk<'a, N, T> for I
190+
where
191+
I: Iterator<Item = &'a T>,
192+
T: Copy,
193+
{
194+
default fn spec_next_chunk(&mut self) -> Result<[T; N], array::IntoIter<T, N>> {
195+
// Dereference each `&T` to a `T` and let `array::iter_next_chunk` build the array
// from the resulting iterator.
array::iter_next_chunk(&mut self.map(|e| *e))
196+
}
197+
}
198+
199+
// Specialization for `slice::Iter`: instead of pulling items one at a time,
// copy up to `N` elements out of the remaining slice with a single
// `ptr::copy_nonoverlapping` and then advance the iterator by the same amount.
impl<'a, const N: usize, T: 'a> SpecNextChunk<'a, N, T> for crate::slice::Iter<'a, T>
200+
where
201+
T: Copy,
202+
{
203+
fn spec_next_chunk(&mut self) -> Result<[T; N], array::IntoIter<T, N>> {
204+
// Uninitialized backing storage for the result; filled (fully or partially) below.
let mut raw_array = MaybeUninit::uninit_array();
205+
206+
// Number of elements the iterator can still yield.
let len = self.len();
207+
208+
// Zero-sized types: nothing is copied, only the iterator position and the
// reported element count matter.
if T::IS_ZST {
209+
if len < N {
210+
// The `advance_by` result is discarded: `len` elements are known to remain.
let _ = self.advance_by(len);
211+
// SAFETY: ZSTs can be conjured ex nihilo; only the amount has to be correct
212+
return Err(unsafe { array::IntoIter::new_unchecked(raw_array, 0..len) });
213+
}
214+
215+
let _ = self.advance_by(N);
216+
// SAFETY: ditto
217+
return Ok(unsafe { MaybeUninit::array_assume_init(raw_array) });
218+
}
219+
220+
// Short tail: fewer than `N` elements remain, so only the first `len` slots get
// initialized and are handed back through the `Err` variant.
if len < N {
221+
// SAFETY: `len` indicates that this many elements are available and we just checked that
222+
// it fits into the array.
223+
unsafe {
224+
// The copy reads from the current front of the iterator, so it has to happen
// before `advance_by` moves that front.
ptr::copy_nonoverlapping(
225+
self.as_ref().as_ptr(),
226+
raw_array.as_mut_ptr() as *mut T,
227+
len,
228+
);
229+
let _ = self.advance_by(len);
230+
return Err(array::IntoIter::new_unchecked(raw_array, 0..len));
231+
}
232+
}
233+
234+
// SAFETY: `len` is at least `N` here (the `len < N` case returned above). Copy a fixed
235+
// amount to fully initialize the array.
236+
unsafe {
237+
ptr::copy_nonoverlapping(self.as_ref().as_ptr(), raw_array.as_mut_ptr() as *mut T, N);
238+
let _ = self.advance_by(N);
239+
Ok(MaybeUninit::array_assume_init(raw_array))
240+
}
241+
}
242+
}

0 commit comments

Comments
 (0)