Skip to content

Commit ae8ffa6

Browse files
committed
Auto merge of #111850 - the8472:external-step-by, r=scottmcm
Specialize `StepBy<Range<{integer}>>`

OLD
iter::bench_range_step_by_fold_u16 700.00ns/iter +/- 10.00ns
iter::bench_range_step_by_fold_usize 519.00ns/iter +/- 6.00ns
iter::bench_range_step_by_loop_u32 555.00ns/iter +/- 7.00ns
iter::bench_range_step_by_sum_reducible 37.00ns/iter +/- 0.00ns

NEW
iter::bench_range_step_by_fold_u16 49.00ns/iter +/- 0.00ns
iter::bench_range_step_by_fold_usize 194.00ns/iter +/- 1.00ns
iter::bench_range_step_by_loop_u32 98.00ns/iter +/- 0.00ns
iter::bench_range_step_by_sum_reducible 1.00ns/iter +/- 0.00ns

NEW + `-Ctarget-cpu=x86-64-v3`
iter::bench_range_step_by_fold_u16 22.00ns/iter +/- 0.00ns
iter::bench_range_step_by_fold_usize 80.00ns/iter +/- 1.00ns
iter::bench_range_step_by_loop_u32 41.00ns/iter +/- 0.00ns
iter::bench_range_step_by_sum_reducible 1.00ns/iter +/- 0.00ns

I have only optimized for the wall time of those methods; I haven't tested whether it eliminates bounds checks when indexing into slices via things like `(0..slice.len()).step_by(16)`.
2 parents 7f01f03 + f174547 commit ae8ffa6

File tree

3 files changed

+482
-36
lines changed

3 files changed

+482
-36
lines changed

Diff for: library/core/benches/iter.rs

+52
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use core::borrow::Borrow;
22
use core::iter::*;
33
use core::mem;
44
use core::num::Wrapping;
5+
use core::ops::Range;
56
use test::{black_box, Bencher};
67

78
#[bench]
@@ -69,6 +70,57 @@ fn bench_max(b: &mut Bencher) {
6970
})
7071
}
7172

73+
/// Benchmarks summing every 8th value of `0u32..1024` with a plain `for` loop.
///
/// NOTE(review): the name suggests the point is that the optimizer can reduce
/// this simple sum to a closed form -- confirm against the `StepBy`
/// specialization this benchmark accompanies.
#[bench]
74+
fn bench_range_step_by_sum_reducible(b: &mut Bencher) {
75+
let r = 0u32..1024;
76+
b.iter(|| {
77+
// Pass the range through `black_box` so its bounds are opaque to the
// optimizer and the whole sum cannot be constant-folded away.
let r = black_box(r.clone()).step_by(8);
78+
79+
let mut sum: u32 = 0;
80+
for i in r {
81+
sum += i;
82+
}
83+
84+
// Returned from the closure rather than discarded.
sum
85+
})
86+
}
87+
88+
/// Benchmarks a `for` loop over `0..u16::MAX as u32` stepped by 64, mixing
/// each element with `i ^ (i - 1)` and accumulating into a wrapping `u32` sum.
///
/// This is the explicit-loop counterpart of the `fold`-based benchmarks; the
/// loop form is the thing being measured, so it should not be rewritten as an
/// iterator chain.
#[bench]
89+
fn bench_range_step_by_loop_u32(b: &mut Bencher) {
90+
let r = 0..(u16::MAX as u32);
91+
b.iter(|| {
92+
// Pass the range through `black_box` so its bounds are opaque to the
// optimizer.
let r = black_box(r.clone()).step_by(64);
93+
94+
let mut sum: u32 = 0;
95+
for i in r {
96+
// `wrapping_sub` is required: the first element yielded is 0, so a
// plain `i - 1` would underflow.
let i = i ^ i.wrapping_sub(1);
97+
// The mixed values overflow `u32` when summed (the first one is
// already `u32::MAX`), hence the wrapping add.
sum = sum.wrapping_add(i);
98+
}
99+
100+
// Returned from the closure rather than discarded.
sum
101+
})
102+
}
103+
104+
/// Benchmarks `step_by(64)` over a `Range<usize>` of `0..u16::MAX`, consumed
/// through a `map` + `fold` chain instead of a `for` loop.
///
/// Each element is mixed with `x ^ (x - 1)` and folded into a wrapping
/// `usize` sum, which is the value returned from the benchmark closure.
#[bench]
105+
fn bench_range_step_by_fold_usize(b: &mut Bencher) {
106+
let r: Range<usize> = 0..(u16::MAX as usize);
107+
b.iter(|| {
108+
// Pass the range through `black_box` so its bounds are opaque to the
// optimizer.
let r = black_box(r.clone());
109+
r.step_by(64)
110+
// `wrapping_sub` is required: the first element yielded is 0.
.map(|x: usize| x ^ (x.wrapping_sub(1)))
111+
// The first mapped value is already `usize::MAX`, so the sum must wrap.
.fold(0usize, |acc, i| acc.wrapping_add(i))
112+
})
113+
}
114+
115+
/// Benchmarks `step_by(64)` over a `Range<u16>` of `0..u16::MAX`, consumed
/// through a `map` + `fold` chain.
///
/// Same shape as the `usize` fold benchmark, but with `u16` as both the
/// element type and the accumulator type.
#[bench]
116+
fn bench_range_step_by_fold_u16(b: &mut Bencher) {
117+
let r: Range<u16> = 0..u16::MAX;
118+
b.iter(|| {
119+
// Pass the range through `black_box` so its bounds are opaque to the
// optimizer.
let r = black_box(r.clone());
120+
// `wrapping_sub` handles the first element (0); `wrapping_add` is needed
// because the first mapped value is already `u16::MAX`.
r.step_by(64).map(|x: u16| x ^ (x.wrapping_sub(1))).fold(0u16, |acc, i| acc.wrapping_add(i))
121+
})
122+
}
123+
72124
pub fn copy_zip(xs: &[u8], ys: &mut [u8]) {
73125
for (a, b) in ys.iter_mut().zip(xs) {
74126
*a = *b;

0 commit comments

Comments
 (0)