Skip to content

Commit 4af1f99

Browse files
authored
Rollup merge of #115331 - the8472:chars_advance, r=cuviper
optimize str::iter::Chars::advance_by

```
OLD:
str::iter::chars_advance_by_0001    0.00ns/iter +/- 0.00ns
str::iter::chars_advance_by_0010   13.00ns/iter +/- 1.00ns
str::iter::chars_advance_by_1000    1.20µs/iter +/- 15.00ns

NEW:
str::iter::chars_advance_by_0001    0.00ns/iter +/- 0.00ns
str::iter::chars_advance_by_0010    6.00ns/iter +/- 0.00ns
str::iter::chars_advance_by_1000   75.00ns/iter +/- 1.00ns
```
2 parents c2ec908 + 40cf1f9 commit 4af1f99

File tree

5 files changed

+80
-0
lines changed

5 files changed

+80
-0
lines changed

library/alloc/tests/str.rs

+11
Original file line numberDiff line numberDiff line change
@@ -1170,6 +1170,17 @@ fn test_iterator() {
11701170
assert_eq!(s.chars().count(), v.len());
11711171
}
11721172

1173+
// Checks that `Chars::advance_by` skips whole characters of a multi-byte
// string by comparing the iterator's position against a pre-collected
// `Vec<char>` of the same string.
#[test]
1174+
fn test_iterator_advance() {
1175+
let s = "「赤錆」と呼ばれる鉄錆は、水の存在下での鉄の自然酸化によって生じる、オキシ水酸化鉄(III) 等の(含水)酸化物粒子の疎な凝集膜であるとみなせる。";
1176+
// Reference sequence used to look up the expected character by index.
let chars: Vec<char> = s.chars().collect();
1177+
let mut it = s.chars();
1178+
// Skip one character; the next one yielded must be the one at index 1.
it.advance_by(1).unwrap();
1179+
assert_eq!(it.next(), Some(chars[1]));
1180+
// Two characters are consumed at this point, so skipping 33 more must
// land on index 35. 33 is larger than the 32 used as `CHUNK_SIZE` in
// `Chars::advance_by`, so this call goes through its chunked path.
it.advance_by(33).unwrap();
1181+
assert_eq!(it.next(), Some(chars[35]));
1182+
}
1183+
11731184
#[test]
11741185
fn test_rev_iterator() {
11751186
let s = "ศไทย中华Việt Nam";

library/core/benches/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#![feature(trusted_random_access)]
66
#![feature(iter_array_chunks)]
77
#![feature(iter_next_chunk)]
8+
#![feature(iter_advance_by)]
89

910
extern crate test;
1011

library/core/benches/str.rs

+1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use test::{black_box, Bencher};
33

44
mod char_count;
55
mod corpora;
6+
mod iter;
67

78
#[bench]
89
fn str_validate_emoji(b: &mut Bencher) {

library/core/benches/str/iter.rs

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
use super::corpora;
2+
use test::{black_box, Bencher};
3+
4+
// Benchmarks advancing a fresh `Chars` iterator over `corpora::ru::LARGE`
// by 1000 characters. The input goes through `black_box` so the optimizer
// cannot treat it as a known constant; the `advance_by` result is returned
// from the closure handed to `Bencher::iter`.
#[bench]
5+
fn chars_advance_by_1000(b: &mut Bencher) {
6+
b.iter(|| black_box(corpora::ru::LARGE).chars().advance_by(1000));
7+
}
8+
9+
// Benchmarks advancing a fresh `Chars` iterator over `corpora::ru::LARGE`
// by 10 characters. 10 is below the 32 used as `CHUNK_SIZE` in
// `Chars::advance_by`, so only its per-character loop runs here.
#[bench]
10+
fn chars_advance_by_0010(b: &mut Bencher) {
11+
b.iter(|| black_box(corpora::ru::LARGE).chars().advance_by(10));
12+
}
13+
14+
// Benchmarks advancing a fresh `Chars` iterator over `corpora::ru::LARGE`
// by a single character, i.e. the smallest non-zero step.
#[bench]
15+
fn chars_advance_by_0001(b: &mut Bencher) {
16+
b.iter(|| black_box(corpora::ru::LARGE).chars().advance_by(1));
17+
}

library/core/src/str/iter.rs

+50
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use crate::iter::{TrustedRandomAccess, TrustedRandomAccessNoCoerce};
88
use crate::ops::Try;
99
use crate::option;
1010
use crate::slice::{self, Split as SliceSplit};
11+
use core::num::NonZeroUsize;
1112

1213
use super::from_utf8_unchecked;
1314
use super::pattern::Pattern;
@@ -49,6 +50,55 @@ impl<'a> Iterator for Chars<'a> {
4950
super::count::count_chars(self.as_str())
5051
}
5152

53+
// Advances the iterator by up to `remainder` characters by moving the
// underlying byte iterator forward, without decoding the skipped characters.
//
// Returns `Ok(())` when `remainder` reaches zero, otherwise `Err(k)` where `k`
// is the number of characters that could not be skipped because the input
// ran out.
//
// Two phases:
//   1. For large requests, whole `CHUNK_SIZE`-byte chunks are consumed and the
//      number of non-continuation bytes in each chunk is subtracted from
//      `remainder`.
//   2. Whatever is left is skipped one character at a time.
#[inline]
54+
fn advance_by(&mut self, mut remainder: usize) -> Result<(), NonZeroUsize> {
55+
// Number of bytes inspected per step of the chunked phase.
const CHUNK_SIZE: usize = 32;
56+
57+
if remainder >= CHUNK_SIZE {
58+
// `chunks` walks a borrowed view of the remaining bytes; `self.iter`
// itself is only advanced once, after the loop, by `bytes_skipped`.
let mut chunks = self.iter.as_slice().array_chunks::<CHUNK_SIZE>();
59+
let mut bytes_skipped: usize = 0;
60+
61+
// The strict `>` guarantees the subtraction below cannot underflow:
// a chunk holds at most `CHUNK_SIZE` non-continuation bytes.
while remainder > CHUNK_SIZE
62+
&& let Some(chunk) = chunks.next()
63+
{
64+
bytes_skipped += CHUNK_SIZE;
65+
66+
let mut start_bytes = [false; CHUNK_SIZE];
67+
68+
// Mark every byte that is not a continuation byte.
// NOTE(review): presumably kept as a plain fixed-length loop so it can
// be auto-vectorized — confirm against the PR discussion.
for i in 0..CHUNK_SIZE {
69+
start_bytes[i] = !super::validations::utf8_is_cont_byte(chunk[i]);
70+
}
71+
72+
// The count is at most `CHUNK_SIZE` (32), so summing in `u8` cannot overflow.
remainder -= start_bytes.into_iter().map(|i| i as u8).sum::<u8>() as usize;
73+
}
74+
75+
// SAFETY: The amount of bytes exists since we just iterated over them,
76+
// so advance_by will succeed.
77+
unsafe { self.iter.advance_by(bytes_skipped).unwrap_unchecked() };
78+
79+
// The last chunk may have ended in the middle of a character, so move
// forward to the next non-continuation byte (or to the end of input).
// skip trailing continuation bytes
80+
while self.iter.len() > 0 {
81+
let b = self.iter.as_slice()[0];
82+
if !super::validations::utf8_is_cont_byte(b) {
83+
break;
84+
}
85+
// SAFETY: We just peeked at the byte, therefore it exists
86+
unsafe { self.iter.advance_by(1).unwrap_unchecked() };
87+
}
88+
}
89+
90+
// Per-character phase: read the first byte of the next character and skip
// as many bytes as `utf8_char_width` reports for it.
while (remainder > 0) && (self.iter.len() > 0) {
91+
remainder -= 1;
92+
let b = self.iter.as_slice()[0];
93+
let slurp = super::validations::utf8_char_width(b);
94+
// SAFETY: utf8 validity requires that the string must contain
95+
// the continuation bytes (if any)
96+
unsafe { self.iter.advance_by(slurp).unwrap_unchecked() };
97+
}
98+
99+
// `NonZeroUsize::new` yields `None` for 0, which maps to `Ok(())`; any
// characters left unskipped are reported as `Err(remainder)`.
NonZeroUsize::new(remainder).map_or(Ok(()), Err)
100+
}
101+
52102
#[inline]
53103
fn size_hint(&self) -> (usize, Option<usize>) {
54104
let len = self.iter.len();

0 commit comments

Comments
 (0)