Skip to content

Commit 2b82c07

Browse files
committed
StrSearcher: Improve inner loop in TwoWaySearcher::next, next_back
The innermost loop of TwoWaySearcher checks the boundary of the haystack vs position + needle.len(), and it checks the last byte of the needle against the byteset. If these two steps are combined by using the indexing of the last needle byte's position as the bounds check, the algorithm improves its throughput.

We improve the innermost loop by reducing the number of instructions used, and eliminating the panic case for the checked indexing that was previously used.

Selected benchmarks from the external/workspace test suite. Benchmarks improve across the board.

```
before:
test bb_in_aa::twoway_find ... bench: 4,229 ns/iter (+/- 1,305) = 23646 MB/s
test bb_in_aa::twoway_rfind ... bench: 3,873 ns/iter (+/- 101) = 25819 MB/s
test short_1let_long::twoway_find ... bench: 7,075 ns/iter (+/- 29) = 360 MB/s
test short_1let_long::twoway_rfind ... bench: 6,640 ns/iter (+/- 79) = 384 MB/s
test short_2let_long::twoway_find ... bench: 3,823 ns/iter (+/- 16) = 667 MB/s
test short_2let_long::twoway_rfind ... bench: 3,774 ns/iter (+/- 44) = 675 MB/s
test short_3let_long::twoway_find ... bench: 3,582 ns/iter (+/- 47) = 712 MB/s
test short_3let_long::twoway_rfind ... bench: 3,616 ns/iter (+/- 34) = 705 MB/s

with this commit:
test bb_in_aa::twoway_find ... bench: 2,952 ns/iter (+/- 20) = 33875 MB/s
test bb_in_aa::twoway_rfind ... bench: 2,939 ns/iter (+/- 99) = 34025 MB/s
test short_1let_long::twoway_find ... bench: 4,593 ns/iter (+/- 4) = 555 MB/s
test short_1let_long::twoway_rfind ... bench: 4,592 ns/iter (+/- 76) = 555 MB/s
test short_2let_long::twoway_find ... bench: 2,804 ns/iter (+/- 3) = 909 MB/s
test short_2let_long::twoway_rfind ... bench: 2,807 ns/iter (+/- 40) = 908 MB/s
test short_3let_long::twoway_find ... bench: 3,105 ns/iter (+/- 120) = 821 MB/s
test short_3let_long::twoway_rfind ... bench: 3,019 ns/iter (+/- 50) = 844 MB/s
```

- `bb_in_aa`: the fast skip performed by the byteset filter loop improves.
- 1/2/3let: searches for 1, 2, or 3 ASCII bytes improve.
1 parent 7ebae85 commit 2b82c07

File tree

1 file changed

+22
-10
lines changed

1 file changed

+22
-10
lines changed

src/libcore/str/pattern.rs

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -906,19 +906,25 @@ impl TwoWaySearcher {
906906
{
907907
// `next()` uses `self.position` as its cursor
908908
let old_pos = self.position;
909+
let needle_last = needle.len() - 1;
909910
'search: loop {
910911
// Check that we have room to search in
911-
if needle.len() > haystack.len() - self.position {
912-
self.position = haystack.len();
913-
return S::rejecting(old_pos, self.position);
914-
}
912+
// position + needle_last can not overflow if we assume slices
913+
// are bounded by isize's range.
914+
let tail_byte = match haystack.get(self.position + needle_last) {
915+
Some(&b) => b,
916+
None => {
917+
self.position = haystack.len();
918+
return S::rejecting(old_pos, self.position);
919+
}
920+
};
915921

916922
if S::use_early_reject() && old_pos != self.position {
917923
return S::rejecting(old_pos, self.position);
918924
}
919925

920926
// Quickly skip by large portions unrelated to our substring
921-
if !self.byteset_contains(haystack[self.position + needle.len() - 1]) {
927+
if !self.byteset_contains(tail_byte) {
922928
self.position += needle.len();
923929
if !long_period {
924930
self.memory = 0;
@@ -986,17 +992,23 @@ impl TwoWaySearcher {
986992
let old_end = self.end;
987993
'search: loop {
988994
// Check that we have room to search in
989-
if needle.len() > self.end {
990-
self.end = 0;
991-
return S::rejecting(0, old_end);
992-
}
995+
// end - needle.len() will wrap around when there is no more room,
996+
// but due to slice length limits it can never wrap all the way back
997+
// into the length of haystack.
998+
let front_byte = match haystack.get(self.end.wrapping_sub(needle.len())) {
999+
Some(&b) => b,
1000+
None => {
1001+
self.end = 0;
1002+
return S::rejecting(0, old_end);
1003+
}
1004+
};
9931005

9941006
if S::use_early_reject() && old_end != self.end {
9951007
return S::rejecting(self.end, old_end);
9961008
}
9971009

9981010
// Quickly skip by large portions unrelated to our substring
999-
if !self.byteset_contains(haystack[self.end - needle.len()]) {
1011+
if !self.byteset_contains(front_byte) {
10001012
self.end -= needle.len();
10011013
if !long_period {
10021014
self.memory_back = needle.len();

0 commit comments

Comments
 (0)