Skip to content

Commit dda4a88

Browse files
committed
Auto merge of #83515 - tamird:string-remove-matches-rev, r=m-ou-se
String::remove_matches: O(n^2) -> O(n). Copy only non-matching bytes. Replace collection of matches into a vector with iteration over rejections, exploiting the guarantee that we only mutate parts of the haystack that have already been searched. r? `@joshtriplett`
2 parents e4a6032 + 977903b commit dda4a88

File tree

1 file changed

+39
-22
lines changed

1 file changed

+39
-22
lines changed

library/alloc/src/string.rs

+39-22
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ use core::fmt;
4848
use core::hash;
4949
#[cfg(not(no_global_oom_handling))]
5050
use core::iter::FromIterator;
51-
use core::iter::FusedIterator;
51+
use core::iter::{from_fn, FusedIterator};
5252
#[cfg(not(no_global_oom_handling))]
5353
use core::ops::Add;
5454
#[cfg(not(no_global_oom_handling))]
@@ -1290,32 +1290,49 @@ impl String {
12901290
{
12911291
use core::str::pattern::Searcher;
12921292

1293-
let matches = {
1293+
let rejections = {
12941294
let mut searcher = pat.into_searcher(self);
1295-
let mut matches = Vec::new();
1296-
1297-
while let Some(m) = searcher.next_match() {
1298-
matches.push(m);
1299-
}
1300-
1301-
matches
1295+
// Per Searcher::next:
1296+
//
1297+
// A Match result needs to contain the whole matched pattern,
1298+
// however Reject results may be split up into arbitrarily many
1299+
// adjacent fragments. Both ranges may have zero length.
1300+
//
1301+
// In practice the implementation of Searcher::next_match tends to
1302+
// be more efficient, so we use it here and do some work to invert
1303+
// matches into rejections since that's what we want to copy below.
1304+
let mut front = 0;
1305+
let rejections: Vec<_> = from_fn(|| {
1306+
let (start, end) = searcher.next_match()?;
1307+
let prev_front = front;
1308+
front = end;
1309+
Some((prev_front, start))
1310+
})
1311+
.collect();
1312+
rejections.into_iter().chain(core::iter::once((front, self.len())))
13021313
};
13031314

1304-
let len = self.len();
1305-
let mut shrunk_by = 0;
1315+
let mut len = 0;
1316+
let ptr = self.vec.as_mut_ptr();
1317+
1318+
for (start, end) in rejections {
1319+
let count = end - start;
1320+
if start != len {
1321+
// SAFETY: per Searcher::next:
1322+
//
1323+
// The stream of Match and Reject values up to a Done will
1324+
// contain index ranges that are adjacent, non-overlapping,
1325+
// covering the whole haystack, and lying on UTF-8
1326+
// boundaries.
1327+
unsafe {
1328+
ptr::copy(ptr.add(start), ptr.add(len), count);
1329+
}
1330+
}
1331+
len += count;
1332+
}
13061333

1307-
// SAFETY: start and end will be on utf8 byte boundaries per
1308-
// the Searcher docs
13091334
unsafe {
1310-
for (start, end) in matches {
1311-
ptr::copy(
1312-
self.vec.as_mut_ptr().add(end - shrunk_by),
1313-
self.vec.as_mut_ptr().add(start - shrunk_by),
1314-
len - end,
1315-
);
1316-
shrunk_by += end - start;
1317-
}
1318-
self.vec.set_len(len - shrunk_by);
1335+
self.vec.set_len(len);
13191336
}
13201337
}
13211338

0 commit comments

Comments
 (0)