Skip to content

Commit 977903b

Browse files
committed
String::remove_matches O(n^2) -> O(n)
Copy only non-matching bytes.
1 parent 38013e7 commit 977903b

File tree

1 file changed

+38
-15
lines changed

1 file changed

+38
-15
lines changed

library/alloc/src/string.rs

+38-15
Original file line numberDiff line numberDiff line change
@@ -1290,26 +1290,49 @@ impl String {
12901290
{
12911291
use core::str::pattern::Searcher;
12921292

1293-
let matches: Vec<_> = {
1293+
let rejections = {
12941294
let mut searcher = pat.into_searcher(self);
1295-
from_fn(|| searcher.next_match()).collect()
1295+
// Per Searcher::next:
1296+
//
1297+
// A Match result needs to contain the whole matched pattern,
1298+
// however Reject results may be split up into arbitrarily many
1299+
// adjacent fragments. Both ranges may have zero length.
1300+
//
1301+
// In practice the implementation of Searcher::next_match tends to
1302+
// be more efficient, so we use it here and do some work to invert
1303+
// matches into rejections since that's what we want to copy below.
1304+
let mut front = 0;
1305+
let rejections: Vec<_> = from_fn(|| {
1306+
let (start, end) = searcher.next_match()?;
1307+
let prev_front = front;
1308+
front = end;
1309+
Some((prev_front, start))
1310+
})
1311+
.collect();
1312+
rejections.into_iter().chain(core::iter::once((front, self.len())))
12961313
};
12971314

1298-
let len = self.len();
1299-
let mut shrunk_by = 0;
1315+
let mut len = 0;
1316+
let ptr = self.vec.as_mut_ptr();
1317+
1318+
for (start, end) in rejections {
1319+
let count = end - start;
1320+
if start != len {
1321+
// SAFETY: per Searcher::next:
1322+
//
1323+
// The stream of Match and Reject values up to a Done will
1324+
// contain index ranges that are adjacent, non-overlapping,
1325+
// covering the whole haystack, and lying on utf8
1326+
// boundaries.
1327+
unsafe {
1328+
ptr::copy(ptr.add(start), ptr.add(len), count);
1329+
}
1330+
}
1331+
len += count;
1332+
}
13001333

1301-
// SAFETY: start and end will be on utf8 byte boundaries per
1302-
// the Searcher docs
13031334
unsafe {
1304-
for (start, end) in matches {
1305-
ptr::copy(
1306-
self.vec.as_mut_ptr().add(end - shrunk_by),
1307-
self.vec.as_mut_ptr().add(start - shrunk_by),
1308-
len - end,
1309-
);
1310-
shrunk_by += end - start;
1311-
}
1312-
self.vec.set_len(len - shrunk_by);
1335+
self.vec.set_len(len);
13131336
}
13141337
}
13151338

0 commit comments

Comments
 (0)