Skip to content

Commit b43d110

Browse files
committed
optimize str.replace
formatting; rename some variables; refactor replace_ascii; fix function variable names
1 parent 9afe713 commit b43d110

File tree

3 files changed

+63
-1
lines changed

3 files changed

+63
-1
lines changed

library/alloc/src/str.rs

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,33 @@ impl str {
268268
#[stable(feature = "rust1", since = "1.0.0")]
269269
#[inline]
270270
pub fn replace<P: Pattern>(&self, from: P, to: &str) -> String {
271-
let mut result = String::new();
271+
let mut capacity = 0;
272+
273+
/// Returns a lower bound, in bytes, on the length of the string produced by
/// replacing every `from_len`-byte match in an `input_len`-byte input with
/// `to_len` bytes.
///
/// The result of the replacement can never be shorter than the returned
/// value, so it is safe to use as an initial `String` capacity.
#[inline]
fn get_minimum_result_capacity(input_len: usize, from_len: usize, to_len: usize) -> usize {
    // A replacement at least as long as the pattern can never shrink the
    // input. This branch also absorbs `from_len == 0`, so the division below
    // never sees a zero divisor.
    if from_len <= to_len {
        return input_len;
    }
    // The output is shortest when the input holds as many non-overlapping
    // matches as fit. The bytes left over after those matches cannot belong
    // to any match and are copied through unchanged, so they count too.
    // No overflow: the sum is bounded by `input_len` because `to_len < from_len`.
    let max_n_matches = input_len / from_len;
    let unmatched_tail = input_len % from_len;
    max_n_matches * to_len + unmatched_tail
}
285+
286+
// Path for patterns that expose their UTF-8 bytes (`&str`, `&String`); `char` and other patterns return `None` from `as_utf8_bytes` and skip it.
287+
if let Some(from_as_utf8) = from.as_utf8_bytes() {
288+
let from_utf8_len = from_as_utf8.len();
289+
let to_utf8_len = to.as_bytes().len();
290+
// Fast path for ascii
291+
if from_utf8_len == 1 && to_utf8_len == 1 {
292+
return replace_ascii(&self.as_bytes(), from_as_utf8[0], to.as_bytes()[0]);
293+
}
294+
capacity = get_minimum_result_capacity(self.bytes().len(), from_utf8_len, to_utf8_len);
295+
}
296+
297+
let mut result = String::with_capacity(capacity);
272298
let mut last_end = 0;
273299
for (start, part) in self.match_indices(from) {
274300
result.push_str(unsafe { self.get_unchecked(last_end..start) });
@@ -661,3 +687,11 @@ fn convert_while_ascii(b: &[u8], convert: fn(&u8) -> u8) -> Vec<u8> {
661687

662688
out
663689
}
690+
#[inline]
691+
#[cfg(not(test))]
692+
#[cfg(not(no_global_oom_handling))]
693+
/// Builds a new `String` from `utf8_bytes` in which every byte equal to
/// `from` is swapped for `to`; all other bytes are copied unchanged.
///
/// Callers must pass valid UTF-8 in `utf8_bytes` and ASCII bytes for both
/// `from` and `to`. The output is converted without validation, so breaking
/// either requirement would yield a `String` that is not valid UTF-8.
fn replace_ascii(utf8_bytes: &[u8], from: u8, to: u8) -> String {
    // The unchecked conversion below is only sound for ASCII-for-ASCII swaps;
    // catch misuse in debug builds instead of silently building a bad `String`.
    debug_assert!(from.is_ascii() && to.is_ascii());
    let result: Vec<u8> = utf8_bytes.iter().map(|&b| if b == from { to } else { b }).collect();
    // SAFETY: An ASCII byte never occurs inside a multi-byte UTF-8 sequence,
    // so swapping one ASCII byte for another in valid UTF-8 keeps it valid.
    unsafe { String::from_utf8_unchecked(result) }
}

library/alloc/src/string.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2319,6 +2319,11 @@ impl<'b> Pattern for &'b String {
23192319
{
23202320
self[..].strip_suffix_of(haystack)
23212321
}
2322+
2323+
/// Always returns `Some`, wrapping the bytes obtained from `as_bytes()` on
/// the referenced `String`.
#[inline]
2324+
fn as_utf8_bytes(&self) -> Option<&[u8]> {
2325+
Some(self.as_bytes())
2326+
}
23222327
}
23232328

23242329
macro_rules! impl_eq {

library/core/src/str/pattern.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,9 @@ pub trait Pattern: Sized {
160160
None
161161
}
162162
}
163+
164+
/// Returns the pattern as utf-8 bytes if possible.
165+
fn as_utf8_bytes(&self) -> Option<&[u8]>;
163166
}
164167

165168
// Searcher
@@ -599,6 +602,11 @@ impl Pattern for char {
599602
{
600603
self.encode_utf8(&mut [0u8; 4]).strip_suffix_of(haystack)
601604
}
605+
606+
/// Always returns `None` for a `char` pattern.
///
/// NOTE(review): presumably because a `char` holds no UTF-8 buffer that
/// could be borrowed through `&self` (other methods in this impl encode into
/// a temporary `[0u8; 4]`) — confirm this is the intended reason.
#[inline]
607+
fn as_utf8_bytes(&self) -> Option<&[u8]> {
608+
None
609+
}
602610
}
603611

604612
/////////////////////////////////////////////////////////////////////////////
@@ -657,6 +665,11 @@ impl<C: MultiCharEq> Pattern for MultiCharEqPattern<C> {
657665
fn into_searcher(self, haystack: &str) -> MultiCharEqSearcher<'_, C> {
658666
MultiCharEqSearcher { haystack, char_eq: self.0, char_indices: haystack.char_indices() }
659667
}
668+
669+
/// Always returns `None`: this pattern type reports no UTF-8 byte
/// representation.
#[inline]
670+
fn as_utf8_bytes(&self) -> Option<&[u8]> {
671+
None
672+
}
660673
}
661674

662675
unsafe impl<'a, C: MultiCharEq> Searcher<'a> for MultiCharEqSearcher<'a, C> {
@@ -747,6 +760,11 @@ macro_rules! pattern_methods {
747760
{
748761
($pmap)(self).strip_suffix_of(haystack)
749762
}
763+
764+
// Every pattern type whose methods are generated by this macro reports no
// UTF-8 byte representation: the method unconditionally returns `None`.
#[inline]
765+
fn as_utf8_bytes(&self) -> Option<&[u8]> {
766+
None
767+
}
750768
};
751769
}
752770

@@ -1022,6 +1040,11 @@ impl<'b> Pattern for &'b str {
10221040
None
10231041
}
10241042
}
1043+
1044+
/// Always returns `Some`, wrapping the bytes obtained from `as_bytes()` on
/// the string slice used as the pattern.
#[inline]
1045+
fn as_utf8_bytes(&self) -> Option<&[u8]> {
1046+
Some(self.as_bytes())
1047+
}
10251048
}
10261049

10271050
/////////////////////////////////////////////////////////////////////////////

0 commit comments

Comments
 (0)