Skip to content

Commit 1d2b4b9

Browse files
committed
Optimize str::replace
1 parent d802c1f commit 1d2b4b9

File tree

1 file changed

+49
-40
lines changed

1 file changed

+49
-40
lines changed

src/libcore/str.rs

Lines changed: 49 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ export
3737
slice,
3838
split, splitn, split_nonempty,
3939
split_char, splitn_char, split_char_nonempty,
40-
split_str,
40+
split_str, split_str_nonempty,
4141
lines,
4242
lines_any,
4343
words,
@@ -494,31 +494,20 @@ fn split_inner(s: str, sepfn: fn(cc: char) -> bool, count: uint,
494494
result
495495
}
496496

497-
/*
498-
Function: split_str
499-
500-
Splits a string into a vector of the substrings separated by a given string
501-
502-
Note that this has recently been changed. For example:
503-
> assert ["", "XXX", "YYY", ""] == split_str(".XXX.YYY.", ".")
504-
505-
FIXME: Boyer-Moore should be faster
506-
*/
507-
fn split_str(s: str, sep: str) -> [str] unsafe {
497+
// FIXME use Boyer-Moore
498+
fn iter_matches(s: str, sep: str, f: fn(uint, uint)) {
508499
let sep_len = len(sep), l = len(s);
509500
assert sep_len > 0u;
510-
let result = [], i = 0u, start = 0u;
511-
let match_start = 0u, match_i = 0u;
501+
let i = 0u, match_start = 0u, match_i = 0u;
512502

513503
while i < l {
514504
if s[i] == sep[match_i] {
515505
if match_i == 0u { match_start = i; }
516506
match_i += 1u;
517507
// Found a match
518508
if match_i == sep_len {
519-
result += [unsafe::slice_bytes(s, start, match_start)];
509+
f(match_start, i + 1u);
520510
match_i = 0u;
521-
start = i + 1u;
522511
}
523512
i += 1u;
524513
} else {
@@ -531,7 +520,40 @@ fn split_str(s: str, sep: str) -> [str] unsafe {
531520
}
532521
}
533522
}
534-
result += [unsafe::slice_bytes(s, start, l)];
523+
}
524+
525+
fn iter_between_matches(s: str, sep: str, f: fn(uint, uint)) {
526+
let last_end = 0u;
527+
iter_matches(s, sep) {|from, to|
528+
f(last_end, from);
529+
last_end = to;
530+
}
531+
f(last_end, len(s));
532+
}
533+
534+
/*
535+
Function: split_str
536+
537+
Splits a string into a vector of the substrings separated by a given string
538+
539+
Note that this has recently been changed. For example:
540+
> assert ["", "XXX", "YYY", ""] == split_str(".XXX.YYY.", ".")
541+
*/
542+
fn split_str(s: str, sep: str) -> [str] {
543+
let result = [];
544+
iter_between_matches(s, sep) {|from, to|
545+
unsafe { result += [unsafe::slice_bytes(s, from, to)]; }
546+
}
547+
result
548+
}
549+
550+
fn split_str_nonempty(s: str, sep: str) -> [str] {
551+
let result = [];
552+
iter_between_matches(s, sep) {|from, to|
553+
if to > from {
554+
unsafe { result += [unsafe::slice_bytes(s, from, to)]; }
555+
}
556+
}
535557
result
536558
}
537559

@@ -587,7 +609,6 @@ fn to_upper(s: str) -> str {
587609
map(s, char::to_upper)
588610
}
589611

590-
// FIXME: This is super-inefficient: stop the extra slicing copies
591612
/*
592613
Function: replace
593614
@@ -604,24 +625,12 @@ Returns:
604625
The original string with all occurrences of `from` replaced with `to`
605626
*/
606627
fn replace(s: str, from: str, to: str) -> str unsafe {
607-
assert is_not_empty(from);
608-
if len(s) == 0u {
609-
ret "";
610-
} else if starts_with(s, from) {
611-
ret to + replace(
612-
unsafe::slice_bytes(s, len(from), len(s)),
613-
from, to);
614-
} else {
615-
let idx;
616-
alt find(s, from) {
617-
some(x) { idx = x; }
618-
none { ret s; }
619-
}
620-
let before = unsafe::slice_bytes(s, 0u, idx as uint);
621-
let after = unsafe::slice_bytes(s, idx as uint + len(from),
622-
len(s));
623-
ret before + to + replace(after, from, to);
628+
let result = "", first = true;
629+
iter_between_matches(s, from) {|start, end|
630+
if first { first = false; } else { result += to; }
631+
unsafe { result += unsafe::slice_bytes(s, start, end); }
624632
}
633+
result
625634
}
626635

627636
/*
@@ -1911,12 +1920,12 @@ mod tests {
19111920
#[test]
19121921
fn test_replace() {
19131922
let a = "a";
1914-
assert (replace("", a, "b") == "");
1915-
assert (replace("a", a, "b") == "b");
1916-
assert (replace("ab", a, "b") == "bb");
1923+
assert replace("", a, "b") == "";
1924+
assert replace("a", a, "b") == "b";
1925+
assert replace("ab", a, "b") == "bb";
19171926
let test = "test";
1918-
assert (replace(" test test ", test, "toast") == " toast toast ");
1919-
assert (replace(" test test ", test, "") == " ");
1927+
assert replace(" test test ", test, "toast") == " toast toast ";
1928+
assert replace(" test test ", test, "") == " ";
19201929
}
19211930

19221931
#[test]

0 commit comments

Comments
 (0)