Skip to content

Commit d74606e

Browse files
committed
pre-rebase commit
1 parent 624a685 commit d74606e

File tree

2 files changed

+119
-101
lines changed

2 files changed

+119
-101
lines changed

src/libcore/str.rs

Lines changed: 102 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -56,15 +56,15 @@ pub fn from_slice(s: &str) -> ~str {
5656

5757
impl ToStr for ~str {
5858
#[inline(always)]
59-
fn to_str(&self) -> ~str { copy *self }
59+
fn to_str(&self) -> ~str { from_slice(*self) }
6060
}
6161
impl ToStr for &'self str {
6262
#[inline(always)]
63-
fn to_str(&self) -> ~str { ::str::from_slice(*self) }
63+
fn to_str(&self) -> ~str { from_slice(*self) }
6464
}
6565
impl ToStr for @str {
6666
#[inline(always)]
67-
fn to_str(&self) -> ~str { ::str::from_slice(*self) }
67+
fn to_str(&self) -> ~str { from_slice(*self) }
6868
}
6969

7070
/**
@@ -383,7 +383,7 @@ Section: Transforming strings
383383
*/
384384

385385
/**
386-
* Converts a string to a vector of bytes
386+
* Converts a string to a unique vector of bytes
387387
*
388388
* The result vector is not null-terminated.
389389
*/
@@ -403,22 +403,19 @@ pub fn byte_slice<T>(s: &str, f: &fn(v: &[u8]) -> T) -> T {
403403
}
404404
}
405405

406-
/// Convert a string to a vector of characters
407-
pub fn chars(s: &str) -> ~[char] {
408-
let mut buf = ~[], i = 0;
409-
let len = len(s);
410-
while i < len {
411-
let CharRange {ch, next} = char_range_at(s, i);
412-
unsafe { buf.push(ch); }
413-
i = next;
406+
/// Convert a string to a unique vector of characters
407+
pub fn to_chars(s: &str) -> ~[char] {
408+
let mut buf = ~[];
409+
for each_char(s) |c| {
410+
buf.push(c);
414411
}
415412
buf
416413
}
417414

418415
/**
419416
* Take a substring of another.
420417
*
421-
* Returns a string containing `n` characters starting at byte offset
418+
* Returns a slice pointing at `n` characters starting from byte offset
422419
* `begin`.
423420
*/
424421
pub fn substr(s: &'a str, begin: uint, n: uint) -> &'a str {
@@ -437,10 +434,17 @@ pub fn slice(s: &'a str, begin: uint, end: uint) -> &'a str {
437434
unsafe { raw::slice_bytes(s, begin, end) }
438435
}
439436

440-
/// Splits a string into substrings at each occurrence of a given
441-
/// character.
442-
pub fn split_char(s: &str, sep: char) -> ~[~str] {
443-
split_char_inner(s, sep, len(s), true, true)
437+
/// Splits a string into substrings at each occurrence of a given character
438+
pub fn each_split_char(s: &str, sep: char, it: &fn(&str) -> bool) {
439+
each_split_char_inner(s, sep, len(s), true, true, it)
440+
}
441+
442+
/**
443+
* Like `each_split_char`, but a trailing empty string is omitted
444+
* (e.g. splitting `"A B "` on `' '` yields `"A"` and then `"B"`)
445+
*/
446+
pub fn each_split_char_no_trailing(s: &str, sep: char, it: &fn(&str) -> bool) {
447+
each_split_char_inner(s, sep, len(s), true, false, it)
444448
}
445449

446450
/**
@@ -449,35 +453,25 @@ pub fn split_char(s: &str, sep: char) -> ~[~str] {
449453
*
450454
* The byte must be a valid UTF-8/ASCII byte
451455
*/
452-
pub fn splitn_char(s: &str, sep: char, count: uint) -> ~[~str] {
453-
split_char_inner(s, sep, count, true, true)
456+
pub fn each_splitn_char(s: &str, sep: char, count: uint, it: &fn(&str) -> bool) {
457+
each_split_char_inner(s, sep, count, true, true, it)
454458
}
455459

456460
/// Like `each_split_char`, but empty substrings are not passed to the iterator
457-
pub fn split_char_nonempty(s: &str, sep: char) -> ~[~str] {
458-
split_char_inner(s, sep, len(s), false, false)
459-
}
460-
461-
/**
462-
* Like `split_char`, but a trailing empty string is omitted
463-
* (e.g. `split_char_no_trailing("A B ",' ') == ~[~"A",~"B"]`)
464-
*/
465-
pub fn split_char_no_trailing(s: &str, sep: char) -> ~[~str] {
466-
split_char_inner(s, sep, len(s), true, false)
461+
pub fn each_split_char_nonempty(s: &str, sep: char, it: &fn(&str) -> bool) {
462+
each_split_char_inner(s, sep, len(s), false, false, it)
467463
}
468464

469-
fn split_char_inner(s: &str, sep: char, count: uint, allow_empty: bool,
470-
allow_trailing_empty: bool) -> ~[~str] {
465+
fn each_split_char_inner(s: &str, sep: char, count: uint, allow_empty: bool,
466+
allow_trailing_empty: bool), it: &fn(&str) -> bool) {
471467
if sep < 128u as char {
472468
let b = sep as u8, l = len(s);
473-
let mut result = ~[], done = 0u;
469+
let mut done = 0u;
474470
let mut i = 0u, start = 0u;
475471
while i < l && done < count {
476472
if s[i] == b {
477473
if allow_empty || start < i {
478-
unsafe {
479-
result.push(raw::slice_bytes_unique(s, start, i));
480-
}
474+
if !it( unsafe{ raw::slice_bytes(s, start, i) } ) { return; }
481475
}
482476
start = i + 1u;
483477
done += 1u;
@@ -486,68 +480,57 @@ fn split_char_inner(s: &str, sep: char, count: uint, allow_empty: bool,
486480
}
487481
// only push a non-empty trailing substring
488482
if allow_trailing_empty || start < l {
489-
unsafe { result.push(raw::slice_bytes_unique(s, start, l) ) };
483+
if !it( unsafe{ raw::slice_bytes(s, start, l) } ) { return; }
490484
}
491-
result
492485
} else {
493-
split_inner(s, |cur| cur == sep, count, allow_empty, allow_trailing_empty)
486+
each_split_inner(s, |cur| cur == sep, count, allow_empty, allow_trailing_empty, it)
494487
}
495488
}
496489

497-
498490
/// Splits a string into substrings using a character function
499-
pub fn split(s: &str, sepfn: &fn(char) -> bool) -> ~[~str] {
500-
split_inner(s, sepfn, len(s), true, true)
491+
pub fn each_split(s: &str, sepfn: &fn(char) -> bool, it: &fn(&str) -> bool) {
492+
each_split_inner(s, sepfn, len(s), true, true, it)
493+
}
494+
495+
/**
496+
* Like `each_split`, but a trailing empty string is omitted
497+
* (e.g. splitting `"A B "` with `|c| c == ' '` yields `"A"` and then `"B"`)
498+
*/
499+
pub fn each_split_no_trailing(s: &str, sepfn: &fn(char) -> bool, it: &fn(&str) -> bool) {
500+
each_split_inner(s, sepfn, len(s), true, false, it)
501501
}
502502

503503
/**
504504
* Splits a string into substrings using a character function, cutting at
505505
* most `count` times.
506506
*/
507-
pub fn splitn(s: &str,
508-
sepfn: &fn(char) -> bool,
509-
count: uint)
510-
-> ~[~str] {
511-
split_inner(s, sepfn, count, true, true)
507+
pub fn each_splitn(s: &str, sepfn: &fn(char) -> bool, count: uint, it: &fn(&str) -> bool) {
508+
each_split_inner(s, sepfn, count, true, true, it)
512509
}
513510

514511
/// Like `each_split`, but empty substrings are not passed to the iterator
515-
pub fn split_nonempty(s: &str, sepfn: &fn(char) -> bool) -> ~[~str] {
516-
split_inner(s, sepfn, len(s), false, false)
517-
}
518-
519-
520-
/**
521-
* Like `split`, but a trailing empty string is omitted
522-
* (e.g. `split_no_trailing("A B ",' ') == ~[~"A",~"B"]`)
523-
*/
524-
pub fn split_no_trailing(s: &str, sepfn: &fn(char) -> bool) -> ~[~str] {
525-
split_inner(s, sepfn, len(s), true, false)
512+
pub fn each_split_nonempty(s: &str, sepfn: &fn(char) -> bool, it: &fn(&str) -> bool) {
513+
each_split_inner(s, sepfn, len(s), false, false, it)
526514
}
527515

528-
fn split_inner(s: &str, sepfn: &fn(cc: char) -> bool, count: uint,
529-
allow_empty: bool, allow_trailing_empty: bool) -> ~[~str] {
516+
pure fn each_split_inner(s: &str, sepfn: &fn(cc: char) -> bool, count: uint,
517+
allow_empty: bool, allow_trailing_empty: bool), it: &fn(&str) -> bool) {
530518
let l = len(s);
531-
let mut result = ~[], i = 0u, start = 0u, done = 0u;
519+
let mut i = 0u, start = 0u, done = 0u;
532520
while i < l && done < count {
533521
let CharRange {ch, next} = char_range_at(s, i);
534522
if sepfn(ch) {
535523
if allow_empty || start < i {
536-
unsafe {
537-
result.push(raw::slice_bytes_unique(s, start, i));
538-
}
524+
if !it( unsafe{ raw::slice_bytes(s, start, i) } ) { return; }
539525
}
540526
start = next;
541527
done += 1u;
542528
}
543529
i = next;
544530
}
545531
if allow_trailing_empty || start < l {
546-
unsafe {
547-
result.push(raw::slice_bytes_unique(s, start, l));
548-
}
532+
if !it( unsafe{ raw::slice_bytes(s, start, l) } ) { return; }
549533
}
550-
result
551534
}
552535

553536
// See Issue #1932 for why this is a naive search
@@ -596,22 +579,18 @@ fn iter_between_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint)) {
596579
* fail_unless!(["", "XXX", "YYY", ""] == split_str(".XXX.YYY.", "."))
597580
* ~~~
598581
*/
599-
pub fn split_str(s: &'a str, sep: &'b str) -> ~[~str] {
600-
let mut result = ~[];
582+
pub fn each_split_str(s: &'a str, sep: &'b str, it: &fn(&str) -> bool) {
601583
do iter_between_matches(s, sep) |from, to| {
602-
unsafe { result.push(raw::slice_bytes_unique(s, from, to)); }
584+
if !it( unsafe { raw::slice_bytes(s, from, to) } ) { return; }
603585
}
604-
result
605586
}
606587

607-
pub fn split_str_nonempty(s: &'a str, sep: &'b str) -> ~[~str] {
608-
let mut result = ~[];
588+
pub fn each_split_str_nonempty(s: &'a str, sep: &'b str, it: &fn(&str) -> bool) {
609589
do iter_between_matches(s, sep) |from, to| {
610590
if to > from {
611-
unsafe { result.push(raw::slice_bytes_unique(s, from, to)); }
591+
if !it( unsafe { raw::slice_bytes(s, from, to) } ) { return; }
612592
}
613593
}
614-
result
615594
}
616595

617596
/// Levenshtein Distance between two strings
@@ -651,34 +630,32 @@ pub fn levdistance(s: &str, t: &str) -> uint {
651630
/**
652631
* Splits a string into a vector of the substrings separated by LF ('\n').
653632
*/
654-
pub fn lines(s: &str) -> ~[~str] {
655-
split_char_no_trailing(s, '\n')
656-
}
633+
pub fn each_line(s: &str, it: &fn(&str) -> bool) { each_split_char(s, '\n', it) }
657634

658635
/**
659636
* Splits a string into a vector of the substrings separated by LF ('\n')
660637
* and/or CR LF ("\r\n")
661638
*/
662-
pub fn lines_any(s: &str) -> ~[~str] {
663-
vec::map(lines(s), |s| {
664-
let l = len(*s);
665-
let mut cp = copy *s;
639+
pub fn each_line_any(s: &str, it: &fn(&str) -> bool) {
640+
for each_line(s) |s| {
641+
let l = s.len();
666642
if l > 0u && s[l - 1u] == '\r' as u8 {
667-
unsafe { raw::set_len(&mut cp, l - 1u); }
643+
if !it( unsafe { raw::slice_bytes(s, 0, l - 1) } ) { return; }
644+
} else {
645+
if !it( s ) { return; }
668646
}
669-
cp
670-
})
647+
}
671648
}
672649

673650
/// Splits a string into a vector of the substrings separated by whitespace
674-
pub fn words(s: &str) -> ~[~str] {
675-
split_nonempty(s, char::is_whitespace)
651+
pub fn each_word(s: &str, it: &fn(&str) -> bool) {
652+
each_split_nonempty(s, |c| char::is_whitespace(c), it)
676653
}
677654

678655
/** Split a string into a vector of substrings,
679-
* each of which is less than a limit
656+
* each of which is fewer bytes long than a limit
680657
*/
681-
pub fn split_within(ss: &str, lim: uint) -> ~[~str] {
658+
pub fn each_split_within(ss: &str, lim: uint, it: &fn(&str) -> bool) {
682659
let words = str::words(ss);
683660

684661
// empty?
@@ -705,6 +682,22 @@ pub fn split_within(ss: &str, lim: uint) -> ~[~str] {
705682
if row != ~"" { rows.push(row); }
706683

707684
rows
685+
// NOTE: Finish change here
686+
687+
let mut last_slice_i = 0, last_word_i = 0, word_start = true;
688+
for each_chari(s) |i, c| {
689+
if (i - last_slice_i) <= lim {
690+
if char::is_whitespace(c) {
691+
692+
} else {
693+
694+
}
695+
} else {
696+
697+
}
698+
699+
700+
}
708701
}
709702

710703

@@ -997,10 +990,17 @@ pub fn eachi_reverse(s: &str, it: &fn(uint, u8) -> bool) {
997990
}
998991
}
999992

1000-
/// Iterates over the chars in a string
993+
994+
/// Iterate over each char of a string, without allocating
1001995
#[inline(always)]
1002996
pub fn each_char(s: &str, it: &fn(char) -> bool) {
1003-
each_chari(s, |_i, c| it(c))
997+
let mut i = 0;
998+
let len = len(s);
999+
while i < len {
1000+
let CharRange {ch, next} = char_range_at(s, i);
1001+
if !it(ch) { return; }
1002+
i = next;
1003+
}
10041004
}
10051005

10061006
/// Iterates over the chars in a string, with indices
@@ -1038,31 +1038,34 @@ pub fn each_chari_reverse(s: &str, it: &fn(uint, char) -> bool) {
10381038
}
10391039
}
10401040

1041-
/// Apply a function to each substring after splitting by character
1041+
/////////////////////////////////////////////////////////////////////////////////////////////////
1042+
// NOTE: Remove afterwards
1043+
/* /// Apply a function to each substring after splitting by character
10421044
pub fn split_char_each(ss: &str, cc: char, ff: &fn(v: &str) -> bool) {
10431045
vec::each(split_char(ss, cc), |s| ff(*s))
10441046
}
10451047
1046-
/**
1048+
**
10471049
* Apply a function to each substring after splitting by character, up to
10481050
* `count` times
1049-
*/
1051+
*
10501052
pub fn splitn_char_each(ss: &str, sep: char, count: uint,
10511053
ff: &fn(v: &str) -> bool) {
10521054
vec::each(splitn_char(ss, sep, count), |s| ff(*s))
10531055
}
10541056
1055-
/// Apply a function to each word
1057+
/ Apply a function to each word
10561058
pub fn words_each(ss: &str, ff: &fn(v: &str) -> bool) {
10571059
vec::each(words(ss), |s| ff(*s))
10581060
}
10591061
1060-
/**
1062+
**
10611063
* Apply a function to each line (by '\n')
1062-
*/
1064+
*
10631065
pub fn lines_each(ss: &str, ff: &fn(v: &str) -> bool) {
10641066
vec::each(lines(ss), |s| ff(*s))
1065-
}
1067+
} */
1068+
/////////////////////////////////////////////////////////////////////////////////////////////////
10661069

10671070
/*
10681071
Section: Searching
@@ -2511,7 +2514,7 @@ impl OwnedStr for ~str {
25112514
impl Clone for ~str {
25122515
#[inline(always)]
25132516
fn clone(&self) -> ~str {
2514-
self.to_str() // hilarious
2517+
from_slice(*self)
25152518
}
25162519
}
25172520

0 commit comments

Comments
 (0)