@@ -56,15 +56,15 @@ pub fn from_slice(s: &str) -> ~str {
56
56
57
57
/// `ToStr` for owned strings: the owned result is produced by `from_slice`.
impl ToStr for ~str {
    #[inline(always)]
    fn to_str(&self) -> ~str {
        // Dereference to the string itself and hand it to `from_slice`,
        // which returns a `~str`.
        from_slice(*self)
    }
}
61
61
impl ToStr for & ' self str {
62
62
#[ inline( always) ]
63
- fn to_str ( & self ) -> ~str { :: str :: from_slice ( * self ) }
63
+ fn to_str ( & self ) -> ~str { from_slice ( * self ) }
64
64
}
65
65
/// `ToStr` for managed strings: the owned result is produced by `from_slice`.
impl ToStr for @str {
    #[inline(always)]
    fn to_str(&self) -> ~str {
        // `*self` is the managed string; `from_slice` returns a `~str`.
        from_slice(*self)
    }
}
69
69
70
70
/**
@@ -383,7 +383,7 @@ Section: Transforming strings
383
383
*/
384
384
385
385
/**
386
- * Converts a string to a vector of bytes
386
+ * Converts a string to a unique vector of bytes
387
387
*
388
388
* The result vector is not null-terminated.
389
389
*/
@@ -403,22 +403,19 @@ pub fn byte_slice<T>(s: &str, f: &fn(v: &[u8]) -> T) -> T {
403
403
}
404
404
}
405
405
406
- /// Convert a string to a vector of characters
407
- pub fn chars ( s : & str ) -> ~[ char ] {
408
- let mut buf = ~[ ] , i = 0 ;
409
- let len = len ( s) ;
410
- while i < len {
411
- let CharRange { ch, next} = char_range_at ( s, i) ;
412
- unsafe { buf. push ( ch) ; }
413
- i = next;
406
+ /// Convert a string to a unique vector of characters
407
+ pub fn to_chars ( s : & str ) -> ~[ char ] {
408
+ let mut buf = ~[ ] ;
409
+ for each_char( s) |c| {
410
+ buf. push ( c) ;
414
411
}
415
412
buf
416
413
}
417
414
418
415
/**
419
416
* Take a substring of another.
420
417
*
421
- * Returns a string containing `n` characters starting at byte offset
418
+ * Returns a slice pointing at `n` characters starting from byte offset
422
419
* `begin`.
423
420
*/
424
421
pub fn substr ( s : & ' a str , begin : uint , n : uint ) -> & ' a str {
@@ -437,10 +434,17 @@ pub fn slice(s: &'a str, begin: uint, end: uint) -> &'a str {
437
434
unsafe { raw:: slice_bytes ( s, begin, end) }
438
435
}
439
436
440
- /// Splits a string into substrings at each occurrence of a given
441
- /// character.
442
- pub fn split_char ( s : & str , sep : char ) -> ~[ ~str ] {
443
- split_char_inner ( s, sep, len ( s) , true , true )
437
+ /// Splits a string into substrings at each occurrence of a given character
438
+ pub fn each_split_char ( s : & str , sep : char , it : & fn ( & str ) -> bool ) {
439
+ each_split_char_inner ( s, sep, len ( s) , true , true , it)
440
+ }
441
+
442
+ /**
443
+ * Like `split_char`, but a trailing empty string is omitted
444
+ * (e.g. `split_char_no_trailing("A B ",' ') == ~[~"A",~"B"]`)
445
+ */
446
+ pub fn each_split_char_no_trailing ( s : & str , sep : char , it : & fn ( & str ) -> bool ) {
447
+ each_split_char_inner ( s, sep, len ( s) , true , false , it)
444
448
}
445
449
446
450
/**
@@ -449,35 +453,25 @@ pub fn split_char(s: &str, sep: char) -> ~[~str] {
449
453
*
450
454
* The byte must be a valid UTF-8/ASCII byte
451
455
*/
452
- pub fn splitn_char ( s : & str , sep : char , count : uint ) -> ~ [ ~ str ] {
453
- split_char_inner ( s, sep, count, true , true )
456
+ pub fn each_splitn_char ( s : & str , sep : char , count : uint , it : & fn ( & str ) -> bool ) {
457
+ each_split_char_inner ( s, sep, count, true , true , it )
454
458
}
455
459
456
460
/// Like `split_char`, but omits empty strings from the returned vector
457
- pub fn split_char_nonempty ( s : & str , sep : char ) -> ~[ ~str ] {
458
- split_char_inner ( s, sep, len ( s) , false , false )
459
- }
460
-
461
- /**
462
- * Like `split_char`, but a trailing empty string is omitted
463
- * (e.g. `split_char_no_trailing("A B ",' ') == ~[~"A",~"B"]`)
464
- */
465
- pub fn split_char_no_trailing ( s : & str , sep : char ) -> ~[ ~str ] {
466
- split_char_inner ( s, sep, len ( s) , true , false )
461
+ pub fn each_split_char_nonempty ( s : & str , sep : char , it : & fn ( & str ) -> bool ) {
462
+ each_split_char_inner ( s, sep, len ( s) , false , false , it)
467
463
}
468
464
469
- fn split_char_inner ( s : & str , sep : char , count : uint , allow_empty : bool ,
470
- allow_trailing_empty : bool ) -> ~ [ ~ str ] {
465
+ fn each_split_char_inner ( s : & str , sep : char , count : uint , allow_empty : bool ,
466
+ allow_trailing_empty : bool ) , it : & fn ( & str ) -> bool ) {
471
467
if sep < 128 u as char {
472
468
let b = sep as u8 , l = len ( s) ;
473
- let mut result = ~ [ ] , done = 0 u;
469
+ let mut done = 0 u;
474
470
let mut i = 0 u, start = 0 u;
475
471
while i < l && done < count {
476
472
if s[ i] == b {
477
473
if allow_empty || start < i {
478
- unsafe {
479
- result. push ( raw:: slice_bytes_unique ( s, start, i) ) ;
480
- }
474
+ if !it ( unsafe { raw:: slice_bytes ( s, start, i) } ) { return ; }
481
475
}
482
476
start = i + 1 u;
483
477
done += 1 u;
@@ -486,68 +480,57 @@ fn split_char_inner(s: &str, sep: char, count: uint, allow_empty: bool,
486
480
}
487
481
// only push a non-empty trailing substring
488
482
if allow_trailing_empty || start < l {
489
- unsafe { result . push ( raw:: slice_bytes_unique ( s, start, l) ) } ;
483
+ if ! it ( unsafe { raw:: slice_bytes ( s, start, l) } ) { return ; }
490
484
}
491
- result
492
485
} else {
493
- split_inner ( s, |cur| cur == sep, count, allow_empty, allow_trailing_empty)
486
+ each_split_inner ( s, |cur| cur == sep, count, allow_empty, allow_trailing_empty, it )
494
487
}
495
488
}
496
489
497
-
498
490
/// Splits a string into substrings using a character function
499
- pub fn split ( s : & str , sepfn : & fn ( char ) -> bool ) -> ~[ ~str ] {
500
- split_inner ( s, sepfn, len ( s) , true , true )
491
+ pub fn each_split ( s : & str , sepfn : & fn ( char ) -> bool , it : & fn ( & str ) -> bool ) {
492
+ each_split_inner ( s, sepfn, len ( s) , true , true , it)
493
+ }
494
+
495
+ /**
496
+ * Like `split`, but a trailing empty string is omitted
497
+ * (e.g. `split_no_trailing("A B ",' ') == ~[~"A",~"B"]`)
498
+ */
499
+ pub fn each_split_no_trailing ( s : & str , sepfn : & fn ( char ) -> bool , it : & fn ( & str ) -> bool ) {
500
+ each_split_inner ( s, sepfn, len ( s) , true , false , it)
501
501
}
502
502
503
503
/**
504
504
* Splits a string into substrings using a character function, cutting at
505
505
* most `count` times.
506
506
*/
507
- pub fn splitn ( s : & str ,
508
- sepfn : & fn ( char ) -> bool ,
509
- count : uint )
510
- -> ~[ ~str ] {
511
- split_inner ( s, sepfn, count, true , true )
507
+ pub fn each_splitn ( s : & str , sepfn : & fn ( char ) -> bool , count : uint , it : & fn ( & str ) -> bool ) {
508
+ each_split_inner ( s, sepfn, count, true , true , it)
512
509
}
513
510
514
511
/// Like `split`, but omits empty strings from the returned vector
515
- pub fn split_nonempty ( s : & str , sepfn : & fn ( char ) -> bool ) -> ~[ ~str ] {
516
- split_inner ( s, sepfn, len ( s) , false , false )
517
- }
518
-
519
-
520
- /**
521
- * Like `split`, but a trailing empty string is omitted
522
- * (e.g. `split_no_trailing("A B ",' ') == ~[~"A",~"B"]`)
523
- */
524
- pub fn split_no_trailing ( s : & str , sepfn : & fn ( char ) -> bool ) -> ~[ ~str ] {
525
- split_inner ( s, sepfn, len ( s) , true , false )
512
+ pub fn each_split_nonempty ( s : & str , sepfn : & fn ( char ) -> bool , it : & fn ( & str ) -> bool ) {
513
+ each_split_inner ( s, sepfn, len ( s) , false , false , it)
526
514
}
527
515
528
- fn split_inner ( s : & str , sepfn : & fn ( cc : char ) -> bool , count : uint ,
529
- allow_empty : bool , allow_trailing_empty : bool ) -> ~ [ ~ str ] {
516
+ pure fn each_split_inner ( s : & str , sepfn : & fn ( cc : char ) -> bool , count : uint ,
517
+ allow_empty : bool , allow_trailing_empty : bool ) , it : & fn ( & str ) -> bool ) {
530
518
let l = len ( s) ;
531
- let mut result = ~ [ ] , i = 0 u, start = 0 u, done = 0 u;
519
+ let mut i = 0 u, start = 0 u, done = 0 u;
532
520
while i < l && done < count {
533
521
let CharRange { ch, next} = char_range_at ( s, i) ;
534
522
if sepfn ( ch) {
535
523
if allow_empty || start < i {
536
- unsafe {
537
- result. push ( raw:: slice_bytes_unique ( s, start, i) ) ;
538
- }
524
+ if !it ( unsafe { raw:: slice_bytes ( s, start, i) } ) { return ; }
539
525
}
540
526
start = next;
541
527
done += 1 u;
542
528
}
543
529
i = next;
544
530
}
545
531
if allow_trailing_empty || start < l {
546
- unsafe {
547
- result. push ( raw:: slice_bytes_unique ( s, start, l) ) ;
548
- }
532
+ if !it ( unsafe { raw:: slice_bytes ( s, start, l) } ) { return ; }
549
533
}
550
- result
551
534
}
552
535
553
536
// See Issue #1932 for why this is a naive search
@@ -596,22 +579,18 @@ fn iter_between_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint)) {
596
579
* fail_unless!(["", "XXX", "YYY", ""] == split_str(".XXX.YYY.", "."))
597
580
* ~~~
598
581
*/
599
- pub fn split_str ( s : & ' a str , sep : & ' b str ) -> ~[ ~str ] {
600
- let mut result = ~[ ] ;
582
+ pub fn each_split_str ( s : & ' a str , sep : & ' b str , it : & fn ( & str ) -> bool ) {
601
583
do iter_between_matches ( s, sep) |from, to| {
602
- unsafe { result . push ( raw:: slice_bytes_unique ( s, from, to) ) ; }
584
+ if ! it ( unsafe { raw:: slice_bytes ( s, from, to) } ) { return ; }
603
585
}
604
- result
605
586
}
606
587
607
- pub fn split_str_nonempty ( s : & ' a str , sep : & ' b str ) -> ~[ ~str ] {
608
- let mut result = ~[ ] ;
588
+ pub fn each_split_str_nonempty ( s : & ' a str , sep : & ' b str , it : & fn ( & str ) -> bool ) {
609
589
do iter_between_matches ( s, sep) |from, to| {
610
590
if to > from {
611
- unsafe { result . push ( raw:: slice_bytes_unique ( s, from, to) ) ; }
591
+ if ! it ( unsafe { raw:: slice_bytes ( s, from, to) } ) { return ; }
612
592
}
613
593
}
614
- result
615
594
}
616
595
617
596
/// Levenshtein Distance between two strings
@@ -651,34 +630,32 @@ pub fn levdistance(s: &str, t: &str) -> uint {
651
630
/**
652
631
* Splits a string into a vector of the substrings separated by LF ('\n').
653
632
*/
654
- pub fn lines ( s : & str ) -> ~[ ~str ] {
655
- split_char_no_trailing ( s, '\n' )
656
- }
633
+ pub fn each_line ( s : & str , it : & fn ( & str ) -> bool ) { each_split_char ( s, '\n' , it) }
657
634
658
635
/**
659
636
* Splits a string into a vector of the substrings separated by LF ('\n')
660
637
* and/or CR LF ("\r\n")
661
638
*/
662
- pub fn lines_any ( s : & str ) -> ~[ ~str ] {
663
- vec:: map ( lines ( s) , |s| {
664
- let l = len ( * s) ;
665
- let mut cp = copy * s;
639
+ pub fn each_line_any ( s : & str , it : & fn ( & str ) -> bool ) {
640
+ for each_line( s) |s| {
641
+ let l = s. len ( ) ;
666
642
if l > 0 u && s[ l - 1 u] == '\r' as u8 {
667
- unsafe { raw:: set_len ( & mut cp, l - 1 u) ; }
643
+ if !it ( unsafe { raw:: slice_bytes ( s, 0 , l - 1 ) } ) { return ; }
644
+ } else {
645
+ if !it ( s ) { return ; }
668
646
}
669
- cp
670
- } )
647
+ }
671
648
}
672
649
673
650
/// Splits a string into a vector of the substrings separated by whitespace
674
- pub fn words ( s : & str ) -> ~ [ ~ str ] {
675
- split_nonempty ( s, char:: is_whitespace)
651
+ pub fn each_word ( s : & str , it : & fn ( & str ) -> bool ) {
652
+ each_split_nonempty ( s, |c| char:: is_whitespace ( c ) , it )
676
653
}
677
654
678
655
/** Split a string into a vector of substrings,
679
- * each of which is less than a limit
656
+ * each of which is less bytes long than a limit
680
657
*/
681
- pub fn split_within ( ss : & str , lim : uint ) -> ~ [ ~ str ] {
658
+ pub fn each_split_within ( ss : & str , lim : uint , it : & fn ( & str ) -> bool ) {
682
659
let words = str:: words ( ss) ;
683
660
684
661
// empty?
@@ -705,6 +682,22 @@ pub fn split_within(ss: &str, lim: uint) -> ~[~str] {
705
682
if row != ~" " { rows. push ( row) ; }
706
683
707
684
rows
685
+ // NOTE: Finish change here
686
+
687
+ let mut last_slice_i = 0 , last_word_i = 0 , word_start = true ;
688
+ for each_chari( s) |i, c| {
689
+ if ( i - last_slice_i) <= lim {
690
+ if char:: is_whitespace ( c) {
691
+
692
+ } else {
693
+
694
+ }
695
+ } else {
696
+
697
+ }
698
+
699
+
700
+ }
708
701
}
709
702
710
703
@@ -997,10 +990,17 @@ pub fn eachi_reverse(s: &str, it: &fn(uint, u8) -> bool) {
997
990
}
998
991
}
999
992
1000
- /// Iterates over the chars in a string
993
+
994
+ /// Iterate over each char of a string, without allocating
1001
995
#[ inline( always) ]
1002
996
pub fn each_char ( s : & str , it : & fn ( char ) -> bool ) {
1003
- each_chari ( s, |_i, c| it ( c) )
997
+ let mut i = 0 ;
998
+ let len = len ( s) ;
999
+ while i < len {
1000
+ let CharRange { ch, next} = char_range_at ( s, i) ;
1001
+ if !it ( ch) { return ; }
1002
+ i = next;
1003
+ }
1004
1004
}
1005
1005
1006
1006
/// Iterates over the chars in a string, with indices
@@ -1038,31 +1038,34 @@ pub fn each_chari_reverse(s: &str, it: &fn(uint, char) -> bool) {
1038
1038
}
1039
1039
}
1040
1040
1041
- /// Apply a function to each substring after splitting by character
1041
+ /////////////////////////////////////////////////////////////////////////////////////////////////
1042
+ // NOTE: Remove afterwards
1043
+ /* /// Apply a function to each substring after splitting by character
1042
1044
pub fn split_char_each(ss: &str, cc: char, ff: &fn(v: &str) -> bool) {
1043
1045
vec::each(split_char(ss, cc), |s| ff(*s))
1044
1046
}
1045
1047
1046
- / **
1048
+ **
1047
1049
* Apply a function to each substring after splitting by character, up to
1048
1050
* `count` times
1049
- */
1051
+ *
1050
1052
pub fn splitn_char_each(ss: &str, sep: char, count: uint,
1051
1053
ff: &fn(v: &str) -> bool) {
1052
1054
vec::each(splitn_char(ss, sep, count), |s| ff(*s))
1053
1055
}
1054
1056
1055
- /// Apply a function to each word
1057
+ / Apply a function to each word
1056
1058
pub fn words_each(ss: &str, ff: &fn(v: &str) -> bool) {
1057
1059
vec::each(words(ss), |s| ff(*s))
1058
1060
}
1059
1061
1060
- / **
1062
+ **
1061
1063
* Apply a function to each line (by '\n')
1062
- */
1064
+ *
1063
1065
pub fn lines_each(ss: &str, ff: &fn(v: &str) -> bool) {
1064
1066
vec::each(lines(ss), |s| ff(*s))
1065
- }
1067
+ } */
1068
+ /////////////////////////////////////////////////////////////////////////////////////////////////
1066
1069
1067
1070
/*
1068
1071
Section: Searching
@@ -2511,7 +2514,7 @@ impl OwnedStr for ~str {
2511
2514
/// `Clone` for owned strings: the clone is produced by `from_slice`.
impl Clone for ~str {
    #[inline(always)]
    fn clone(&self) -> ~str {
        // Go straight to `from_slice` rather than detouring through `to_str`.
        from_slice(*self)
    }
}
2517
2520
0 commit comments