Skip to content

Commit 4370188

Browse files
committed
Merge pull request #1860 from erickt/master
add str::find_from_bytes and str::index_from_bytes
2 parents 9e8fc36 + 23703c0 commit 4370188

File tree

2 files changed

+96
-47
lines changed

2 files changed

+96
-47
lines changed

src/comp/syntax/codemap.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,8 @@ fn span_to_lines(sp: span, cm: codemap::codemap) -> @file_lines {
157157

158158
fn get_line(fm: filemap, line: int) -> str unsafe {
159159
let begin: uint = fm.lines[line].byte - fm.start_pos.byte;
160-
let end = alt str::byte_index(*fm.src, '\n' as u8, begin) {
160+
let end = alt str::byte_index_from(*fm.src, '\n' as u8, begin,
161+
str::len(*fm.src)) {
161162
some(e) { e }
162163
none { str::len(*fm.src) }
163164
};

src/libcore/str.rs

Lines changed: 94 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ for correctness, but some UTF-8 unsafe functions are also provided.
99
For some heavy-duty uses, we recommend trying std::rope.
1010
*/
1111

12+
import option::{some, none};
13+
1214
export
1315
// Creating a string
1416
from_bytes,
@@ -69,9 +71,11 @@ export
6971
// Searching
7072
index,
7173
byte_index,
74+
byte_index_from,
7275
rindex,
7376
find,
7477
find_bytes,
78+
find_from_bytes,
7579
contains,
7680
starts_with,
7781
ends_with,
@@ -665,8 +669,8 @@ fn replace(s: str, from: str, to: str) : is_not_empty(from) -> str unsafe {
665669
} else {
666670
let idx;
667671
alt find_bytes(s, from) {
668-
option::some(x) { idx = x; }
669-
option::none { ret s; }
672+
some(x) { idx = x; }
673+
none { ret s; }
670674
}
671675
let before = unsafe::slice_bytes(s, 0u, idx as uint);
672676
let after = unsafe::slice_bytes(s, idx as uint + len_bytes(from),
@@ -842,28 +846,34 @@ fn index(ss: str, cc: char) -> option<uint> {
842846

843847
// found here?
844848
if ch == cc {
845-
ret option::some(cii);
849+
ret some(cii);
846850
}
847851

848852
cii += 1u;
849853
bii = next;
850854
}
851855

852856
// wasn't found
853-
ret option::none;
857+
ret none;
854858
}
855859

856860
// Function: byte_index
857861
//
858862
// Returns the index of the first matching byte
859863
// (as option some/none)
860-
fn byte_index(s: str, b: u8, start: uint) -> option<uint> {
861-
let i = start, l = len_bytes(s);
862-
while i < l {
863-
if s[i] == b { ret some(i); }
864-
i += 1u;
865-
}
866-
ret none;
864+
fn byte_index(s: str, b: u8) -> option<uint> {
865+
byte_index_from(s, b, 0u, len_bytes(s))
866+
}
867+
868+
// Function: byte_index_from
869+
//
870+
// Returns the index of the first matching byte within the range [`start`,
871+
// `end`).
872+
// (as option some/none)
873+
fn byte_index_from(s: str, b: u8, start: uint, end: uint) -> option<uint> {
874+
assert end <= len_bytes(s);
875+
876+
str::as_bytes(s) { |v| vec::position_from(v, start, end) { |x| x == b } }
867877
}
868878

869879
// Function: rindex
@@ -880,40 +890,50 @@ fn rindex(ss: str, cc: char) -> option<uint> {
880890

881891
// found here?
882892
if ch == cc {
883-
ret option::some(cii);
893+
ret some(cii);
884894
}
885895
}
886896

887897
// wasn't found
888-
ret option::none;
898+
ret none;
889899
}
890900

891901
//Function: find_bytes
892902
//
893903
// Find the byte position of the first instance of one string
894904
// within another, or return option::none
905+
fn find_bytes(haystack: str, needle: str) -> option<uint> {
906+
find_from_bytes(haystack, needle, 0u, len_bytes(haystack))
907+
}
908+
909+
//Function: find_from_bytes
910+
//
911+
// Find the byte position of the first instance of one string
912+
// within another, or return option::none
895913
//
896914
// FIXME: Boyer-Moore should be significantly faster
897-
fn find_bytes(haystack: str, needle: str) -> option<uint> {
898-
let haystack_len = len_bytes(haystack);
899-
let needle_len = len_bytes(needle);
915+
fn find_from_bytes(haystack: str, needle: str, start: uint, end:uint)
916+
-> option<uint> {
917+
assert end <= len_bytes(haystack);
918+
919+
let needle_len = len_bytes(needle);
900920

901-
if needle_len == 0u { ret option::some(0u); }
902-
if needle_len > haystack_len { ret option::none; }
921+
if needle_len == 0u { ret some(start); }
922+
if needle_len > end { ret none; }
903923

904924
fn match_at(haystack: str, needle: str, ii: uint) -> bool {
905925
let jj = ii;
906926
for c: u8 in needle { if haystack[jj] != c { ret false; } jj += 1u; }
907927
ret true;
908928
}
909929

910-
let ii = 0u;
911-
while ii <= haystack_len - needle_len {
912-
if match_at(haystack, needle, ii) { ret option::some(ii); }
930+
let ii = start;
931+
while ii <= end - needle_len {
932+
if match_at(haystack, needle, ii) { ret some(ii); }
913933
ii += 1u;
914934
}
915935

916-
ret option::none;
936+
ret none;
917937
}
918938

919939
// Function: find
@@ -922,8 +942,8 @@ fn find_bytes(haystack: str, needle: str) -> option<uint> {
922942
// within another, or return option::none
923943
fn find(haystack: str, needle: str) -> option<uint> {
924944
alt find_bytes(haystack, needle) {
925-
option::none { ret option::none; }
926-
option::some(nn) { ret option::some(b2c_pos(haystack, nn)); }
945+
none { ret none; }
946+
some(nn) { ret some(b2c_pos(haystack, nn)); }
927947
}
928948
}
929949

@@ -1522,18 +1542,18 @@ mod tests {
15221542

15231543
#[test]
15241544
fn test_index() {
1525-
assert ( index("hello", 'h') == option::some(0u));
1526-
assert ( index("hello", 'e') == option::some(1u));
1527-
assert ( index("hello", 'o') == option::some(4u));
1528-
assert ( index("hello", 'z') == option::none);
1545+
assert ( index("hello", 'h') == some(0u));
1546+
assert ( index("hello", 'e') == some(1u));
1547+
assert ( index("hello", 'o') == some(4u));
1548+
assert ( index("hello", 'z') == none);
15291549
}
15301550

15311551
#[test]
15321552
fn test_rindex() {
1533-
assert (rindex("hello", 'l') == option::some(3u));
1534-
assert (rindex("hello", 'o') == option::some(4u));
1535-
assert (rindex("hello", 'h') == option::some(0u));
1536-
assert (rindex("hello", 'z') == option::none);
1553+
assert (rindex("hello", 'l') == some(3u));
1554+
assert (rindex("hello", 'o') == some(4u));
1555+
assert (rindex("hello", 'h') == some(0u));
1556+
assert (rindex("hello", 'z') == none);
15371557
}
15381558

15391559
#[test]
@@ -1738,29 +1758,57 @@ mod tests {
17381758
#[test]
17391759
fn test_find_bytes() {
17401760
// byte positions
1741-
assert (find_bytes("banana", "apple pie") == option::none);
1742-
assert (find_bytes("", "") == option::some(0u));
1761+
assert (find_bytes("banana", "apple pie") == none);
1762+
assert (find_bytes("", "") == some(0u));
17431763

17441764
let data = "ประเทศไทย中华Việt Nam";
1745-
assert (find_bytes(data, "") == option::some(0u));
1746-
assert (find_bytes(data, "ประเ") == option::some( 0u));
1747-
assert (find_bytes(data, "ะเ") == option::some( 6u));
1748-
assert (find_bytes(data, "中华") == option::some(27u));
1749-
assert (find_bytes(data, "ไท华") == option::none);
1765+
assert (find_bytes(data, "") == some(0u));
1766+
assert (find_bytes(data, "ประเ") == some( 0u));
1767+
assert (find_bytes(data, "ะเ") == some( 6u));
1768+
assert (find_bytes(data, "中华") == some(27u));
1769+
assert (find_bytes(data, "ไท华") == none);
1770+
}
1771+
1772+
#[test]
1773+
fn test_find_from_bytes() {
1774+
// byte positions
1775+
assert (find_from_bytes("", "", 0u, 0u) == some(0u));
1776+
1777+
let data = "abcabc";
1778+
assert find_from_bytes(data, "ab", 0u, 6u) == some(0u);
1779+
assert find_from_bytes(data, "ab", 2u, 6u) == some(3u);
1780+
assert find_from_bytes(data, "ab", 2u, 4u) == none;
1781+
1782+
let data = "ประเทศไทย中华Việt Nam";
1783+
data += data;
1784+
assert find_from_bytes(data, "", 0u, 43u) == some(0u);
1785+
assert find_from_bytes(data, "", 6u, 43u) == some(6u);
1786+
1787+
assert find_from_bytes(data, "ประ", 0u, 43u) == some( 0u);
1788+
assert find_from_bytes(data, "ทศไ", 0u, 43u) == some(12u);
1789+
assert find_from_bytes(data, "ย中", 0u, 43u) == some(24u);
1790+
assert find_from_bytes(data, "iệt", 0u, 43u) == some(34u);
1791+
assert find_from_bytes(data, "Nam", 0u, 43u) == some(40u);
1792+
1793+
assert find_from_bytes(data, "ประ", 43u, 86u) == some(43u);
1794+
assert find_from_bytes(data, "ทศไ", 43u, 86u) == some(55u);
1795+
assert find_from_bytes(data, "ย中", 43u, 86u) == some(67u);
1796+
assert find_from_bytes(data, "iệt", 43u, 86u) == some(77u);
1797+
assert find_from_bytes(data, "Nam", 43u, 86u) == some(83u);
17501798
}
17511799

17521800
#[test]
17531801
fn test_find() {
17541802
// char positions
1755-
assert (find("banana", "apple pie") == option::none);
1756-
assert (find("", "") == option::some(0u));
1803+
assert (find("banana", "apple pie") == none);
1804+
assert (find("", "") == some(0u));
17571805

17581806
let data = "ประเทศไทย中华Việt Nam";
1759-
assert (find(data, "") == option::some(0u));
1760-
assert (find(data, "ประเ") == option::some(0u));
1761-
assert (find(data, "ะเ") == option::some(2u));
1762-
assert (find(data, "中华") == option::some(9u));
1763-
assert (find(data, "ไท华") == option::none);
1807+
assert (find(data, "") == some(0u));
1808+
assert (find(data, "ประเ") == some(0u));
1809+
assert (find(data, "ะเ") == some(2u));
1810+
assert (find(data, "中华") == some(9u));
1811+
assert (find(data, "ไท华") == none);
17641812
}
17651813

17661814
#[test]

0 commit comments

Comments
 (0)