Skip to content

Commit 9a16179

Browse files
committed
Added str::from_bytes_with_null() to cast a &[u8] to &str
Added str::subslice_offset(). Renamed slice_bytes_unique to slice_bytes_owned.
1 parent 11f5f73 commit 9a16179

File tree

1 file changed

+128
-8
lines changed

1 file changed

+128
-8
lines changed

src/libcore/str.rs

Lines changed: 128 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ Section: Creating a string
3838
*/
3939

4040
/**
41-
* Convert a vector of bytes to a UTF-8 string
41+
* Convert a vector of bytes to a new UTF-8 string
4242
*
4343
* # Failure
4444
*
@@ -49,9 +49,26 @@ pub fn from_bytes(vv: &const [u8]) -> ~str {
4949
return unsafe { raw::from_bytes(vv) };
5050
}
5151

52+
/**
53+
* Convert a vector of bytes to a UTF-8 string.
54+
* The vector needs to be one byte longer than the string, and end with a 0 byte.
55+
*
56+
* Compared to `from_bytes()`, this fn doesn't need to allocate a new owned str.
57+
*
58+
* # Failure
59+
*
60+
* Fails if invalid UTF-8
61+
* Fails if the final byte of the vector is not 0 (not null-terminated)
62+
*/
63+
pub fn from_bytes_with_null<'a>(vv: &'a [u8]) -> &'a str {
64+
assert!(vv[vv.len() - 1] == 0);
65+
assert!(is_utf8(vv));
66+
return unsafe { raw::from_bytes_with_null(vv) };
67+
}
68+
5269
/// Copy a slice into a new owned str
5370
pub fn from_slice(s: &str) -> ~str {
54-
unsafe { raw::slice_bytes_unique(s, 0, len(s)) }
71+
unsafe { raw::slice_bytes_owned(s, 0, len(s)) }
5572
}
5673

5774
impl ToStr for ~str {
@@ -279,7 +296,7 @@ pub fn pop_char(s: &mut ~str) -> char {
279296
*/
280297
pub fn shift_char(s: &mut ~str) -> char {
281298
let CharRange {ch, next} = char_range_at(*s, 0u);
282-
*s = unsafe { raw::slice_bytes_unique(*s, next, len(*s)) };
299+
*s = unsafe { raw::slice_bytes_owned(*s, next, len(*s)) };
283300
return ch;
284301
}
285302

@@ -784,9 +801,9 @@ pub fn replace(s: &str, from: &str, to: &str) -> ~str {
784801
if first {
785802
first = false;
786803
} else {
787-
unsafe { push_str(&mut result, to); }
804+
push_str(&mut result, to);
788805
}
789-
unsafe { push_str(&mut result, raw::slice_bytes_unique(s, start, end)); }
806+
push_str(&mut result, unsafe{raw::slice_bytes(s, start, end)});
790807
}
791808
result
792809
}
@@ -2037,6 +2054,25 @@ pub fn as_buf<T>(s: &str, f: &fn(*u8, uint) -> T) -> T {
20372054
}
20382055
}
20392056

2057+
/**
2058+
* Returns the byte offset of an inner slice relative to an enclosing outer slice; fails if `inner` does not lie within `outer`
2059+
*/
2060+
#[inline(always)]
2061+
pub fn subslice_offset(outer: &str, inner: &str) -> uint {
2062+
do as_buf(outer) |a, a_len| {
2063+
do as_buf(inner) |b, b_len| {
2064+
let a_start: uint, a_end: uint, b_start: uint, b_end: uint;
2065+
unsafe {
2066+
a_start = cast::transmute(a); a_end = a_len + cast::transmute(a);
2067+
b_start = cast::transmute(b); b_end = b_len + cast::transmute(b);
2068+
}
2069+
assert!(a_start <= b_start);
2070+
assert!(b_end <= a_end);
2071+
b_start - a_start
2072+
}
2073+
}
2074+
}
2075+
20402076
/**
20412077
* Reserves capacity for exactly `n` bytes in the given string, not including
20422078
* the null terminator.
@@ -2158,13 +2194,20 @@ pub mod raw {
21582194
from_buf_len(::cast::reinterpret_cast(&c_str), len)
21592195
}
21602196

2161-
/// Converts a vector of bytes to a string.
2197+
/// Converts a vector of bytes to a new owned string.
21622198
pub unsafe fn from_bytes(v: &const [u8]) -> ~str {
21632199
do vec::as_const_buf(v) |buf, len| {
21642200
from_buf_len(buf, len)
21652201
}
21662202
}
21672203

2204+
/// Converts a vector of bytes to a string.
2205+
/// The byte slice needs to contain valid UTF-8 and needs to be one byte longer than
2206+
/// the string; the extra final byte should be a 0 byte (this is not checked here).
2207+
pub unsafe fn from_bytes_with_null<'a>(v: &'a [u8]) -> &'a str {
2208+
cast::transmute(v)
2209+
}
2210+
21682211
/// Converts a byte to a string.
21692212
pub unsafe fn from_byte(u: u8) -> ~str { raw::from_bytes([u]) }
21702213

@@ -2186,7 +2229,7 @@ pub mod raw {
21862229
* If begin is greater than end.
21872230
* If end is greater than the length of the string.
21882231
*/
2189-
pub unsafe fn slice_bytes_unique(s: &str, begin: uint, end: uint) -> ~str {
2232+
pub unsafe fn slice_bytes_owned(s: &str, begin: uint, end: uint) -> ~str {
21902233
do as_buf(s) |sbuf, n| {
21912234
assert!((begin <= end));
21922235
assert!((end <= n));
@@ -2258,7 +2301,7 @@ pub mod raw {
22582301
let len = len(*s);
22592302
assert!((len > 0u));
22602303
let b = s[0];
2261-
*s = unsafe { raw::slice_bytes_unique(*s, 1u, len) };
2304+
*s = unsafe { raw::slice_bytes_owned(*s, 1u, len) };
22622305
return b;
22632306
}
22642307

@@ -3289,6 +3332,66 @@ mod tests {
32893332
let _x = from_bytes(bb);
32903333
}
32913334
3335+
#[test]
3336+
fn test_unsafe_from_bytes_with_null() {
3337+
let a = [65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
3338+
let b = unsafe { raw::from_bytes_with_null(a) };
3339+
assert_eq!(b, "AAAAAAA");
3340+
}
3341+
3342+
#[test]
3343+
fn test_from_bytes_with_null() {
3344+
let ss = "ศไทย中华Việt Nam";
3345+
let bb = [0xe0_u8, 0xb8_u8, 0xa8_u8,
3346+
0xe0_u8, 0xb9_u8, 0x84_u8,
3347+
0xe0_u8, 0xb8_u8, 0x97_u8,
3348+
0xe0_u8, 0xb8_u8, 0xa2_u8,
3349+
0xe4_u8, 0xb8_u8, 0xad_u8,
3350+
0xe5_u8, 0x8d_u8, 0x8e_u8,
3351+
0x56_u8, 0x69_u8, 0xe1_u8,
3352+
0xbb_u8, 0x87_u8, 0x74_u8,
3353+
0x20_u8, 0x4e_u8, 0x61_u8,
3354+
0x6d_u8, 0x0_u8];
3355+
3356+
assert_eq!(ss, from_bytes_with_null(bb));
3357+
}
3358+
3359+
#[test]
3360+
#[should_fail]
3361+
#[ignore(cfg(windows))]
3362+
fn test_from_bytes_with_null_fail() {
3363+
let bb = [0xff_u8, 0xb8_u8, 0xa8_u8,
3364+
0xe0_u8, 0xb9_u8, 0x84_u8,
3365+
0xe0_u8, 0xb8_u8, 0x97_u8,
3366+
0xe0_u8, 0xb8_u8, 0xa2_u8,
3367+
0xe4_u8, 0xb8_u8, 0xad_u8,
3368+
0xe5_u8, 0x8d_u8, 0x8e_u8,
3369+
0x56_u8, 0x69_u8, 0xe1_u8,
3370+
0xbb_u8, 0x87_u8, 0x74_u8,
3371+
0x20_u8, 0x4e_u8, 0x61_u8,
3372+
0x6d_u8, 0x0_u8];
3373+
3374+
let _x = from_bytes_with_null(bb);
3375+
}
3376+
3377+
#[test]
3378+
#[should_fail]
3379+
#[ignore(cfg(windows))]
3380+
fn test_from_bytes_with_null_fail_2() {
3381+
let bb = [0xff_u8, 0xb8_u8, 0xa8_u8,
3382+
0xe0_u8, 0xb9_u8, 0x84_u8,
3383+
0xe0_u8, 0xb8_u8, 0x97_u8,
3384+
0xe0_u8, 0xb8_u8, 0xa2_u8,
3385+
0xe4_u8, 0xb8_u8, 0xad_u8,
3386+
0xe5_u8, 0x8d_u8, 0x8e_u8,
3387+
0x56_u8, 0x69_u8, 0xe1_u8,
3388+
0xbb_u8, 0x87_u8, 0x74_u8,
3389+
0x20_u8, 0x4e_u8, 0x61_u8,
3390+
0x6d_u8, 0x60_u8];
3391+
3392+
let _x = from_bytes_with_null(bb);
3393+
}
3394+
32923395
#[test]
32933396
fn test_from_buf() {
32943397
unsafe {
@@ -3351,6 +3454,23 @@ mod tests {
33513454
}
33523455
}
33533456
3457+
#[test]
3458+
fn test_subslice_offset() {
3459+
let a = "kernelsprite";
3460+
let b = slice(a, 7, len(a));
3461+
let c = slice(a, 0, len(a) - 6);
3462+
assert!(subslice_offset(a, b) == 7);
3463+
assert!(subslice_offset(a, c) == 0);
3464+
}
3465+
3466+
#[test]
3467+
#[should_fail]
3468+
fn test_subslice_offset_2() {
3469+
let a = "alchemiter";
3470+
let b = "cruxtruder";
3471+
subslice_offset(a, b);
3472+
}
3473+
33543474
#[test]
33553475
fn vec_str_conversions() {
33563476
let s1: ~str = ~"All mimsy were the borogoves";

0 commit comments

Comments
 (0)