@@ -38,7 +38,7 @@ Section: Creating a string
38
38
*/
39
39
40
40
/**
41
- * Convert a vector of bytes to a UTF-8 string
41
+ * Convert a vector of bytes to a new UTF-8 string
42
42
*
43
43
* # Failure
44
44
*
@@ -49,9 +49,26 @@ pub fn from_bytes(vv: &const [u8]) -> ~str {
49
49
return unsafe { raw:: from_bytes ( vv) } ;
50
50
}
51
51
52
+ /**
53
+ * Convert a vector of bytes to a UTF-8 string.
54
+ * The vector needs to be one byte longer than the string, and end with a 0 byte.
55
+ *
56
+ * Compared to `from_bytes()`, this fn doesn't need to allocate a new owned str.
57
+ *
58
+ * # Failure
59
+ *
60
+ * Fails if invalid UTF-8
61
+ * Fails if not null terminated
62
+ */
63
+ pub fn from_bytes_with_null < ' a > ( vv : & ' a [ u8 ] ) -> & ' a str {
64
+ assert ! ( vv[ vv. len( ) - 1 ] == 0 ) ;
65
+ assert ! ( is_utf8( vv) ) ;
66
+ return unsafe { raw:: from_bytes_with_null ( vv) } ;
67
+ }
68
+
52
69
/// Copy a slice into a new unique str
53
70
pub fn from_slice ( s : & str ) -> ~str {
54
- unsafe { raw:: slice_bytes_unique ( s, 0 , len ( s) ) }
71
+ unsafe { raw:: slice_bytes_owned ( s, 0 , len ( s) ) }
55
72
}
56
73
57
74
impl ToStr for ~str {
@@ -279,7 +296,7 @@ pub fn pop_char(s: &mut ~str) -> char {
279
296
*/
280
297
pub fn shift_char ( s : & mut ~str ) -> char {
281
298
let CharRange { ch, next} = char_range_at ( * s, 0 u) ;
282
- * s = unsafe { raw:: slice_bytes_unique ( * s, next, len ( * s) ) } ;
299
+ * s = unsafe { raw:: slice_bytes_owned ( * s, next, len ( * s) ) } ;
283
300
return ch;
284
301
}
285
302
@@ -784,9 +801,9 @@ pub fn replace(s: &str, from: &str, to: &str) -> ~str {
784
801
if first {
785
802
first = false ;
786
803
} else {
787
- unsafe { push_str ( & mut result, to) ; }
804
+ push_str ( & mut result, to) ;
788
805
}
789
- unsafe { push_str ( & mut result, raw:: slice_bytes_unique ( s, start, end) ) ; }
806
+ push_str ( & mut result, unsafe { raw:: slice_bytes ( s, start, end) } ) ;
790
807
}
791
808
result
792
809
}
@@ -2037,6 +2054,25 @@ pub fn as_buf<T>(s: &str, f: &fn(*u8, uint) -> T) -> T {
2037
2054
}
2038
2055
}
2039
2056
2057
+ /**
2058
+ * Returns the byte offset of an inner slice relative to an enclosing outer slice
2059
+ */
2060
+ #[ inline( always) ]
2061
+ pub fn subslice_offset ( outer : & str , inner : & str ) -> uint {
2062
+ do as_buf ( outer) |a, a_len| {
2063
+ do as_buf ( inner) |b, b_len| {
2064
+ let a_start: uint , a_end : uint , b_start : uint , b_end : uint ;
2065
+ unsafe {
2066
+ a_start = cast:: transmute ( a) ; a_end = a_len + cast:: transmute ( a) ;
2067
+ b_start = cast:: transmute ( b) ; b_end = b_len + cast:: transmute ( b) ;
2068
+ }
2069
+ assert ! ( a_start <= b_start) ;
2070
+ assert ! ( b_end <= a_end) ;
2071
+ b_start - a_start
2072
+ }
2073
+ }
2074
+ }
2075
+
2040
2076
/**
2041
2077
* Reserves capacity for exactly `n` bytes in the given string, not including
2042
2078
* the null terminator.
@@ -2158,13 +2194,20 @@ pub mod raw {
2158
2194
from_buf_len ( :: cast:: reinterpret_cast ( & c_str) , len)
2159
2195
}
2160
2196
2161
- /// Converts a vector of bytes to a string.
2197
+ /// Converts a vector of bytes to a new owned string.
2162
2198
pub unsafe fn from_bytes ( v : & const [ u8 ] ) -> ~str {
2163
2199
do vec:: as_const_buf ( v) |buf, len| {
2164
2200
from_buf_len ( buf, len)
2165
2201
}
2166
2202
}
2167
2203
2204
+ /// Converts a vector of bytes to a string.
2205
+ /// The byte slice needs to contain valid utf8 and needs to be one byte longer than
2206
+ /// the string, if possible ending in a 0 byte.
2207
+ pub unsafe fn from_bytes_with_null < ' a > ( v : & ' a [ u8 ] ) -> & ' a str {
2208
+ cast:: transmute ( v)
2209
+ }
2210
+
2168
2211
/// Converts a byte to a string.
2169
2212
pub unsafe fn from_byte ( u : u8 ) -> ~str { raw:: from_bytes ( [ u] ) }
2170
2213
@@ -2186,7 +2229,7 @@ pub mod raw {
2186
2229
* If begin is greater than end.
2187
2230
* If end is greater than the length of the string.
2188
2231
*/
2189
- pub unsafe fn slice_bytes_unique ( s : & str , begin : uint , end : uint ) -> ~str {
2232
+ pub unsafe fn slice_bytes_owned ( s : & str , begin : uint , end : uint ) -> ~str {
2190
2233
do as_buf ( s) |sbuf, n| {
2191
2234
assert ! ( ( begin <= end) ) ;
2192
2235
assert ! ( ( end <= n) ) ;
@@ -2258,7 +2301,7 @@ pub mod raw {
2258
2301
let len = len ( * s) ;
2259
2302
assert ! ( ( len > 0 u) ) ;
2260
2303
let b = s[ 0 ] ;
2261
- * s = unsafe { raw:: slice_bytes_unique ( * s, 1 u, len) } ;
2304
+ * s = unsafe { raw:: slice_bytes_owned ( * s, 1 u, len) } ;
2262
2305
return b;
2263
2306
}
2264
2307
@@ -3289,6 +3332,66 @@ mod tests {
3289
3332
let _x = from_bytes(bb);
3290
3333
}
3291
3334
3335
#[test]
fn test_unsafe_from_bytes_with_null() {
    // Seven 'A' bytes (65) followed by the 0 terminator.
    let a = [65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
    let b = unsafe { raw::from_bytes_with_null(a) };
    // The expected text is exactly the seven 'A's, with no padding spaces.
    assert_eq!(b, "AAAAAAA");
}
3341
+
3342
#[test]
fn test_from_bytes_with_null() {
    // Expected text; `bb` is its UTF-8 encoding followed by a 0 byte, so
    // the literal must not carry any leading or trailing spaces.
    let ss = "ศไทย中华Việt Nam";
    let bb = [0xe0_u8, 0xb8_u8, 0xa8_u8,
              0xe0_u8, 0xb9_u8, 0x84_u8,
              0xe0_u8, 0xb8_u8, 0x97_u8,
              0xe0_u8, 0xb8_u8, 0xa2_u8,
              0xe4_u8, 0xb8_u8, 0xad_u8,
              0xe5_u8, 0x8d_u8, 0x8e_u8,
              0x56_u8, 0x69_u8, 0xe1_u8,
              0xbb_u8, 0x87_u8, 0x74_u8,
              0x20_u8, 0x4e_u8, 0x61_u8,
              0x6d_u8, 0x0_u8];

    assert_eq!(ss, from_bytes_with_null(bb));
}
3358
+
3359
#[test]
#[should_fail]
#[ignore(cfg(windows))]
fn test_from_bytes_with_null_fail() {
    // Ends with a 0 byte, but the leading 0xff makes the vector invalid
    // UTF-8, so the conversion is expected to fail.
    let invalid = [0xff_u8, 0xb8_u8, 0xa8_u8,
                   0xe0_u8, 0xb9_u8, 0x84_u8,
                   0xe0_u8, 0xb8_u8, 0x97_u8,
                   0xe0_u8, 0xb8_u8, 0xa2_u8,
                   0xe4_u8, 0xb8_u8, 0xad_u8,
                   0xe5_u8, 0x8d_u8, 0x8e_u8,
                   0x56_u8, 0x69_u8, 0xe1_u8,
                   0xbb_u8, 0x87_u8, 0x74_u8,
                   0x20_u8, 0x4e_u8, 0x61_u8,
                   0x6d_u8, 0x0_u8];

    let _unused = from_bytes_with_null(invalid);
}
3376
+
3377
#[test]
#[should_fail]
#[ignore(cfg(windows))]
fn test_from_bytes_with_null_fail_2() {
    // Every byte here is valid UTF-8; the only fault is that the final byte
    // is 0x60 rather than 0. This keeps the test specific to the
    // null-terminator check: it would stop failing if that check were
    // removed, instead of being masked by an invalid-UTF-8 failure.
    let bb = [0xe0_u8, 0xb8_u8, 0xa8_u8,
              0xe0_u8, 0xb9_u8, 0x84_u8,
              0xe0_u8, 0xb8_u8, 0x97_u8,
              0xe0_u8, 0xb8_u8, 0xa2_u8,
              0xe4_u8, 0xb8_u8, 0xad_u8,
              0xe5_u8, 0x8d_u8, 0x8e_u8,
              0x56_u8, 0x69_u8, 0xe1_u8,
              0xbb_u8, 0x87_u8, 0x74_u8,
              0x20_u8, 0x4e_u8, 0x61_u8,
              0x6d_u8, 0x60_u8];

    let _x = from_bytes_with_null(bb);
}
3394
+
3292
3395
#[test]
3293
3396
fn test_from_buf() {
3294
3397
unsafe {
@@ -3351,6 +3454,23 @@ mod tests {
3351
3454
}
3352
3455
}
3353
3456
3457
#[test]
fn test_subslice_offset() {
    let a = " kernelsprite";
    // `b` starts 7 bytes into `a`; `c` starts at the very beginning of `a`.
    let b = slice(a, 7, len(a));
    let c = slice(a, 0, len(a) - 6);

    let b_offset = subslice_offset(a, b);
    let c_offset = subslice_offset(a, c);
    assert!(b_offset == 7);
    assert!(c_offset == 0);
}
3465
+
3466
#[test]
#[should_fail]
fn test_subslice_offset_2() {
    // Two separate literals: `b` is not a slice of `a`, so the call is
    // expected to fail.
    let a = " alchemiter";
    let b = " cruxtruder";
    subslice_offset(a, b);
}
3473
+
3354
3474
#[test]
3355
3475
fn vec_str_conversions() {
3356
3476
let s1: ~str = ~" All mimsy were the borogoves";
0 commit comments