@@ -56,12 +56,23 @@ Section: Creating a string
56
56
pub fn from_bytes ( vv : & [ u8 ] ) -> ~str {
57
57
use str:: not_utf8:: cond;
58
58
59
- if !is_utf8 ( vv) {
60
- let first_bad_byte = * vv. iter ( ) . find ( |& b| !is_utf8 ( [ * b] ) ) . unwrap ( ) ;
61
- cond. raise ( fmt ! ( "from_bytes: input is not UTF-8; first bad byte is %u" ,
62
- first_bad_byte as uint) )
59
+ match from_bytes_opt ( vv) {
60
+ None => {
61
+ let first_bad_byte = * vv. iter ( ) . find ( |& b| !is_utf8 ( [ * b] ) ) . unwrap ( ) ;
62
+ cond. raise ( fmt ! ( "from_bytes: input is not UTF-8; first bad byte is %u" ,
63
+ first_bad_byte as uint) )
64
+ }
65
+ Some ( s) => s
66
+ }
67
+ }
68
+
69
+ /// Convert a vector of bytes to a new UTF-8 string, if possible.
70
+ /// Returns None if the vector contains invalid UTF-8.
71
+ pub fn from_bytes_opt ( vv : & [ u8 ] ) -> Option < ~str > {
72
+ if is_utf8 ( vv) {
73
+ Some ( unsafe { raw:: from_bytes ( vv) } )
63
74
} else {
64
- return unsafe { raw :: from_bytes ( vv ) }
75
+ None
65
76
}
66
77
}
67
78
@@ -78,7 +89,17 @@ pub fn from_bytes_owned(vv: ~[u8]) -> ~str {
78
89
cond. raise ( fmt ! ( "from_bytes: input is not UTF-8; first bad byte is %u" ,
79
90
first_bad_byte as uint) )
80
91
} else {
81
- return unsafe { raw:: from_bytes_owned ( vv) }
92
+ unsafe { raw:: from_bytes_owned ( vv) }
93
+ }
94
+ }
95
+
96
+ /// Consumes a vector of bytes to create a new utf-8 string.
97
+ /// Returns None if the vector contains invalid UTF-8.
98
+ pub fn from_bytes_owned_opt ( vv : ~[ u8 ] ) -> Option < ~str > {
99
+ if is_utf8 ( vv) {
100
+ Some ( unsafe { raw:: from_bytes_owned ( vv) } )
101
+ } else {
102
+ None
82
103
}
83
104
}
84
105
@@ -91,8 +112,16 @@ pub fn from_bytes_owned(vv: ~[u8]) -> ~str {
91
112
///
92
113
/// Fails if invalid UTF-8
93
114
pub fn from_bytes_slice < ' a > ( v : & ' a [ u8 ] ) -> & ' a str {
94
- assert ! ( is_utf8( v) ) ;
95
- unsafe { cast:: transmute ( v) }
115
+ from_bytes_slice_opt ( v) . expect ( "from_bytes_slice: not utf-8" )
116
+ }
117
+
118
+ /// Converts a vector to a string slice without performing any allocations.
119
+ ///
120
+ /// Returns None if the slice is not utf-8.
121
+ pub fn from_bytes_slice_opt < ' a > ( v : & ' a [ u8 ] ) -> Option < & ' a str > {
122
+ if is_utf8 ( v) {
123
+ Some ( unsafe { cast:: transmute ( v) } )
124
+ } else { None }
96
125
}
97
126
98
127
impl ToStr for ~str {
@@ -2358,7 +2387,7 @@ impl Zero for @str {
2358
2387
#[cfg(test)]
2359
2388
mod tests {
2360
2389
use container::Container;
2361
- use option::Some;
2390
+ use option::{None, Some} ;
2362
2391
use libc::c_char;
2363
2392
use libc;
2364
2393
use ptr;
@@ -3539,6 +3568,76 @@ mod tests {
3539
3568
let mut s = ~"\u00FC " ; // ü
3540
3569
s. truncate( 1 ) ;
3541
3570
}
3571
+
3572
+ #[ test]
3573
+ fn test_str_from_bytes_slice( ) {
3574
+ let xs = bytes!( "hello" ) ;
3575
+ assert_eq!( from_bytes_slice( xs) , "hello" ) ;
3576
+
3577
+ let xs = bytes!( "ศไทย中华Việt Nam" ) ;
3578
+ assert_eq!( from_bytes_slice( xs) , "ศไทย中华Việt Nam" ) ;
3579
+ }
3580
+
3581
+ #[ test]
3582
+ #[ should_fail]
3583
+ fn test_str_from_bytes_slice_invalid( ) {
3584
+ let xs = bytes!( "hello" , 0xff ) ;
3585
+ let _ = from_bytes_slice( xs) ;
3586
+ }
3587
+
3588
+ #[ test]
3589
+ fn test_str_from_bytes_slice_opt( ) {
3590
+ let xs = bytes!( "hello" ) ;
3591
+ assert_eq!( from_bytes_slice_opt( xs) , Some ( "hello" ) ) ;
3592
+
3593
+ let xs = bytes!( "ศไทย中华Việt Nam" ) ;
3594
+ assert_eq!( from_bytes_slice_opt( xs) , Some ( "ศไทย中华Việt Nam" ) ) ;
3595
+
3596
+ let xs = bytes!( "hello" , 0xff ) ;
3597
+ assert_eq!( from_bytes_slice_opt( xs) , None ) ;
3598
+ }
3599
+
3600
+ #[ test]
3601
+ fn test_str_from_bytes( ) {
3602
+ let xs = bytes!( "hello" ) ;
3603
+ assert_eq!( from_bytes( xs) , ~"hello");
3604
+
3605
+ let xs = bytes!(" ศไทย中华Việt Nam ");
3606
+ assert_eq!(from_bytes(xs), ~" ศไทย中华Việt Nam ");
3607
+ }
3608
+
3609
+ #[test]
3610
+ fn test_str_from_bytes_opt() {
3611
+ let xs = bytes!(" hello").to_owned();
3612
+ assert_eq!(from_bytes_opt(xs), Some(~" hello"));
3613
+
3614
+ let xs = bytes!(" ศไทย中华Việt Nam ");
3615
+ assert_eq!(from_bytes_opt(xs), Some(~" ศไทย中华Việt Nam "));
3616
+
3617
+ let xs = bytes!(" hello", 0xff);
3618
+ assert_eq!(from_bytes_opt(xs), None);
3619
+ }
3620
+
3621
+ #[test]
3622
+ fn test_str_from_bytes_owned() {
3623
+ let xs = bytes!(" hello").to_owned();
3624
+ assert_eq!(from_bytes_owned(xs), ~" hello");
3625
+
3626
+ let xs = bytes!(" ศไทย中华Việt Nam ").to_owned();
3627
+ assert_eq!(from_bytes_owned(xs), ~" ศไทย中华Việt Nam ");
3628
+ }
3629
+
3630
+ #[test]
3631
+ fn test_str_from_bytes_owned_opt() {
3632
+ let xs = bytes!(" hello").to_owned();
3633
+ assert_eq!(from_bytes_owned_opt(xs), Some(~" hello"));
3634
+
3635
+ let xs = bytes!(" ศไทย中华Việt Nam ").to_owned();
3636
+ assert_eq!(from_bytes_owned_opt(xs), Some(~" ศไทย中华Việt Nam "));
3637
+
3638
+ let xs = bytes!(" hello", 0xff).to_owned();
3639
+ assert_eq!(from_bytes_owned_opt(xs), None);
3640
+ }
3542
3641
}
3543
3642
3544
3643
#[cfg(test)]
0 commit comments