File tree Expand file tree Collapse file tree 2 files changed +27
-8
lines changed Expand file tree Collapse file tree 2 files changed +27
-8
lines changed Original file line number Diff line number Diff line change @@ -596,17 +596,25 @@ pub fn is_utf8(v: &[u8]) -> bool {
596
596
let mut i = 0 u;
597
597
let total = v. len ( ) ;
598
598
while i < total {
599
- let mut chsize = utf8_char_width ( v[ i] ) ;
600
- if chsize == 0 u { return false ; }
601
- if i + chsize > total { return false ; }
602
- i += 1 u;
603
- while chsize > 1 u {
604
- if v[ i] & 192u8 != TAG_CONT_U8 { return false ; }
599
+ if v[ i] < 128u8 {
605
600
i += 1 u;
606
- chsize -= 1 u;
601
+ } else {
602
+ let w = utf8_char_width ( v[ i] ) ;
603
+ if w == 0 u { return false ; }
604
+
605
+ let nexti = i + w;
606
+ if nexti > total { return false ; }
607
+
608
+ if v[ i + 1 ] & 192u8 != TAG_CONT_U8 { return false ; }
609
+ if w > 2 {
610
+ if v[ i + 2 ] & 192u8 != TAG_CONT_U8 { return false ; }
611
+ if w > 3 && ( v[ i + 3 ] & 192u8 != TAG_CONT_U8 ) { return false ; }
612
+ }
613
+
614
+ i = nexti;
607
615
}
608
616
}
609
- return true ;
617
+ true
610
618
}
611
619
612
620
/// Determines if a vector of `u16` contains valid UTF-16
Original file line number Diff line number Diff line change @@ -27,9 +27,20 @@ pub fn main() {
27
27
assert ! ( s. char_at( 1 u) == 'é' ) ;
28
28
29
29
assert ! ( ( str :: is_utf8( s. as_bytes( ) ) ) ) ;
30
+ // invalid prefix
30
31
assert ! ( ( !str :: is_utf8( ~[ 0x80_u8 ] ) ) ) ;
32
+ // invalid 2 byte prefix
31
33
assert ! ( ( !str :: is_utf8( ~[ 0xc0_u8 ] ) ) ) ;
32
34
assert ! ( ( !str :: is_utf8( ~[ 0xc0_u8 , 0x10_u8 ] ) ) ) ;
35
+ // invalid 3 byte prefix
36
+ assert ! ( ( !str :: is_utf8( ~[ 0xe0_u8 ] ) ) ) ;
37
+ assert ! ( ( !str :: is_utf8( ~[ 0xe0_u8 , 0x10_u8 ] ) ) ) ;
38
+ assert ! ( ( !str :: is_utf8( ~[ 0xe0_u8 , 0xff_u8 , 0x10_u8 ] ) ) ) ;
39
+ // invalid 4 byte prefix
40
+ assert ! ( ( !str :: is_utf8( ~[ 0xf0_u8 ] ) ) ) ;
41
+ assert ! ( ( !str :: is_utf8( ~[ 0xf0_u8 , 0x10_u8 ] ) ) ) ;
42
+ assert ! ( ( !str :: is_utf8( ~[ 0xf0_u8 , 0xff_u8 , 0x10_u8 ] ) ) ) ;
43
+ assert ! ( ( !str :: is_utf8( ~[ 0xf0_u8 , 0xff_u8 , 0xff_u8 , 0x10_u8 ] ) ) ) ;
33
44
34
45
let mut stack = ~"a×c€";
35
46
assert_eq ! ( stack. pop_char( ) , '€' ) ;
You can’t perform that action at this time.
0 commit comments