@@ -33,13 +33,14 @@ use unicode::{derived_property, property, general_category, decompose, conversio
33
33
#[ cfg( not( test) ) ] use default:: Default ;
34
34
35
35
// UTF-8 encoding parameters.
//
// Exclusive upper bounds on the code point for each encoded length: a code
// point below MAX_ONE_B takes one byte, below MAX_TWO_B two bytes, and so on.
static MAX_ONE_B: u32 = 0x80u32;
static MAX_TWO_B: u32 = 0x800u32;
static MAX_THREE_B: u32 = 0x10000u32;
// 2^21: a four-byte sequence carries 3 + 6 + 6 + 6 payload bits.
static MAX_FOUR_B: u32 = 0x200000u32;

// Tag bits OR-ed into the leading byte of a two-, three- or four-byte
// sequence, and into every continuation byte.
static TAG_TWO_B: u8 = 0b1100_0000u8;
static TAG_THREE_B: u8 = 0b1110_0000u8;
static TAG_FOUR_B: u8 = 0b1111_0000u8;
static TAG_CONT: u8 = 0b1000_0000u8;
43
44
44
45
/*
45
46
Lu Uppercase_Letter an uppercase letter
@@ -285,37 +286,37 @@ pub fn from_digit(num: uint, radix: uint) -> Option<char> {
285
286
}
286
287
287
288
// Constants from Unicode 6.2.0 Section 3.12 Conjoining Jamo Behavior

// First code point of the precomposed Hangul syllable range.
static S_BASE: u32 = 0xAC00;

// Bases added to the leading, vowel and trailing indices when decomposing.
// T_BASE itself is never emitted: a trailing index of 0 means "no trailing
// jamo".
static L_BASE: u32 = 0x1100;
static V_BASE: u32 = 0x1161;
static T_BASE: u32 = 0x11A7;

// Number of distinct leading, vowel and trailing values.
static L_COUNT: u32 = 19;
static V_COUNT: u32 = 21;
static T_COUNT: u32 = 28;

// Syllables per leading value, and total number of precomposed syllables.
static N_COUNT: u32 = V_COUNT * T_COUNT;
static S_COUNT: u32 = L_COUNT * N_COUNT;
297
298
298
299
// Decompose a precomposed Hangul syllable
299
300
fn decompose_hangul ( s : char , f: |char|) {
300
- let si = s as uint - S_BASE ;
301
+ let si = s as u32 - S_BASE ;
301
302
302
303
let li = si / N_COUNT ;
303
304
unsafe {
304
- f ( transmute ( ( L_BASE + li) as u32 ) ) ;
305
+ f ( transmute ( L_BASE + li) ) ;
305
306
306
307
let vi = ( si % N_COUNT ) / T_COUNT ;
307
- f ( transmute ( ( V_BASE + vi) as u32 ) ) ;
308
+ f ( transmute ( V_BASE + vi) ) ;
308
309
309
310
let ti = si % T_COUNT ;
310
311
if ti > 0 {
311
- f ( transmute ( ( T_BASE + ti) as u32 ) ) ;
312
+ f ( transmute ( T_BASE + ti) ) ;
312
313
}
313
314
}
314
315
}
315
316
316
317
/// Returns the canonical decomposition of a character
317
318
pub fn decompose_canonical ( c : char , f: |char|) {
318
- if ( c as uint ) < S_BASE || ( c as uint ) >= ( S_BASE + S_COUNT ) {
319
+ if ( c as u32 ) < S_BASE || ( c as u32 ) >= ( S_BASE + S_COUNT ) {
319
320
decompose:: canonical ( c, f) ;
320
321
} else {
321
322
decompose_hangul ( c, f) ;
@@ -324,7 +325,7 @@ pub fn decompose_canonical(c: char, f: |char|) {
324
325
325
326
/// Returns the compatibility decomposition of a character
326
327
pub fn decompose_compatible ( c : char , f: |char|) {
327
- if ( c as uint ) < S_BASE || ( c as uint ) >= ( S_BASE + S_COUNT ) {
328
+ if ( c as u32 ) < S_BASE || ( c as u32 ) >= ( S_BASE + S_COUNT ) {
328
329
decompose:: compatibility ( c, f) ;
329
330
} else {
330
331
decompose_hangul ( c, f) ;
@@ -386,12 +387,7 @@ pub fn escape_default(c: char, f: |char|) {
386
387
387
388
/// Returns the amount of bytes this `char` would need if encoded in UTF-8
388
389
pub fn len_utf8_bytes ( c : char ) -> uint {
389
- static MAX_ONE_B : uint = 128 u;
390
- static MAX_TWO_B : uint = 2048 u;
391
- static MAX_THREE_B : uint = 65536 u;
392
- static MAX_FOUR_B : uint = 2097152 u;
393
-
394
- let code = c as uint ;
390
+ let code = c as u32 ;
395
391
match ( ) {
396
392
_ if code < MAX_ONE_B => 1 u,
397
393
_ if code < MAX_TWO_B => 2 u,
@@ -606,41 +602,40 @@ impl Char for char {
606
602
607
603
fn len_utf8_bytes ( & self ) -> uint { len_utf8_bytes ( * self ) }
608
604
609
- fn encode_utf8 ( & self , dst : & mut [ u8 ] ) -> uint {
610
- let code = * self as uint ;
605
+ fn encode_utf8 < ' a > ( & self , dst : & ' a mut [ u8 ] ) -> uint {
606
+ let code = * self as u32 ;
611
607
if code < MAX_ONE_B {
612
608
dst[ 0 ] = code as u8 ;
613
- return 1 ;
609
+ 1
614
610
} else if code < MAX_TWO_B {
615
- dst[ 0 ] = ( code >> 6 u & 31 u | TAG_TWO_B ) as u8 ;
616
- dst[ 1 ] = ( code & 63 u | TAG_CONT ) as u8 ;
617
- return 2 ;
611
+ dst[ 0 ] = ( code >> 6 u & 0x1F_u32 ) as u8 | TAG_TWO_B ;
612
+ dst[ 1 ] = ( code & 0x3F_u32 ) as u8 | TAG_CONT ;
613
+ 2
618
614
} else if code < MAX_THREE_B {
619
- dst[ 0 ] = ( code >> 12 u & 15 u | TAG_THREE_B ) as u8 ;
620
- dst[ 1 ] = ( code >> 6 u & 63 u | TAG_CONT ) as u8 ;
621
- dst[ 2 ] = ( code & 63 u | TAG_CONT ) as u8 ;
622
- return 3 ;
615
+ dst[ 0 ] = ( code >> 12 u & 0x0F_u32 ) as u8 | TAG_THREE_B ;
616
+ dst[ 1 ] = ( code >> 6 u & 0x3F_u32 ) as u8 | TAG_CONT ;
617
+ dst[ 2 ] = ( code & 0x3F_u32 ) as u8 | TAG_CONT ;
618
+ 3
623
619
} else {
624
- dst[ 0 ] = ( code >> 18 u & 7 u | TAG_FOUR_B ) as u8 ;
625
- dst[ 1 ] = ( code >> 12 u & 63 u | TAG_CONT ) as u8 ;
626
- dst[ 2 ] = ( code >> 6 u & 63 u | TAG_CONT ) as u8 ;
627
- dst[ 3 ] = ( code & 63 u | TAG_CONT ) as u8 ;
628
- return 4 ;
620
+ dst[ 0 ] = ( code >> 18 u & 0x07_u32 ) as u8 | TAG_FOUR_B ;
621
+ dst[ 1 ] = ( code >> 12 u & 0x3F_u32 ) as u8 | TAG_CONT ;
622
+ dst[ 2 ] = ( code >> 6 u & 0x3F_u32 ) as u8 | TAG_CONT ;
623
+ dst[ 3 ] = ( code & 0x3F_u32 ) as u8 | TAG_CONT ;
624
+ 4
629
625
}
630
626
}
631
627
632
628
fn encode_utf16 ( & self , dst : & mut [ u16 ] ) -> uint {
633
- let mut ch = * self as uint ;
634
- if ( ch & 0xFFFF_ u) == ch {
635
- // The BMP falls through (assuming non-surrogate, as it
636
- // should)
637
- assert ! ( ch <= 0xD7FF_ u || ch >= 0xE000_ u) ;
629
+ let mut ch = * self as u32 ;
630
+ if ( ch & 0xFFFF_u32 ) == ch {
631
+ // The BMP falls through (assuming non-surrogate, as it should)
632
+ assert ! ( ch <= 0xD7FF_u32 || ch >= 0xE000_u32 ) ;
638
633
dst[ 0 ] = ch as u16 ;
639
634
1
640
635
} else {
641
636
// Supplementary planes break into surrogates.
642
- assert ! ( ch >= 0x1_0000_ u && ch <= 0x10_FFFF_ u ) ;
643
- ch -= 0x1_0000_ u ;
637
+ assert ! ( ch >= 0x1_0000_u32 && ch <= 0x10_FFFF_u32 ) ;
638
+ ch -= 0x1_0000_u32 ;
644
639
dst[ 0 ] = 0xD800_u16 | ( ( ch >> 10 ) as u16 ) ;
645
640
dst[ 1 ] = 0xDC00_u16 | ( ( ch as u16 ) & 0x3FF_u16 ) ;
646
641
2
0 commit comments