Skip to content

Commit 2f71b72

Browse files
Florob authored and alexcrichton committed
core: Use appropriately sized integers for codepoints and bytes
1 parent 74ad023 commit 2f71b72

File tree

1 file changed

+45
-50
lines changed

1 file changed

+45
-50
lines changed

src/libcore/char.rs

Lines changed: 45 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -33,13 +33,14 @@ use unicode::{derived_property, property, general_category, decompose, conversio
3333
#[cfg(not(test))] use default::Default;
3434

3535
// UTF-8 ranges and tags for encoding characters
36-
static TAG_CONT: uint = 128u;
37-
static MAX_ONE_B: uint = 128u;
38-
static TAG_TWO_B: uint = 192u;
39-
static MAX_TWO_B: uint = 2048u;
40-
static TAG_THREE_B: uint = 224u;
41-
static MAX_THREE_B: uint = 65536u;
42-
static TAG_FOUR_B: uint = 240u;
36+
static TAG_CONT: u8 = 0b1000_0000u8;
37+
static TAG_TWO_B: u8 = 0b1100_0000u8;
38+
static TAG_THREE_B: u8 = 0b1110_0000u8;
39+
static TAG_FOUR_B: u8 = 0b1111_0000u8;
40+
static MAX_ONE_B: u32 = 0x80u32;
41+
static MAX_TWO_B: u32 = 0x800u32;
42+
static MAX_THREE_B: u32 = 0x10000u32;
43+
static MAX_FOUR_B: u32 = 0x200000u32;
4344

4445
/*
4546
Lu Uppercase_Letter an uppercase letter
@@ -285,37 +286,37 @@ pub fn from_digit(num: uint, radix: uint) -> Option<char> {
285286
}
286287

287288
// Constants from Unicode 6.2.0 Section 3.12 Conjoining Jamo Behavior
288-
static S_BASE: uint = 0xAC00;
289-
static L_BASE: uint = 0x1100;
290-
static V_BASE: uint = 0x1161;
291-
static T_BASE: uint = 0x11A7;
292-
static L_COUNT: uint = 19;
293-
static V_COUNT: uint = 21;
294-
static T_COUNT: uint = 28;
295-
static N_COUNT: uint = (V_COUNT * T_COUNT);
296-
static S_COUNT: uint = (L_COUNT * N_COUNT);
289+
static S_BASE: u32 = 0xAC00;
290+
static L_BASE: u32 = 0x1100;
291+
static V_BASE: u32 = 0x1161;
292+
static T_BASE: u32 = 0x11A7;
293+
static L_COUNT: u32 = 19;
294+
static V_COUNT: u32 = 21;
295+
static T_COUNT: u32 = 28;
296+
static N_COUNT: u32 = (V_COUNT * T_COUNT);
297+
static S_COUNT: u32 = (L_COUNT * N_COUNT);
297298

298299
// Decompose a precomposed Hangul syllable
299300
fn decompose_hangul(s: char, f: |char|) {
300-
let si = s as uint - S_BASE;
301+
let si = s as u32 - S_BASE;
301302

302303
let li = si / N_COUNT;
303304
unsafe {
304-
f(transmute((L_BASE + li) as u32));
305+
f(transmute(L_BASE + li));
305306

306307
let vi = (si % N_COUNT) / T_COUNT;
307-
f(transmute((V_BASE + vi) as u32));
308+
f(transmute(V_BASE + vi));
308309

309310
let ti = si % T_COUNT;
310311
if ti > 0 {
311-
f(transmute((T_BASE + ti) as u32));
312+
f(transmute(T_BASE + ti));
312313
}
313314
}
314315
}
315316

316317
/// Returns the canonical decomposition of a character
317318
pub fn decompose_canonical(c: char, f: |char|) {
318-
if (c as uint) < S_BASE || (c as uint) >= (S_BASE + S_COUNT) {
319+
if (c as u32) < S_BASE || (c as u32) >= (S_BASE + S_COUNT) {
319320
decompose::canonical(c, f);
320321
} else {
321322
decompose_hangul(c, f);
@@ -324,7 +325,7 @@ pub fn decompose_canonical(c: char, f: |char|) {
324325

325326
/// Returns the compatibility decomposition of a character
326327
pub fn decompose_compatible(c: char, f: |char|) {
327-
if (c as uint) < S_BASE || (c as uint) >= (S_BASE + S_COUNT) {
328+
if (c as u32) < S_BASE || (c as u32) >= (S_BASE + S_COUNT) {
328329
decompose::compatibility(c, f);
329330
} else {
330331
decompose_hangul(c, f);
@@ -386,12 +387,7 @@ pub fn escape_default(c: char, f: |char|) {
386387

387388
/// Returns the amount of bytes this `char` would need if encoded in UTF-8
388389
pub fn len_utf8_bytes(c: char) -> uint {
389-
static MAX_ONE_B: uint = 128u;
390-
static MAX_TWO_B: uint = 2048u;
391-
static MAX_THREE_B: uint = 65536u;
392-
static MAX_FOUR_B: uint = 2097152u;
393-
394-
let code = c as uint;
390+
let code = c as u32;
395391
match () {
396392
_ if code < MAX_ONE_B => 1u,
397393
_ if code < MAX_TWO_B => 2u,
@@ -606,41 +602,40 @@ impl Char for char {
606602

607603
fn len_utf8_bytes(&self) -> uint { len_utf8_bytes(*self) }
608604

609-
fn encode_utf8(&self, dst: &mut [u8]) -> uint {
610-
let code = *self as uint;
605+
fn encode_utf8<'a>(&self, dst: &'a mut [u8]) -> uint {
606+
let code = *self as u32;
611607
if code < MAX_ONE_B {
612608
dst[0] = code as u8;
613-
return 1;
609+
1
614610
} else if code < MAX_TWO_B {
615-
dst[0] = (code >> 6u & 31u | TAG_TWO_B) as u8;
616-
dst[1] = (code & 63u | TAG_CONT) as u8;
617-
return 2;
611+
dst[0] = (code >> 6u & 0x1F_u32) as u8 | TAG_TWO_B;
612+
dst[1] = (code & 0x3F_u32) as u8 | TAG_CONT;
613+
2
618614
} else if code < MAX_THREE_B {
619-
dst[0] = (code >> 12u & 15u | TAG_THREE_B) as u8;
620-
dst[1] = (code >> 6u & 63u | TAG_CONT) as u8;
621-
dst[2] = (code & 63u | TAG_CONT) as u8;
622-
return 3;
615+
dst[0] = (code >> 12u & 0x0F_u32) as u8 | TAG_THREE_B;
616+
dst[1] = (code >> 6u & 0x3F_u32) as u8 | TAG_CONT;
617+
dst[2] = (code & 0x3F_u32) as u8 | TAG_CONT;
618+
3
623619
} else {
624-
dst[0] = (code >> 18u & 7u | TAG_FOUR_B) as u8;
625-
dst[1] = (code >> 12u & 63u | TAG_CONT) as u8;
626-
dst[2] = (code >> 6u & 63u | TAG_CONT) as u8;
627-
dst[3] = (code & 63u | TAG_CONT) as u8;
628-
return 4;
620+
dst[0] = (code >> 18u & 0x07_u32) as u8 | TAG_FOUR_B;
621+
dst[1] = (code >> 12u & 0x3F_u32) as u8 | TAG_CONT;
622+
dst[2] = (code >> 6u & 0x3F_u32) as u8 | TAG_CONT;
623+
dst[3] = (code & 0x3F_u32) as u8 | TAG_CONT;
624+
4
629625
}
630626
}
631627

632628
fn encode_utf16(&self, dst: &mut [u16]) -> uint {
633-
let mut ch = *self as uint;
634-
if (ch & 0xFFFF_u) == ch {
635-
// The BMP falls through (assuming non-surrogate, as it
636-
// should)
637-
assert!(ch <= 0xD7FF_u || ch >= 0xE000_u);
629+
let mut ch = *self as u32;
630+
if (ch & 0xFFFF_u32) == ch {
631+
// The BMP falls through (assuming non-surrogate, as it should)
632+
assert!(ch <= 0xD7FF_u32 || ch >= 0xE000_u32);
638633
dst[0] = ch as u16;
639634
1
640635
} else {
641636
// Supplementary planes break into surrogates.
642-
assert!(ch >= 0x1_0000_u && ch <= 0x10_FFFF_u);
643-
ch -= 0x1_0000_u;
637+
assert!(ch >= 0x1_0000_u32 && ch <= 0x10_FFFF_u32);
638+
ch -= 0x1_0000_u32;
644639
dst[0] = 0xD800_u16 | ((ch >> 10) as u16);
645640
dst[1] = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
646641
2

0 commit comments

Comments
 (0)