Skip to content

Commit e011939

Browse files
author
Keegan McAllister
committed
Implement Char::encode_utf16
And clean up encode_utf8 a bit.
1 parent 8b6091e commit e011939

File tree

2 files changed

+32
-19
lines changed

2 files changed

+32
-19
lines changed

src/libstd/char.rs

Lines changed: 29 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -560,11 +560,19 @@ pub trait Char {
560560

561561
/// Encodes this character as UTF-8 into the provided byte buffer.
562562
///
563-
/// The buffer must be at least 4 bytes long or a runtime failure will
563+
/// The buffer must be at least 4 bytes long or a runtime failure may
564564
/// occur.
565565
///
566-
/// This will then return the number of characters written to the slice.
566+
/// This will then return the number of bytes written to the slice.
567567
fn encode_utf8(&self, dst: &mut [u8]) -> uint;
568+
569+
/// Encodes this character as UTF-16 into the provided `u16` buffer.
570+
///
571+
/// The buffer must be at least 2 elements long or a runtime failure may
572+
/// occur.
573+
///
574+
/// This will then return the number of `u16`s written to the slice.
575+
fn encode_utf16(&self, dst: &mut [u16]) -> uint;
568576
}
569577

570578
impl Char for char {
@@ -602,7 +610,7 @@ impl Char for char {
602610

603611
fn len_utf8_bytes(&self) -> uint { len_utf8_bytes(*self) }
604612

605-
fn encode_utf8<'a>(&self, dst: &'a mut [u8]) -> uint {
613+
fn encode_utf8(&self, dst: &mut [u8]) -> uint {
606614
let code = *self as uint;
607615
if code < MAX_ONE_B {
608616
dst[0] = code as u8;
@@ -624,6 +632,24 @@ impl Char for char {
624632
return 4;
625633
}
626634
}
635+
636+
fn encode_utf16(&self, dst: &mut [u16]) -> uint {
637+
let mut ch = *self as uint;
638+
if (ch & 0xFFFF_u) == ch {
639+
// The BMP falls through (assuming non-surrogate, as it
640+
// should)
641+
assert!(ch <= 0xD7FF_u || ch >= 0xE000_u);
642+
dst[0] = ch as u16;
643+
1
644+
} else {
645+
// Supplementary planes break into surrogates.
646+
assert!(ch >= 0x1_0000_u && ch <= 0x10_FFFF_u);
647+
ch -= 0x1_0000_u;
648+
dst[0] = 0xD800_u16 | ((ch >> 10) as u16);
649+
dst[1] = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
650+
2
651+
}
652+
}
627653
}
628654

629655
#[cfg(not(test))]

src/libstd/str.rs

Lines changed: 3 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -2555,22 +2555,9 @@ impl<'a> StrSlice<'a> for &'a str {
25552555
fn to_utf16(&self) -> ~[u16] {
25562556
let mut u = ~[];
25572557
for ch in self.chars() {
2558-
// Arithmetic with u32 literals is easier on the eyes than chars.
2559-
let mut ch = ch as u32;
2560-
2561-
if (ch & 0xFFFF_u32) == ch {
2562-
// The BMP falls through (assuming non-surrogate, as it
2563-
// should)
2564-
assert!(ch <= 0xD7FF_u32 || ch >= 0xE000_u32);
2565-
u.push(ch as u16)
2566-
} else {
2567-
// Supplementary planes break into surrogates.
2568-
assert!(ch >= 0x1_0000_u32 && ch <= 0x10_FFFF_u32);
2569-
ch -= 0x1_0000_u32;
2570-
let w1 = 0xD800_u16 | ((ch >> 10) as u16);
2571-
let w2 = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
2572-
u.push_all([w1, w2])
2573-
}
2558+
let mut buf = [0u16, ..2];
2559+
let n = ch.encode_utf16(buf /* as mut slice! */);
2560+
u.push_all(buf.slice_to(n));
25742561
}
25752562
u
25762563
}

0 commit comments

Comments
 (0)