Skip to content

Commit 9efa23e

Browse files
committed
auto merge of #19042 : SimonSapin/rust/generic-utf16-encoder, r=alexcrichton
This allows encoding to UTF-16 from input that is not UTF-8, e.g. a `[char]` (UTF-32) string. This might help with servo/servo#4023.
2 parents 47c1d43 + dff48a9 commit 9efa23e

File tree

3 files changed

+34
-4
lines changed

3 files changed

+34
-4
lines changed

src/libcollections/str.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,8 @@ use vec::Vec;
7474
pub use core::str::{from_utf8, CharEq, Chars, CharOffsets};
7575
pub use core::str::{Bytes, CharSplits};
7676
pub use core::str::{CharSplitsN, AnyLines, MatchIndices, StrSplits};
77-
pub use core::str::{Utf16CodeUnits, eq_slice, is_utf8, is_utf16, Utf16Items};
77+
pub use core::str::{Utf16Encoder, Utf16CodeUnits};
78+
pub use core::str::{eq_slice, is_utf8, is_utf16, Utf16Items};
7879
pub use core::str::{Utf16Item, ScalarValue, LoneSurrogate, utf16_items};
7980
pub use core::str::{truncate_utf16_at_nul, utf8_char_width, CharRange};
8081
pub use core::str::{FromStr, from_str};

src/libcore/str.rs

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -762,11 +762,33 @@ impl<'a> Iterator<&'a str> for StrSplits<'a> {
762762
/// Use with the `std::iter` module.
763763
#[deriving(Clone)]
764764
pub struct Utf16CodeUnits<'a> {
765-
chars: Chars<'a>,
766-
extra: u16
765+
encoder: Utf16Encoder<Chars<'a>>
767766
}
768767

769768
impl<'a> Iterator<u16> for Utf16CodeUnits<'a> {
769+
#[inline]
770+
fn next(&mut self) -> Option<u16> { self.encoder.next() }
771+
772+
#[inline]
773+
fn size_hint(&self) -> (uint, Option<uint>) { self.encoder.size_hint() }
774+
}
775+
776+
777+
/// Iterator adaptor for encoding `char`s to UTF-16.
778+
#[deriving(Clone)]
779+
pub struct Utf16Encoder<I> {
780+
chars: I,
781+
extra: u16
782+
}
783+
784+
impl<I> Utf16Encoder<I> {
785+
/// Create a UTF-16 encoder from any `char` iterator.
786+
pub fn new(chars: I) -> Utf16Encoder<I> where I: Iterator<char> {
787+
Utf16Encoder { chars: chars, extra: 0 }
788+
}
789+
}
790+
791+
impl<I> Iterator<u16> for Utf16Encoder<I> where I: Iterator<char> {
770792
#[inline]
771793
fn next(&mut self) -> Option<u16> {
772794
if self.extra != 0 {
@@ -2225,7 +2247,7 @@ impl StrPrelude for str {
22252247

22262248
#[inline]
22272249
fn utf16_units(&self) -> Utf16CodeUnits {
2228-
Utf16CodeUnits{ chars: self.chars(), extra: 0}
2250+
Utf16CodeUnits { encoder: Utf16Encoder::new(self.chars()) }
22292251
}
22302252

22312253
#[inline]

src/libcoretest/str.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,3 +114,10 @@ fn test_rev_split_char_iterator_no_trailing() {
114114
split.reverse();
115115
assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
116116
}
117+
118+
#[test]
119+
fn test_utf16_code_units() {
120+
use core::str::Utf16Encoder;
121+
assert_eq!(Utf16Encoder::new(vec!['é', '\U0001F4A9'].into_iter()).collect::<Vec<u16>>(),
122+
vec![0xE9, 0xD83D, 0xDCA9])
123+
}

0 commit comments

Comments
 (0)