---

huonw · huonw · commit e4cd2d874b46 · 2014-02-17T23:53:49.000+11:00
yaml --- r: 149343 b: refs/heads/try2 c: b7656d0 h: refs/heads/master i: 149341: 231739a 149339: 1018e02 149335: 327fcdf 149327: 544cce5 149311: 01d9c90 v: v3
diff --git a/[refs] b/[refs]
@@ -5,7 +5,7 @@ refs/heads/snap-stage3: 78a7676898d9f80ab540c6df5d4c9ce35bb50463
 refs/heads/try: 519addf6277dbafccbb4159db4b710c37eaa2ec5
 refs/tags/release-0.1: 1f5c5126e96c79d22cb7862f75304136e204f105
 refs/heads/ndm: f3868061cd7988080c30d6d5bf352a5a5fe2460b
-refs/heads/try2: 493a4b63c1d6791ae7d2001123d8953bd62aa443
+refs/heads/try2: b7656d048feb828af12278a6028c17b0c86241b3
 refs/heads/dist-snap: ba4081a5a8573875fed17545846f6f6902c8ba8d
 refs/tags/release-0.2: c870d2dffb391e14efb05aa27898f1f6333a9596
 refs/tags/release-0.3: b5f0d0f648d9a6153664837026ba1be43d3e2503
diff --git a/branches/try2/src/libstd/str.rs b/branches/try2/src/libstd/str.rs
@@ -824,41 +824,52 @@ pub fn is_utf16(v: &[u16]) -> bool {
     }
 }
 
-/// Iterates over the utf-16 characters in the specified slice, yielding each
-/// decoded unicode character to the function provided.
+/// An iterator that decodes UTF-16 encoded codepoints from a vector
+/// of `u16`s.
 ///
-/// # Failures
-///
-/// * Fails on invalid utf-16 data
-pub fn utf16_chars(v: &[u16], f: |char|) {
-    let len = v.len();
-    let mut i = 0u;
-    while i < len && v[i] != 0u16 {
-        let u = v[i];
-
-        if  u <= 0xD7FF_u16 || u >= 0xE000_u16 {
-            f(unsafe { cast::transmute(u as u32) });
-            i += 1u;
+/// Fails when it encounters invalid UTF-16 data.
+pub struct UTF16Chars<'a> {
+    priv iter: vec::Items<'a, u16>
+}
+impl<'a> Iterator<char> for UTF16Chars<'a> {
+    fn next(&mut self) -> Option<char> {
+        let u = match self.iter.next() {
+            Some(u) => *u,
+            None => return None
+        };
+        match char::from_u32(u as u32) {
+            Some(c) => Some(c),
+            None => {
+                let u2 = *self.iter.next().expect("UTF16Chars: unmatched lead surrogate");
+                if u < 0xD7FF || u > 0xDBFF ||
+                    u2 < 0xDC00 || u2 > 0xDFFF {
+                    fail!("UTF16Chars: invalid surrogate pair")
+                }
 
-        } else {
-            let u2 = v[i+1u];
-            assert!(u >= 0xD800_u16 && u <= 0xDBFF_u16);
-            assert!(u2 >= 0xDC00_u16 && u2 <= 0xDFFF_u16);
-            let mut c: u32 = (u - 0xD800_u16) as u32;
-            c = c << 10;
-            c |= (u2 - 0xDC00_u16) as u32;
-            c |= 0x1_0000_u32;
-            f(unsafe { cast::transmute(c) });
-            i += 2u;
+                let mut c = (u - 0xD800) as u32 << 10 | (u2 - 0xDC00) as u32 | 0x1_0000;
+                char::from_u32(c)
+            }
         }
     }
+
+    fn size_hint(&self) -> (uint, Option<uint>) {
+        let (low, high) = self.iter.size_hint();
+        // we could be entirely surrogates (2 elements per char), or
+        // entirely non-surrogates (1 element per char)
+        (low / 2, high)
+    }
+}
+
+/// Create an iterator over the UTF-16 encoded codepoints in `v`.
+///
+/// The iterator fails if it attempts to decode invalid UTF-16 data.
+pub fn utf16_chars<'a>(v: &'a [u16]) -> UTF16Chars<'a> {
+    UTF16Chars { iter : v.iter() }
 }
 
 /// Allocates a new string from the utf-16 slice provided
 pub fn from_utf16(v: &[u16]) -> ~str {
-    let mut buf = with_capacity(v.len());
-    utf16_chars(v, |ch| buf.push_char(ch));
-    buf
+    utf16_chars(v).collect()
 }
 
 /// Allocates a new string with the specified capacity. The string returned is