---

blake2-ppc · blake2-ppc · commit 5de818e7d828 · 2013-07-30T19:16:12.000+02:00
yaml --- r: 69486 b: refs/heads/auto c: b4ff955 h: refs/heads/master v: v3
diff --git a/[refs] b/[refs]
@@ -14,6 +14,6 @@ refs/heads/try3: 9387340aab40a73e8424c48fd42f0c521a4875c0
 refs/tags/release-0.3.1: 495bae036dfe5ec6ceafd3312b4dca48741e845b
 refs/tags/release-0.4: e828ea2080499553b97dfe33b3f4d472b4562ad7
 refs/tags/release-0.5: 7e3bcfbf21278251ee936ad53e92e9b719702d73
-refs/heads/auto: f7ebab440322d9c2a20cd8c1e23db789ffdc8685
+refs/heads/auto: b4ff95599a05da66d2ba0955cc7ae33dd6bfe7fe
 refs/heads/servo: af82457af293e2a842ba6b7759b70288da276167
 refs/tags/release-0.6: b4ebcfa1812664df5e142f0134a5faea3918544c
diff --git a/branches/auto/src/compiletest/compiletest.rs b/branches/auto/src/compiletest/compiletest.rs
@@ -85,7 +85,6 @@ pub fn parse_config(args: ~[~str]) -> config {
     if args[1] == ~"-h" || args[1] == ~"--help" {
         let message = fmt!("Usage: %s [OPTIONS] [TESTNAME...]", argv0);
         println(getopts::groups::usage(message, groups));
-        println("");
         fail!()
     }
 
@@ -98,7 +97,6 @@ pub fn parse_config(args: ~[~str]) -> config {
     if getopts::opt_present(matches, "h") || getopts::opt_present(matches, "help") {
         let message = fmt!("Usage: %s [OPTIONS]  [TESTNAME...]", argv0);
         println(getopts::groups::usage(message, groups));
-        println("");
         fail!()
     }
 
diff --git a/branches/auto/src/libextra/getopts.rs b/branches/auto/src/libextra/getopts.rs
@@ -680,7 +680,7 @@ pub mod groups {
         return brief.to_owned() +
                "\n\nOptions:\n" +
                rows.collect::<~[~str]>().connect("\n") +
-               "\n";
+               "\n\n";
     }
 
     /** Splits a string into substrings with possibly internal whitespace,
@@ -1463,6 +1463,7 @@ Options:
     -k --kiwi           Desc
     -p [VAL]            Desc
     -l VAL              Desc
+
 ";
 
         let generated_usage = groups::usage("Usage: fruits", optgroups);
@@ -1491,6 +1492,7 @@ Options:
     -k --kiwi           This is a long description which won't be wrapped..+..
     -a --apple          This is a long description which _will_ be
                         wrapped..+..
+
 ";
 
         let usage = groups::usage("Usage: fruits", optgroups);
diff --git a/branches/auto/src/libextra/test.rs b/branches/auto/src/libextra/test.rs
@@ -191,7 +191,6 @@ fn optgroups() -> ~[getopts::groups::OptGroup] {
 fn usage(binary: &str, helpstr: &str) -> ! {
     let message = fmt!("Usage: %s [OPTIONS] [FILTER]", binary);
     println(groups::usage(message, optgroups()));
-    println("");
     if helpstr == "help" {
         println("\
 The FILTER is matched against the name of all tests to run, and if any tests
diff --git a/branches/auto/src/libstd/str.rs b/branches/auto/src/libstd/str.rs
@@ -564,6 +564,18 @@ fn match_at<'a,'b>(haystack: &'a str, needle: &'b str, at: uint) -> bool {
 Section: Misc
 */
 
+// Return the initial codepoint accumulator for the first byte.
+// The first byte is special: the mask `0x7F >> $width` keeps only its bottom
+// 5 bits for width 2, 4 bits for width 3, and 3 bits for width 4.
+macro_rules! utf8_first_byte(
+    ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as uint)
+)
+
+// Return $ch shifted left by 6 bits and OR-ed with the low 6 bits of continuation byte $byte.
+macro_rules! utf8_acc_cont_byte(
+    ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as uint)
+)
+
 /// Determines if a vector of bytes contains valid UTF-8
 pub fn is_utf8(v: &[u8]) -> bool {
     let mut i = 0u;
@@ -577,11 +589,26 @@ pub fn is_utf8(v: &[u8]) -> bool {
 
             let nexti = i + w;
             if nexti > total { return false; }
+            // 1. Make sure the correct number of continuation bytes are present
+            // 2. Check codepoint ranges (deny overlong encodings)
+            //    2-byte encoding is for codepoints  \u0080 to  \u07ff
+            //    3-byte encoding is for codepoints  \u0800 to  \uffff
+            //    4-byte encoding is for codepoints \u10000 to \u10ffff
 
+            //    2-byte encodings are correct if the width and continuation match up
             if v[i + 1] & 192u8 != TAG_CONT_U8 { return false; }
             if w > 2 {
+                let mut ch;
+                ch = utf8_first_byte!(v[i], w);
+                ch = utf8_acc_cont_byte!(ch, v[i + 1]);
                 if v[i + 2] & 192u8 != TAG_CONT_U8 { return false; }
-                if w > 3 && (v[i + 3] & 192u8 != TAG_CONT_U8) { return false; }
+                ch = utf8_acc_cont_byte!(ch, v[i + 2]);
+                if w == 3 && ch < MAX_TWO_B { return false; }
+                if w > 3 {
+                    if v[i + 3] & 192u8 != TAG_CONT_U8 { return false; }
+                    ch = utf8_acc_cont_byte!(ch, v[i + 3]);
+                    if ch < MAX_THREE_B || ch >= MAX_UNICODE { return false; }
+                }
             }
 
             i = nexti;
@@ -712,7 +739,7 @@ static UTF8_CHAR_WIDTH: [u8, ..256] = [
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
 4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
@@ -738,6 +765,7 @@ static MAX_TWO_B: uint = 2048u;
 static TAG_THREE_B: uint = 224u;
 static MAX_THREE_B: uint = 65536u;
 static TAG_FOUR_B: uint = 240u;
+static MAX_UNICODE: uint = 1114112u; // 0x110000: one past the highest codepoint, 0x10ffff
 
 /// Unsafe operations
 pub mod raw {
@@ -1665,12 +1693,10 @@ impl<'self> StrSlice<'self> for &'self str {
             let w = UTF8_CHAR_WIDTH[val] as uint;
             assert!((w != 0));
 
-            // First byte is special, only want bottom 5 bits for width 2, 4 bits
-            // for width 3, and 3 bits for width 4
-            val &= 0x7Fu >> w;
-            val = (val << 6) | (s[i + 1] & 63u8) as uint;
-            if w > 2 { val = (val << 6) | (s[i + 2] & 63u8) as uint; }
-            if w > 3 { val = (val << 6) | (s[i + 3] & 63u8) as uint; }
+            val = utf8_first_byte!(val, w);
+            val = utf8_acc_cont_byte!(val, s[i + 1]);
+            if w > 2 { val = utf8_acc_cont_byte!(val, s[i + 2]); }
+            if w > 3 { val = utf8_acc_cont_byte!(val, s[i + 3]); }
 
             return CharRange {ch: val as char, next: i + w};
         }
@@ -2035,7 +2061,7 @@ impl OwnedStr for ~str {
     /// Appends a character to the back of a string
     #[inline]
     fn push_char(&mut self, c: char) {
-        assert!(c as uint <= 0x10ffff); // FIXME: #7609: should be enforced on all `char`
+        assert!((c as uint) < MAX_UNICODE); // FIXME: #7609: should be enforced on all `char`
         unsafe {
             let code = c as uint;
             let nb = if code < MAX_ONE_B { 1u }
@@ -2802,6 +2828,17 @@ mod tests {
         assert_eq!(ss, from_bytes(bb));
     }
 
+    #[test]
+    fn test_is_utf8_deny_overlong() {
+        assert!(!is_utf8([0xc0, 0x80])); // overlong 2-byte form of U+0000
+        assert!(!is_utf8([0xc0, 0xae])); // overlong 2-byte form of U+002E ('.')
+        assert!(!is_utf8([0xe0, 0x80, 0x80])); // overlong 3-byte form of U+0000
+        assert!(!is_utf8([0xe0, 0x80, 0xaf])); // overlong 3-byte form of U+002F ('/')
+        assert!(!is_utf8([0xe0, 0x81, 0x81])); // overlong 3-byte form of U+0041 ('A')
+        assert!(!is_utf8([0xf0, 0x82, 0x82, 0xac])); // overlong 4-byte form of U+20AC
+    }
+
+
     #[test]
     #[ignore(cfg(windows))]
     fn test_from_bytes_fail() {

Original file line number	Diff line number	Diff line change
`@@ -85,7 +85,6 @@ pub fn parse_config(args: ~[~str]) -> config {`
`85`	`85`	`if args[1] == ~"-h" \|\| args[1] == ~"--help" {`
`86`	`86`	`let message = fmt!("Usage: %s [OPTIONS] [TESTNAME...]", argv0);`
`87`	`87`	`println(getopts::groups::usage(message, groups));`
`88`		`- println("");`
`89`	`88`	`fail!()`
`90`	`89`	`}`
`91`	`90`
`@@ -98,7 +97,6 @@ pub fn parse_config(args: ~[~str]) -> config {`
`98`	`97`	`if getopts::opt_present(matches, "h") \|\| getopts::opt_present(matches, "help") {`
`99`	`98`	`let message = fmt!("Usage: %s [OPTIONS] [TESTNAME...]", argv0);`
`100`	`99`	`println(getopts::groups::usage(message, groups));`
`101`		`- println("");`
`102`	`100`	`fail!()`
`103`	`101`	`}`
`104`	`102`