Skip to content

Commit 7585b34

Browse files
committed
auto merge of #8446 : alexcrichton/rust/ifmt++, r=graydon
This includes a number of improvements to `ifmt!`:

* Implements formatting arguments -- `{:0.5x}` works now
* Formatting now works on all integer widths, not just `int` and `uint`
* Added a large doc block to `std::fmt` which should help explain what `ifmt!` is all about
* Added floating point formatters, although they have the same pitfalls as before (they're just proof-of-concept for now)

Closed a couple of issues along the way, yay!

Once this gets into a snapshot, I'll start looking into removing all of `fmt`.
2 parents 433fbe8 + 36882b3 commit 7585b34

File tree

8 files changed

+805
-168
lines changed

8 files changed

+805
-168
lines changed

src/libstd/char.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,15 @@ use unicode::{derived_property, general_category};
2020
#[cfg(not(test))] use cmp::{Eq, Ord};
2121
#[cfg(not(test))] use num::Zero;
2222

23+
// UTF-8 ranges and tags for encoding characters
// The TAG_* values are the fixed high bits OR-ed into an encoded byte; the
// MAX_* values are exclusive upper bounds on the code points that fit in a
// sequence of the given length.
24+
static TAG_CONT: uint = 128u; // 0b1000_0000: high bits of every continuation byte
25+
static MAX_ONE_B: uint = 128u; // code points below this are written as a single byte
26+
static TAG_TWO_B: uint = 192u; // 0b1100_0000: high bits of the first byte of a 2-byte sequence
27+
static MAX_TWO_B: uint = 2048u; // code points below this (1 << 11) are written as two bytes
28+
static TAG_THREE_B: uint = 224u; // 0b1110_0000: high bits of the first byte of a 3-byte sequence
29+
static MAX_THREE_B: uint = 65536u; // code points below this (1 << 16) are written as three bytes
30+
static TAG_FOUR_B: uint = 240u; // 0b1111_0000: high bits of the first byte of a 4-byte sequence
31+
2332
/*
2433
Lu Uppercase_Letter an uppercase letter
2534
Ll Lowercase_Letter a lowercase letter
@@ -278,6 +287,12 @@ pub trait Char {
278287
fn escape_unicode(&self, f: &fn(char));
279288
fn escape_default(&self, f: &fn(char));
280289
fn len_utf8_bytes(&self) -> uint;
290+
291+
/// Encodes this character as utf-8 into the provided byte-buffer. The
292+
/// buffer must be at least 4 bytes long or a runtime failure will occur.
293+
///
294+
/// This will then return the number of bytes written to the slice.
295+
fn encode_utf8(&self, dst: &mut [u8]) -> uint;
281296
}
282297

283298
impl Char for char {
@@ -308,6 +323,29 @@ impl Char for char {
308323
// Method form of the free function `escape_default`: forwards this character
// and the callback `f` to it unchanged.
fn escape_default(&self, f: &fn(char)) { escape_default(*self, f) }
309324

310325
// Method form of the free function `len_utf8_bytes`: forwards this character
// to it and returns its result.
fn len_utf8_bytes(&self) -> uint { len_utf8_bytes(*self) }
326+
327+
// Writes the UTF-8 encoding of this character into the start of `dst` and
// returns the number of bytes written (1, 2, 3 or 4).
//
// The code point is split into groups of 6 bits from the low end; each group
// becomes a continuation byte tagged with TAG_CONT, and the remaining high
// bits go into the first byte together with the tag for the sequence length.
// Only as many bytes as the sequence needs are indexed in `dst`.
//
// Precedence note: `>>` binds tighter than `&`, which binds tighter than `|`,
// so every expression below reads as `((code >> n) & mask) | TAG`.
fn encode_utf8<'a>(&self, dst: &'a mut [u8]) -> uint {
328+
let code = *self as uint;
329+
if code < MAX_ONE_B {
330+
dst[0] = code as u8; // single byte: the code point itself, no tag
331+
return 1;
332+
} else if code < MAX_TWO_B {
333+
dst[0] = (code >> 6u & 31u | TAG_TWO_B) as u8; // bits 6..10 (5 bits) plus the 2-byte tag
334+
dst[1] = (code & 63u | TAG_CONT) as u8; // bits 0..5
335+
return 2;
336+
} else if code < MAX_THREE_B {
337+
dst[0] = (code >> 12u & 15u | TAG_THREE_B) as u8; // bits 12..15 (4 bits) plus the 3-byte tag
338+
dst[1] = (code >> 6u & 63u | TAG_CONT) as u8; // bits 6..11
339+
dst[2] = (code & 63u | TAG_CONT) as u8; // bits 0..5
340+
return 3;
341+
} else {
342+
dst[0] = (code >> 18u & 7u | TAG_FOUR_B) as u8; // bits 18..20 (3 bits) plus the 4-byte tag
343+
dst[1] = (code >> 12u & 63u | TAG_CONT) as u8; // bits 12..17
344+
dst[2] = (code >> 6u & 63u | TAG_CONT) as u8; // bits 6..11
345+
dst[3] = (code & 63u | TAG_CONT) as u8; // bits 0..5
346+
return 4;
347+
}
348+
}
311349
}
312350

313351
#[cfg(not(test))]

0 commit comments

Comments
 (0)