Skip to content

Commit b820748

Browse files
committed
Implement formatting arguments for strings and integers
Closes rust-lang#1651
1 parent 44675ac commit b820748

File tree

6 files changed

+295
-105
lines changed

6 files changed

+295
-105
lines changed

src/libstd/char.rs

+38
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,15 @@ use unicode::{derived_property, general_category};
2020
#[cfg(not(test))] use cmp::{Eq, Ord};
2121
#[cfg(not(test))] use num::Zero;
2222

23+
// UTF-8 ranges and tags for encoding characters
24+
static TAG_CONT: uint = 128u;
25+
static MAX_ONE_B: uint = 128u;
26+
static TAG_TWO_B: uint = 192u;
27+
static MAX_TWO_B: uint = 2048u;
28+
static TAG_THREE_B: uint = 224u;
29+
static MAX_THREE_B: uint = 65536u;
30+
static TAG_FOUR_B: uint = 240u;
31+
2332
/*
2433
Lu Uppercase_Letter an uppercase letter
2534
Ll Lowercase_Letter a lowercase letter
@@ -278,6 +287,12 @@ pub trait Char {
278287
fn escape_unicode(&self, f: &fn(char));
279288
fn escape_default(&self, f: &fn(char));
280289
fn len_utf8_bytes(&self) -> uint;
290+
291+
/// Encodes this character as utf-8 into the provided byte-buffer. The
292+
/// buffer must be at least 4 bytes long or a runtime failure will occur.
293+
///
294+
/// This will then return the number of bytes written to the slice.
295+
fn encode_utf8(&self, dst: &mut [u8]) -> uint;
281296
}
282297

283298
impl Char for char {
@@ -308,6 +323,29 @@ impl Char for char {
308323
fn escape_default(&self, f: &fn(char)) { escape_default(*self, f) }
309324

310325
fn len_utf8_bytes(&self) -> uint { len_utf8_bytes(*self) }
326+
327+
fn encode_utf8<'a>(&self, dst: &'a mut [u8]) -> uint {
328+
let code = *self as uint;
329+
if code < MAX_ONE_B {
330+
dst[0] = code as u8;
331+
return 1;
332+
} else if code < MAX_TWO_B {
333+
dst[0] = (code >> 6u & 31u | TAG_TWO_B) as u8;
334+
dst[1] = (code & 63u | TAG_CONT) as u8;
335+
return 2;
336+
} else if code < MAX_THREE_B {
337+
dst[0] = (code >> 12u & 15u | TAG_THREE_B) as u8;
338+
dst[1] = (code >> 6u & 63u | TAG_CONT) as u8;
339+
dst[2] = (code & 63u | TAG_CONT) as u8;
340+
return 3;
341+
} else {
342+
dst[0] = (code >> 18u & 7u | TAG_FOUR_B) as u8;
343+
dst[1] = (code >> 12u & 63u | TAG_CONT) as u8;
344+
dst[2] = (code >> 6u & 63u | TAG_CONT) as u8;
345+
dst[3] = (code & 63u | TAG_CONT) as u8;
346+
return 4;
347+
}
348+
}
311349
}
312350

313351
#[cfg(not(test))]

src/libstd/fmt/mod.rs

+145-22
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
use prelude::*;
1212

1313
use cast;
14-
use int;
14+
use char::Char;
1515
use rt::io::Decorator;
1616
use rt::io::mem::MemWriter;
1717
use rt::io;
@@ -122,6 +122,11 @@ pub unsafe fn sprintf(fmt: &[rt::Piece], args: &[Argument]) -> ~str {
122122
}
123123

124124
impl<'self> Formatter<'self> {
125+
126+
// First up is the collection of functions used to execute a format string
127+
// at runtime. This consumes all of the compile-time statics generated by
128+
// the ifmt! syntax extension.
129+
125130
fn run(&mut self, piece: &rt::Piece, cur: Option<&str>) {
126131
let setcount = |slot: &mut Option<uint>, cnt: &parse::Count| {
127132
match *cnt {
@@ -240,6 +245,118 @@ impl<'self> Formatter<'self> {
240245
}
241246
}
242247
}
248+
249+
// Helper methods used for padding and processing formatting arguments that
250+
// all formatting traits can use.
251+
252+
/// TODO: dox
253+
pub fn pad_integral(&mut self, s: &[u8], alternate_prefix: &str,
254+
positive: bool) {
255+
use fmt::parse::{FlagAlternate, FlagSignPlus};
256+
257+
let mut actual_len = s.len();
258+
if self.flags & 1 << (FlagAlternate as uint) != 0 {
259+
actual_len += alternate_prefix.len();
260+
}
261+
if self.flags & 1 << (FlagSignPlus as uint) != 0 {
262+
actual_len += 1;
263+
}
264+
if !positive {
265+
actual_len += 1;
266+
}
267+
268+
let emit = |this: &mut Formatter| {
269+
if this.flags & 1 << (FlagSignPlus as uint) != 0 && positive {
270+
this.buf.write(['+' as u8]);
271+
} else if !positive {
272+
this.buf.write(['-' as u8]);
273+
}
274+
if this.flags & 1 << (FlagAlternate as uint) != 0 {
275+
this.buf.write(alternate_prefix.as_bytes());
276+
}
277+
this.buf.write(s);
278+
};
279+
280+
match self.width {
281+
None => { emit(self) }
282+
Some(min) if actual_len >= min => { emit(self) }
283+
Some(min) => {
284+
do self.with_padding(min - actual_len) |me| {
285+
emit(me);
286+
}
287+
}
288+
}
289+
}
290+
291+
/// This function takes a string slice and emits it to the internal buffer
292+
/// after applying the relevant formatting flags specified. The flags
293+
/// recognized for generic strings are:
294+
///
295+
/// * width - the minimum width of what to emit
296+
/// * fill/alignleft - what to emit and where to emit it if the string
297+
/// provided needs to be padded
298+
/// * precision - the maximum length to emit, the string is truncated if it
299+
/// is longer than this length
300+
///
301+
/// Notably this function ignored the `flag` parameters
302+
pub fn pad(&mut self, s: &str) {
303+
// Make sure there's a fast path up front
304+
if self.width.is_none() && self.precision.is_none() {
305+
self.buf.write(s.as_bytes());
306+
return
307+
}
308+
// The `precision` field can be interpreted as a `max-width` for the
309+
// string being formatted
310+
match self.precision {
311+
Some(max) => {
312+
// If there's a maximum width and our string is longer than
313+
// that, then we must always have truncation. This is the only
314+
// case where the maximum length will matter.
315+
let char_len = s.char_len();
316+
if char_len >= max {
317+
let nchars = uint::min(max, char_len);
318+
self.buf.write(s.slice_chars(0, nchars).as_bytes());
319+
return
320+
}
321+
}
322+
None => {}
323+
}
324+
325+
// The `width` field is more of a `min-width` parameter at this point.
326+
match self.width {
327+
// If we're under the maximum length, and there's no minimum length
328+
// requirements, then we can just emit the string
329+
None => { self.buf.write(s.as_bytes()) }
330+
331+
// If we're under the maximum width, check if we're over the minimum
332+
// width, if so it's as easy as just emitting the string.
333+
Some(width) if s.char_len() >= width => {
334+
self.buf.write(s.as_bytes())
335+
}
336+
337+
// If we're under both the maximum and the minimum width, then fill
338+
// up the minimum width with the specified string + some alignment.
339+
Some(width) => {
340+
do self.with_padding(width - s.len()) |me| {
341+
me.buf.write(s.as_bytes());
342+
}
343+
}
344+
}
345+
}
346+
347+
fn with_padding(&mut self, padding: uint, f: &fn(&mut Formatter)) {
348+
if self.alignleft {
349+
f(self);
350+
}
351+
let mut fill = [0u8, ..4];
352+
let len = self.fill.encode_utf8(fill);
353+
for _ in range(0, padding) {
354+
self.buf.write(fill.slice_to(len));
355+
}
356+
if !self.alignleft {
357+
f(self);
358+
}
359+
}
243360
}
244361

245362
/// This is a function which calls are emitted to by the compiler itself to
@@ -279,60 +396,53 @@ impl Bool for bool {
279396

280397
impl<'self> String for &'self str {
281398
fn fmt(s: & &'self str, f: &mut Formatter) {
282-
// XXX: formatting args
283-
f.buf.write(s.as_bytes())
399+
f.pad(*s);
284400
}
285401
}
286402

287403
impl Char for char {
288404
fn fmt(c: &char, f: &mut Formatter) {
289-
// XXX: formatting args
290-
// XXX: shouldn't require an allocation
291-
let mut s = ~"";
292-
s.push_char(*c);
293-
f.buf.write(s.as_bytes());
405+
let mut utf8 = [0u8, ..4];
406+
let amt = c.encode_utf8(utf8);
407+
let s: &str = unsafe { cast::transmute(utf8.slice_to(amt)) };
408+
String::fmt(&s, f);
294409
}
295410
}
296411

297412
impl Signed for int {
298413
fn fmt(c: &int, f: &mut Formatter) {
299-
// XXX: formatting args
300-
do int::to_str_bytes(*c, 10) |buf| {
301-
f.buf.write(buf);
414+
do uint::to_str_bytes(c.abs() as uint, 10) |buf| {
415+
f.pad_integral(buf, "", *c >= 0);
302416
}
303417
}
304418
}
305419

306420
impl Unsigned for uint {
307421
fn fmt(c: &uint, f: &mut Formatter) {
308-
// XXX: formatting args
309422
do uint::to_str_bytes(*c, 10) |buf| {
310-
f.buf.write(buf);
423+
f.pad_integral(buf, "", true);
311424
}
312425
}
313426
}
314427

315428
impl Octal for uint {
316429
fn fmt(c: &uint, f: &mut Formatter) {
317-
// XXX: formatting args
318430
do uint::to_str_bytes(*c, 8) |buf| {
319-
f.buf.write(buf);
431+
f.pad_integral(buf, "0o", true);
320432
}
321433
}
322434
}
323435

324436
impl LowerHex for uint {
325437
fn fmt(c: &uint, f: &mut Formatter) {
326-
// XXX: formatting args
327438
do uint::to_str_bytes(*c, 16) |buf| {
328-
f.buf.write(buf);
439+
f.pad_integral(buf, "0x", true);
329440
}
330441
}
331442
}
332443

333444
impl UpperHex for uint {
334445
fn fmt(c: &uint, f: &mut Formatter) {
335-
// XXX: formatting args
336446
do uint::to_str_bytes(*c, 16) |buf| {
337447
let mut local = [0u8, ..16];
338448
for (l, &b) in local.mut_iter().zip(buf.iter()) {
@@ -341,16 +451,29 @@ impl UpperHex for uint {
341451
_ => b,
342452
};
343453
}
344-
f.buf.write(local.slice_to(buf.len()));
454+
f.pad_integral(local.slice_to(buf.len()), "0x", true);
345455
}
346456
}
347457
}
348458

349459
impl<T> Poly for T {
350460
fn fmt(t: &T, f: &mut Formatter) {
351-
// XXX: formatting args
352-
let s = sys::log_str(t);
353-
f.buf.write(s.as_bytes());
461+
match (f.width, f.precision) {
462+
(None, None) => {
463+
// XXX: sys::log_str should have a variant which takes a stream
464+
// and we should directly call that (avoids unnecessary
465+
// allocations)
466+
let s = sys::log_str(t);
467+
f.buf.write(s.as_bytes());
468+
}
469+
470+
// If we have a specified width for formatting, then we have to make
471+
// this allocation of a new string
472+
_ => {
473+
let s = sys::log_str(t);
474+
f.pad(s);
475+
}
476+
}
354477
}
355478
}
356479

src/libstd/str.rs

+12-41
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ use ptr;
3333
use ptr::RawPtr;
3434
use to_str::ToStr;
3535
use uint;
36+
use unstable::raw::{Repr, Slice};
3637
use vec;
3738
use vec::{OwnedVector, OwnedCopyableVector, ImmutableVector, MutableVector};
3839

@@ -758,15 +759,7 @@ macro_rules! utf8_acc_cont_byte(
758759
($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as uint)
759760
)
760761

761-
// UTF-8 tags and ranges
762762
static TAG_CONT_U8: u8 = 128u8;
763-
static TAG_CONT: uint = 128u;
764-
static MAX_ONE_B: uint = 128u;
765-
static TAG_TWO_B: uint = 192u;
766-
static MAX_TWO_B: uint = 2048u;
767-
static TAG_THREE_B: uint = 224u;
768-
static MAX_THREE_B: uint = 65536u;
769-
static TAG_FOUR_B: uint = 240u;
770763
static MAX_UNICODE: uint = 1114112u;
771764

772765
/// Unsafe operations
@@ -1988,40 +1981,18 @@ impl OwnedStr for ~str {
19881981
#[inline]
19891982
fn push_char(&mut self, c: char) {
19901983
assert!((c as uint) < MAX_UNICODE); // FIXME: #7609: should be enforced on all `char`
1984+
let cur_len = self.len();
1985+
self.reserve_at_least(cur_len + 4); // may use up to 4 bytes
1986+
1987+
// Attempt to not use an intermediate buffer by just pushing bytes
1988+
// directly onto this string.
19911989
unsafe {
1992-
let code = c as uint;
1993-
let nb = if code < MAX_ONE_B { 1u }
1994-
else if code < MAX_TWO_B { 2u }
1995-
else if code < MAX_THREE_B { 3u }
1996-
else { 4u };
1997-
let len = self.len();
1998-
let new_len = len + nb;
1999-
self.reserve_at_least(new_len);
2000-
let off = len as int;
2001-
do self.as_mut_buf |buf, _len| {
2002-
match nb {
2003-
1u => {
2004-
*ptr::mut_offset(buf, off) = code as u8;
2005-
}
2006-
2u => {
2007-
*ptr::mut_offset(buf, off) = (code >> 6u & 31u | TAG_TWO_B) as u8;
2008-
*ptr::mut_offset(buf, off + 1) = (code & 63u | TAG_CONT) as u8;
2009-
}
2010-
3u => {
2011-
*ptr::mut_offset(buf, off) = (code >> 12u & 15u | TAG_THREE_B) as u8;
2012-
*ptr::mut_offset(buf, off + 1) = (code >> 6u & 63u | TAG_CONT) as u8;
2013-
*ptr::mut_offset(buf, off + 2) = (code & 63u | TAG_CONT) as u8;
2014-
}
2015-
4u => {
2016-
*ptr::mut_offset(buf, off) = (code >> 18u & 7u | TAG_FOUR_B) as u8;
2017-
*ptr::mut_offset(buf, off + 1) = (code >> 12u & 63u | TAG_CONT) as u8;
2018-
*ptr::mut_offset(buf, off + 2) = (code >> 6u & 63u | TAG_CONT) as u8;
2019-
*ptr::mut_offset(buf, off + 3) = (code & 63u | TAG_CONT) as u8;
2020-
}
2021-
_ => {}
2022-
}
2023-
}
2024-
raw::set_len(self, new_len);
1990+
let v = self.repr();
1991+
let len = c.encode_utf8(cast::transmute(Slice {
1992+
data: ((&(*v).data) as *u8).offset(cur_len as int),
1993+
len: 4,
1994+
}));
1995+
raw::set_len(self, cur_len + len);
20251996
}
20261997
}
20271998

src/libstd/unstable/raw.rs

+1
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ impl<'self, T> Repr<Slice<T>> for &'self [T] {}
5656
impl<'self> Repr<Slice<u8>> for &'self str {}
5757
impl<T> Repr<*Box<T>> for @T {}
5858
impl<T> Repr<*Box<Vec<T>>> for @[T] {}
59+
impl Repr<*String> for ~str {}
5960

6061
// sure would be nice to have this
6162
// impl<T> Repr<*Vec<T>> for ~[T] {}

0 commit comments

Comments
 (0)