Skip to content

Commit 3321880

Browse files
committed
Merge remote-tracking branch 'killerswan/fixing_strings_2'
Conflicts: src/comp/driver/driver.rs src/comp/middle/trans/base.rs src/comp/syntax/parse/lexer.rs
2 parents 7e620f8 + 3afc16f commit 3321880

File tree

20 files changed

+96
-63
lines changed

20 files changed

+96
-63
lines changed

src/comp/metadata/decoder.rs

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ fn item_family(item: ebml::doc) -> u8 {
8383

8484
fn item_symbol(item: ebml::doc) -> str {
8585
let sym = ebml::get_doc(item, tag_items_data_item_symbol);
86-
ret str::unsafe_from_bytes(ebml::doc_data(sym));
86+
ret str::from_bytes(ebml::doc_data(sym));
8787
}
8888

8989
fn variant_enum_id(d: ebml::doc) -> ast::def_id {
@@ -162,7 +162,7 @@ fn enum_variant_ids(item: ebml::doc, cdata: cmd) -> [ast::def_id] {
162162
// definition the path refers to.
163163
fn resolve_path(path: [ast::ident], data: @[u8]) -> [ast::def_id] {
164164
fn eq_item(data: [u8], s: str) -> bool {
165-
ret str::eq(str::unsafe_from_bytes(data), s);
165+
ret str::eq(str::from_bytes(data), s);
166166
}
167167
let s = str::connect(path, "::");
168168
let md = ebml::new_doc(data);
@@ -178,7 +178,7 @@ fn resolve_path(path: [ast::ident], data: @[u8]) -> [ast::def_id] {
178178

179179
fn item_name(item: ebml::doc) -> ast::ident {
180180
let name = ebml::get_doc(item, tag_paths_data_name);
181-
str::unsafe_from_bytes(ebml::doc_data(name))
181+
str::from_bytes(ebml::doc_data(name))
182182
}
183183

184184
fn lookup_item_name(data: @[u8], id: ast::node_id) -> ast::ident {
@@ -325,7 +325,7 @@ fn read_path(d: ebml::doc) -> {path: str, pos: uint} {
325325
let desc = ebml::doc_data(d);
326326
let pos = ebml::be_uint_from_bytes(@desc, 0u, 4u);
327327
let pathbytes = vec::slice::<u8>(desc, 4u, vec::len::<u8>(desc));
328-
let path = str::unsafe_from_bytes(pathbytes);
328+
let path = str::from_bytes(pathbytes);
329329
ret {path: path, pos: pos};
330330
}
331331

@@ -358,21 +358,21 @@ fn get_meta_items(md: ebml::doc) -> [@ast::meta_item] {
358358
let items: [@ast::meta_item] = [];
359359
ebml::tagged_docs(md, tag_meta_item_word) {|meta_item_doc|
360360
let nd = ebml::get_doc(meta_item_doc, tag_meta_item_name);
361-
let n = str::unsafe_from_bytes(ebml::doc_data(nd));
361+
let n = str::from_bytes(ebml::doc_data(nd));
362362
items += [attr::mk_word_item(n)];
363363
};
364364
ebml::tagged_docs(md, tag_meta_item_name_value) {|meta_item_doc|
365365
let nd = ebml::get_doc(meta_item_doc, tag_meta_item_name);
366366
let vd = ebml::get_doc(meta_item_doc, tag_meta_item_value);
367-
let n = str::unsafe_from_bytes(ebml::doc_data(nd));
368-
let v = str::unsafe_from_bytes(ebml::doc_data(vd));
367+
let n = str::from_bytes(ebml::doc_data(nd));
368+
let v = str::from_bytes(ebml::doc_data(vd));
369369
// FIXME (#611): Should be able to decode meta_name_value variants,
370370
// but currently they can't be encoded
371371
items += [attr::mk_name_value_item_str(n, v)];
372372
};
373373
ebml::tagged_docs(md, tag_meta_item_list) {|meta_item_doc|
374374
let nd = ebml::get_doc(meta_item_doc, tag_meta_item_name);
375-
let n = str::unsafe_from_bytes(ebml::doc_data(nd));
375+
let n = str::from_bytes(ebml::doc_data(nd));
376376
let subitems = get_meta_items(meta_item_doc);
377377
items += [attr::mk_list_item(n, subitems)];
378378
};
@@ -427,7 +427,7 @@ fn get_crate_deps(data: @[u8]) -> [crate_dep] {
427427
let depsdoc = ebml::get_doc(cratedoc, tag_crate_deps);
428428
let crate_num = 1;
429429
ebml::tagged_docs(depsdoc, tag_crate_dep) {|depdoc|
430-
let depname = str::unsafe_from_bytes(ebml::doc_data(depdoc));
430+
let depname = str::from_bytes(ebml::doc_data(depdoc));
431431
deps += [{cnum: crate_num, ident: depname}];
432432
crate_num += 1;
433433
};
@@ -447,7 +447,7 @@ fn list_crate_deps(data: @[u8], out: io::writer) {
447447
fn get_crate_hash(data: @[u8]) -> str {
448448
let cratedoc = ebml::new_doc(data);
449449
let hashdoc = ebml::get_doc(cratedoc, tag_crate_hash);
450-
ret str::unsafe_from_bytes(ebml::doc_data(hashdoc));
450+
ret str::from_bytes(ebml::doc_data(hashdoc));
451451
}
452452

453453
fn list_crate_items(bytes: @[u8], md: ebml::doc, out: io::writer) {

src/comp/metadata/encoder.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -661,7 +661,7 @@ fn encode_hash(ebml_w: ebml::writer, hash: str) {
661661
ebml::end_tag(ebml_w);
662662
}
663663

664-
fn encode_metadata(cx: @crate_ctxt, crate: @crate) -> str {
664+
fn encode_metadata(cx: @crate_ctxt, crate: @crate) -> [u8] {
665665

666666
let abbrevs = ty::new_ty_hash();
667667
let ecx = @{ccx: cx, type_abbrevs: abbrevs};
@@ -694,7 +694,7 @@ fn encode_metadata(cx: @crate_ctxt, crate: @crate) -> str {
694694
// Pad this, since something (LLVM, presumably) is cutting off the
695695
// remaining % 4 bytes.
696696
buf_w.write([0u8, 0u8, 0u8, 0u8]);
697-
io::mem_buffer_str(buf)
697+
io::mem_buffer_buf(buf)
698698
}
699699

700700
// Get the encoded string for a type

src/comp/metadata/tydecode.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ fn parse_ident_(st: @pstate, is_last: fn@(char) -> bool) ->
3939
ast::ident {
4040
let rslt = "";
4141
while !is_last(peek(st) as char) {
42-
rslt += str::unsafe_from_byte(next(st));
42+
rslt += str::from_byte(next(st));
4343
}
4444
ret rslt;
4545
}
@@ -226,7 +226,7 @@ fn parse_ty(st: @pstate, conv: conv_did) -> ty::t {
226226
while peek(st) as char != ']' {
227227
let name = "";
228228
while peek(st) as char != '=' {
229-
name += str::unsafe_from_byte(next(st));
229+
name += str::from_byte(next(st));
230230
}
231231
st.pos = st.pos + 1u;
232232
fields += [{ident: name, mt: parse_mt(st, conv)}];

src/comp/middle/trans/base.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,7 @@ fn sanitize(s: str) -> str {
268268
c != ' ' as u8 && c != '\t' as u8 && c != ';' as u8
269269
{
270270
let v = [c];
271-
result += str::unsafe_from_bytes(v);
271+
result += str::from_bytes(v);
272272
}
273273
}
274274
}
@@ -5412,7 +5412,7 @@ fn fill_crate_map(ccx: @crate_ctxt, map: ValueRef) {
54125412

54135413
fn write_metadata(cx: @crate_ctxt, crate: @ast::crate) {
54145414
if !cx.sess.building_library { ret; }
5415-
let llmeta = C_postr(metadata::encoder::encode_metadata(cx, crate));
5415+
let llmeta = C_bytes(metadata::encoder::encode_metadata(cx, crate));
54165416
let llconst = C_struct([llmeta]);
54175417
let llglobal = str::as_buf("rust_metadata", {|buf|
54185418
llvm::LLVMAddGlobal(cx.llmod, val_ty(llconst), buf)

src/comp/syntax/parse/lexer.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -672,7 +672,7 @@ fn gather_comments_and_literals(cm: codemap::codemap,
672672
path: str,
673673
srdr: io::reader) ->
674674
{cmnts: [cmnt], lits: [lit]} {
675-
let src = @str::unsafe_from_bytes(srdr.read_whole_stream());
675+
let src = @str::from_bytes(srdr.read_whole_stream());
676676
let itr = @interner::mk::<str>(str::hash, str::eq);
677677
let rdr = new_reader(cm, span_diagnostic,
678678
codemap::new_filemap(path, src, 0u, 0u), itr);

src/comp/util/ppaux.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ fn ty_to_str(cx: ctxt, typ: t) -> str {
118118
}
119119
ty_var(v) { "<T" + int::str(v) + ">" }
120120
ty_param(id, _) {
121-
"'" + str::unsafe_from_bytes([('a' as u8) + (id as u8)])
121+
"'" + str::from_bytes([('a' as u8) + (id as u8)])
122122
}
123123
_ { ty_to_short_str(cx, typ) }
124124
}

src/compiletest/procsrv.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ fn readclose(fd: fd_t) -> str {
8585
let buf = "";
8686
while !reader.eof() {
8787
let bytes = reader.read_bytes(4096u);
88-
buf += str::unsafe_from_bytes(bytes);
88+
buf += str::from_bytes(bytes);
8989
}
9090
os::fclose(file);
9191
ret buf;
@@ -114,8 +114,8 @@ fn worker(p: port<request>) {
114114
// the alt discriminant are wrong.
115115
alt recv(p) {
116116
exec(lib_path, prog, args, respchan) {
117-
{lib_path: str::unsafe_from_bytes(lib_path),
118-
prog: str::unsafe_from_bytes(prog),
117+
{lib_path: str::from_bytes(lib_path),
118+
prog: str::from_bytes(prog),
119119
args: clone_vecu8str(args),
120120
respchan: respchan}
121121
}
@@ -189,7 +189,7 @@ fn clone_vecstr(v: [str]) -> [[u8]] {
189189
fn clone_vecu8str(v: [[u8]]) -> [str] {
190190
let r = [];
191191
for t in vec::slice(v, 0u, vec::len(v)) {
192-
r += [str::unsafe_from_bytes(t)];
192+
r += [str::from_bytes(t)];
193193
}
194194
ret r;
195195
}

src/libcore/extfmt.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -390,7 +390,7 @@ mod rt {
390390
fn str_init_elt(n_elts: uint, c: char) -> str {
391391
let svec = vec::init_elt::<u8>(n_elts, c as u8);
392392

393-
ret str::unsafe_from_bytes(svec);
393+
ret str::from_bytes(svec);
394394
}
395395
enum pad_mode { pad_signed, pad_unsigned, pad_nozero, }
396396
fn pad(cv: conv, s: str, mode: pad_mode) -> str {
@@ -439,7 +439,8 @@ mod rt {
439439
if signed && zero_padding && str::byte_len(s) > 0u {
440440
let head = s[0];
441441
if head == '+' as u8 || head == '-' as u8 || head == ' ' as u8 {
442-
let headstr = str::unsafe_from_bytes([head]);
442+
let headstr = str::from_bytes([head]);
443+
// FIXME: not UTF-8 safe
443444
let bytelen = str::byte_len(s);
444445
let numpart = str::substr(s, 1u, bytelen - 1u);
445446
ret headstr + padstr + numpart;

src/libcore/str.rs

Lines changed: 47 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ export
1313
// Creating a string
1414
from_bytes,
1515
unsafe_from_bytes,
16+
from_byte,
1617
unsafe_from_byte,
1718
//push_utf8_bytes,
1819
from_char,
@@ -117,14 +118,11 @@ Section: Creating a string
117118
/*
118119
Function: from_bytes
119120
120-
Safely convert a vector of bytes to a UTF-8 string, or error
121+
Convert a vector of bytes to a UTF-8 string. Fails if invalid UTF-8.
121122
*/
122-
fn from_bytes(vv: [u8]) -> result::t<str, str> {
123-
if is_utf8(vv) {
124-
ret result::ok(unsafe_from_bytes(vv));
125-
} else {
126-
ret result::err("vector doesn't contain valid UTF-8");
127-
}
123+
fn from_bytes(vv: [u8]) -> str {
124+
assert is_utf8(vv);
125+
ret unsafe_from_bytes(vv);
128126
}
129127

130128
/*
@@ -133,7 +131,7 @@ Function: unsafe_from_bytes
133131
Converts a vector of bytes to a string. Does not verify that the
134132
vector contains valid UTF-8.
135133
136-
// FIXME: remove?
134+
FIXME: stop exporting
137135
*/
138136
fn unsafe_from_bytes(v: [const u8]) -> str unsafe {
139137
let vcopy: [u8] = v + [0u8];
@@ -148,10 +146,20 @@ Function: unsafe_from_byte
148146
Converts a byte to a string. Does not verify that the byte is
149147
valid UTF-8.
150148
151-
FIXME: rename to 'from_byte'
149+
FIXME: stop exporting
152150
*/
153151
fn unsafe_from_byte(u: u8) -> str { unsafe_from_bytes([u]) }
154152

153+
154+
/*
155+
Function: from_byte
156+
157+
Convert a byte to a UTF-8 string. Fails if invalid UTF-8.
158+
*/
159+
fn from_byte(uu: u8) -> str {
160+
from_bytes([uu])
161+
}
162+
155163
fn push_utf8_bytes(&s: str, ch: char) {
156164
let code = ch as uint;
157165
let bytes =
@@ -209,16 +217,16 @@ Function: from_cstr
209217
Create a Rust string from a null-terminated C string
210218
*/
211219
unsafe fn from_cstr(cstr: sbuf) -> str {
212-
let res = "";
220+
let res = [];
213221
let start = cstr;
214222
let curr = start;
215223
let i = 0u;
216224
while *curr != 0u8 {
217-
push_byte(res, *curr);
225+
vec::push(res, *curr);
218226
i += 1u;
219227
curr = ptr::offset(start, i);
220228
}
221-
ret res;
229+
ret from_bytes(res);
222230
}
223231

224232
/*
@@ -526,7 +534,7 @@ fn split(s: str, sep: u8) -> [str] {
526534
v += [accum];
527535
accum = "";
528536
ends_with_sep = true;
529-
} else { accum += unsafe_from_byte(c); ends_with_sep = false; }
537+
} else { accum += from_byte(c); ends_with_sep = false; }
530538
}
531539
if byte_len(accum) != 0u || ends_with_sep { v += [accum]; }
532540
ret v;
@@ -554,7 +562,7 @@ fn splitn(s: str, sep: u8, count: uint) -> [str] {
554562
v += [accum];
555563
accum = "";
556564
ends_with_sep = true;
557-
} else { accum += unsafe_from_byte(c); ends_with_sep = false; }
565+
} else { accum += from_byte(c); ends_with_sep = false; }
558566
}
559567
if byte_len(accum) != 0u || ends_with_sep { v += [accum]; }
560568
ret v;
@@ -575,26 +583,26 @@ FIXME: should behave like split and split_char:
575583
*/
576584
fn split_str(s: str, sep: str) -> [str] {
577585
assert byte_len(sep) > 0u;
578-
let v: [str] = [], accum = "", sep_match = 0u, leading = true;
586+
let v: [str] = [], accum = [], sep_match = 0u, leading = true;
579587
for c: u8 in s {
580588
// Did we match the entire separator?
581589
if sep_match == byte_len(sep) {
582-
if !leading { v += [accum]; }
583-
accum = "";
590+
if !leading { vec::push(v, from_bytes(accum)); }
591+
accum = [];
584592
sep_match = 0u;
585593
}
586594

587595
if c == sep[sep_match] {
588596
sep_match += 1u;
589597
} else {
590598
sep_match = 0u;
591-
accum += unsafe_from_byte(c);
599+
vec::push(accum, c);
592600
leading = false;
593601
}
594602
}
595603

596-
if byte_len(accum) > 0u { v += [accum]; }
597-
if sep_match == byte_len(sep) { v += [""]; }
604+
if vec::len(accum) > 0u { vec::push(v, from_bytes(accum)); }
605+
if sep_match == byte_len(sep) { vec::push(v, ""); }
598606

599607
ret v;
600608
}
@@ -1783,7 +1791,24 @@ mod tests {
17831791
0x20_u8, 0x4e_u8, 0x61_u8,
17841792
0x6d_u8];
17851793

1786-
assert ss == result::get(from_bytes(bb));
1794+
assert ss == from_bytes(bb);
1795+
}
1796+
1797+
#[test]
1798+
#[should_fail]
1799+
fn test_from_bytes_fail() {
1800+
let bb = [0xff_u8, 0xb8_u8, 0xa8_u8,
1801+
0xe0_u8, 0xb9_u8, 0x84_u8,
1802+
0xe0_u8, 0xb8_u8, 0x97_u8,
1803+
0xe0_u8, 0xb8_u8, 0xa2_u8,
1804+
0xe4_u8, 0xb8_u8, 0xad_u8,
1805+
0xe5_u8, 0x8d_u8, 0x8e_u8,
1806+
0x56_u8, 0x69_u8, 0xe1_u8,
1807+
0xbb_u8, 0x87_u8, 0x74_u8,
1808+
0x20_u8, 0x4e_u8, 0x61_u8,
1809+
0x6d_u8];
1810+
1811+
let _x = from_bytes(bb);
17871812
}
17881813

17891814
#[test]
@@ -1821,7 +1846,7 @@ mod tests {
18211846
let s1: str = "All mimsy were the borogoves";
18221847

18231848
let v: [u8] = bytes(s1);
1824-
let s2: str = unsafe_from_bytes(v);
1849+
let s2: str = from_bytes(v);
18251850
let i: uint = 0u;
18261851
let n1: uint = byte_len(s1);
18271852
let n2: uint = vec::len::<u8>(v);

src/libcore/uint.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -236,12 +236,12 @@ fn to_str(num: uint, radix: uint) -> str {
236236
if n == 0u { ret "0"; }
237237
let s: str = "";
238238
while n != 0u {
239-
s += str::unsafe_from_byte(digit(n % radix) as u8);
239+
s += str::from_byte(digit(n % radix) as u8);
240240
n /= radix;
241241
}
242242
let s1: str = "";
243243
let len: uint = str::byte_len(s);
244-
while len != 0u { len -= 1u; s1 += str::unsafe_from_byte(s[len]); }
244+
while len != 0u { len -= 1u; s1 += str::from_byte(s[len]); }
245245
ret s1;
246246
}
247247

src/libstd/freebsd_os.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,8 @@ fn dylib_filename(base: str) -> str { ret "lib" + base + ".so"; }
129129
/// followed by a path separator
130130
fn get_exe_path() -> option::t<fs::path> unsafe {
131131
let bufsize = 1023u;
132-
let path = str::unsafe_from_bytes(vec::init_elt(bufsize, 0u8));
132+
// FIXME: path "strings" will likely need fixing...
133+
let path = str::from_bytes(vec::init_elt(bufsize, 0u8));
133134
let mib = [libc_constants::CTL_KERN,
134135
libc_constants::KERN_PROC,
135136
libc_constants::KERN_PROC_PATHNAME, -1i32];

src/libstd/generic_os.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ fn getenv(n: str) -> option::t<str> {
7575
unsafe {
7676
vec::unsafe::set_len(v, res);
7777
}
78-
ret option::some(str::unsafe_from_bytes(v));
78+
ret option::some(str::from_bytes(v)); // UTF-8 or fail
7979
} else { nsize = res; }
8080
}
8181
fail;

0 commit comments

Comments
 (0)