Skip to content

Commit 86d51ec

Browse files
committed
Get test-pass/utf8.rs to run
This involved a small fix to the lexing of Unicode character escapes and to the pretty-printer.
1 parent dc2cdbf commit 86d51ec

File tree

2 files changed

+25
-44
lines changed

2 files changed

+25
-44
lines changed

src/comp/front/lexer.rs

Lines changed: 23 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -410,37 +410,18 @@ fn scan_number(char c, &reader rdr) -> token::token {
410410
}
411411
}
412412

413-
fn scan_numeric_escape(&reader rdr) -> char {
414-
415-
auto n_hex_digits = 0;
416-
417-
assert (rdr.curr() == '\\');
418-
419-
alt (rdr.next()) {
420-
case ('x') { n_hex_digits = 2; }
421-
case ('u') { n_hex_digits = 4; }
422-
case ('U') { n_hex_digits = 8; }
423-
case (?c) {
424-
rdr.err(#fmt("unknown numeric character escape: %d", c as int));
425-
fail;
426-
}
427-
}
428-
429-
rdr.bump(); // advance curr past \
430-
431-
auto n = rdr.next();
413+
fn scan_numeric_escape(&reader rdr, uint n_hex_digits) -> char {
432414
auto accum_int = 0;
433-
434-
while (n_hex_digits != 0) {
415+
while (n_hex_digits != 0u) {
416+
auto n = rdr.curr();
417+
rdr.bump();
435418
if (!is_hex_digit(n)) {
436419
rdr.err(#fmt("illegal numeric character escape: %d", n as int));
437420
fail;
438421
}
439422
accum_int *= 16;
440423
accum_int += hex_digit_val(n);
441-
rdr.bump();
442-
n = rdr.next();
443-
n_hex_digits -= 1;
424+
n_hex_digits -= 1u;
444425
}
445426
ret accum_int as char;
446427
}
@@ -583,76 +564,76 @@ fn next_token(&reader rdr) -> token::token {
583564
case ('\'') {
584565
rdr.bump();
585566
auto c2 = rdr.curr();
567+
rdr.bump();
586568
if (c2 == '\\') {
587-
alt (rdr.next()) {
569+
auto escaped = rdr.curr();
570+
rdr.bump();
571+
alt (escaped) {
588572
case ('n') { c2 = '\n'; }
589573
case ('r') { c2 = '\r'; }
590574
case ('t') { c2 = '\t'; }
591575
case ('\\') { c2 = '\\'; }
592576
case ('\'') { c2 = '\''; }
593577

594-
case ('x') { c2 = scan_numeric_escape(rdr); }
595-
case ('u') { c2 = scan_numeric_escape(rdr); }
596-
case ('U') { c2 = scan_numeric_escape(rdr); }
578+
case ('x') { c2 = scan_numeric_escape(rdr, 2u); }
579+
case ('u') { c2 = scan_numeric_escape(rdr, 4u); }
580+
case ('U') { c2 = scan_numeric_escape(rdr, 8u); }
597581

598582
case (?c2) {
599583
rdr.err(#fmt("unknown character escape: %d",
600584
c2 as int));
601585
fail;
602586
}
603587
}
604-
rdr.bump();
605588
}
606589

607-
if (rdr.next() != '\'') {
590+
if (rdr.curr() != '\'') {
608591
rdr.err("unterminated character constant");
609592
fail;
610593
}
611-
rdr.bump(); // advance curr to closing '
612594
rdr.bump(); // advance curr past token
613595
ret token::LIT_CHAR(c2);
614596
}
615597

616598
case ('"') {
617599
rdr.bump();
618600
while (rdr.curr() != '"') {
619-
alt (rdr.curr()) {
601+
auto ch = rdr.curr();
602+
rdr.bump();
603+
alt (ch) {
620604
case ('\\') {
621-
alt (rdr.next()) {
605+
auto escaped = rdr.curr();
606+
rdr.bump();
607+
alt (escaped) {
622608
case ('n') {
623-
rdr.bump();
624609
str::push_byte(accum_str, '\n' as u8);
625610
}
626611
case ('r') {
627-
rdr.bump();
628612
str::push_byte(accum_str, '\r' as u8);
629613
}
630614
case ('t') {
631-
rdr.bump();
632615
str::push_byte(accum_str, '\t' as u8);
633616
}
634617
case ('\\') {
635-
rdr.bump();
636618
str::push_byte(accum_str, '\\' as u8);
637619
}
638620
case ('"') {
639-
rdr.bump();
640621
str::push_byte(accum_str, '"' as u8);
641622
}
642623

643624
case ('x') {
644625
str::push_char(accum_str,
645-
scan_numeric_escape(rdr));
626+
scan_numeric_escape(rdr, 2u));
646627
}
647628

648629
case ('u') {
649630
str::push_char(accum_str,
650-
scan_numeric_escape(rdr));
631+
scan_numeric_escape(rdr, 4u));
651632
}
652633

653634
case ('U') {
654635
str::push_char(accum_str,
655-
scan_numeric_escape(rdr));
636+
scan_numeric_escape(rdr, 8u));
656637
}
657638

658639
case (?c2) {
@@ -663,10 +644,9 @@ fn next_token(&reader rdr) -> token::token {
663644
}
664645
}
665646
case (_) {
666-
str::push_char(accum_str, rdr.curr());
647+
str::push_char(accum_str, ch);
667648
}
668649
}
669-
rdr.bump();
670650
}
671651
rdr.bump();
672652
ret token::LIT_STR(interner::intern[str](*rdr.get_interner(),

src/comp/pretty/pprust.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -957,7 +957,8 @@ fn escape_str(str st, char to_escape) -> str {
957957
case ('\\') {out += "\\\\";}
958958
case (?cur) {
959959
if (cur == to_escape) {out += "\\";}
960-
str::push_byte(out, cur as u8);
960+
// FIXME some (or all?) non-ascii things should be escaped
961+
str::push_char(out, cur);
961962
}
962963
}
963964
i += 1u;

0 commit comments

Comments
 (0)