Skip to content

Commit 4a0f4f5

Browse files
committed
Refactor the lexer to use FilePos types
1 parent b1dff40 commit 4a0f4f5

File tree

6 files changed

+85
-54
lines changed

6 files changed

+85
-54
lines changed

src/libsyntax/codemap.rs

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -200,11 +200,8 @@ pub impl FileMap {
200200
start_pos);
201201
}
202202

203-
fn next_line(@self, +chpos: CharPos, +byte_pos: BytePos) {
204-
self.lines.push(FilePos {
205-
ch: chpos,
206-
byte: byte_pos + self.start_pos.byte
207-
});
203+
fn next_line(@self, +pos: FilePos) {
204+
self.lines.push(pos);
208205
}
209206

210207
pub fn get_line(@self, line: int) -> ~str unsafe {
@@ -231,6 +228,18 @@ pub impl CodeMap {
231228
}
232229

233230
pub fn add_filemap(@self, filemap: @FileMap) {
231+
let expected_byte_pos = if self.files.len() == 0 {
232+
0
233+
} else {
234+
let last_start = self.files.last().start_pos.byte.to_uint();
235+
let last_len = self.files.last().src.len();
236+
last_start + last_len
237+
};
238+
let actual_byte_pos = filemap.start_pos.byte.to_uint();
239+
debug!("codemap: adding filemap: %s", filemap.name);
240+
debug!("codemap: expected offset: %u", expected_byte_pos);
241+
debug!("codemap: actual offset: %u", actual_byte_pos);
242+
assert expected_byte_pos == actual_byte_pos;
234243
self.files.push(filemap);
235244
}
236245

src/libsyntax/ext/source_util.rs

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -58,10 +58,13 @@ fn expand_include(cx: ext_ctxt, sp: span, arg: ast::mac_arg,
5858
_body: ast::mac_body) -> @ast::expr {
5959
let args = get_mac_args(cx, sp, arg, 1u, option::Some(1u), ~"include");
6060
let file = expr_to_str(cx, args[0], ~"#include_str requires a string");
61-
let p = parse::new_parser_from_file(cx.parse_sess(), cx.cfg(),
62-
&res_rel_file(cx, sp, &Path(file)),
63-
parse::parser::SOURCE_FILE);
64-
return p.parse_expr();
61+
let (p, rdr) = parse::new_parser_etc_from_file(
62+
cx.parse_sess(), cx.cfg(),
63+
&res_rel_file(cx, sp, &Path(file)),
64+
parse::parser::SOURCE_FILE);
65+
let e = p.parse_expr();
66+
parse::update_parse_sess_position(&cx.parse_sess(), &rdr);
67+
return e;
6568
}
6669

6770
fn expand_include_str(cx: ext_ctxt, sp: codemap::span, arg: ast::mac_arg,

src/libsyntax/parse.rs

Lines changed: 15 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@ export parse_crate_from_source_str;
1111
export parse_expr_from_source_str, parse_item_from_source_str;
1212
export parse_stmt_from_source_str;
1313
export parse_from_source_str;
14+
export update_parse_sess_position;
1415

1516
use parser::Parser;
1617
use attr::parser_attr;
@@ -76,7 +77,7 @@ fn parse_crate_from_crate_file(input: &Path, cfg: ast::crate_cfg,
7677
let leading_attrs = p.parse_inner_attrs_and_next();
7778
let { inner: crate_attrs, next: first_cdir_attr } = leading_attrs;
7879
let cdirs = p.parse_crate_directives(token::EOF, first_cdir_attr);
79-
eval::update_parse_sess_position(&sess, &rdr);
80+
update_parse_sess_position(&sess, &rdr);
8081
let cx = @{sess: sess, cfg: /* FIXME (#2543) */ copy p.cfg};
8182
let companionmod = input.filestem().map(|s| Path(*s));
8283
let (m, attrs) = eval::eval_crate_directives_to_mod(
@@ -96,7 +97,7 @@ fn parse_crate_from_source_file(input: &Path, cfg: ast::crate_cfg,
9697
let (p, rdr) = new_parser_etc_from_file(sess, cfg, input,
9798
parser::SOURCE_FILE);
9899
let r = p.parse_crate_mod(cfg);
99-
eval::update_parse_sess_position(&sess, &rdr);
100+
update_parse_sess_position(&sess, &rdr);
100101
return r;
101102
}
102103
@@ -106,7 +107,7 @@ fn parse_crate_from_source_str(name: ~str, source: @~str, cfg: ast::crate_cfg,
106107
codemap::FssNone, source);
107108
let r = p.parse_crate_mod(cfg);
108109
p.abort_if_errors();
109-
eval::update_parse_sess_position(&sess, &rdr);
110+
update_parse_sess_position(&sess, &rdr);
110111
return r;
111112
}
112113
@@ -116,7 +117,7 @@ fn parse_expr_from_source_str(name: ~str, source: @~str, cfg: ast::crate_cfg,
116117
codemap::FssNone, source);
117118
let r = p.parse_expr();
118119
p.abort_if_errors();
119-
eval::update_parse_sess_position(&sess, &rdr);
120+
update_parse_sess_position(&sess, &rdr);
120121
return r;
121122
}
122123
@@ -127,7 +128,7 @@ fn parse_item_from_source_str(name: ~str, source: @~str, cfg: ast::crate_cfg,
127128
codemap::FssNone, source);
128129
let r = p.parse_item(attrs);
129130
p.abort_if_errors();
130-
eval::update_parse_sess_position(&sess, &rdr);
131+
update_parse_sess_position(&sess, &rdr);
131132
return r;
132133
}
133134
@@ -138,7 +139,7 @@ fn parse_stmt_from_source_str(name: ~str, source: @~str, cfg: ast::crate_cfg,
138139
codemap::FssNone, source);
139140
let r = p.parse_stmt(attrs);
140141
p.abort_if_errors();
141-
eval::update_parse_sess_position(&sess, &rdr);
142+
update_parse_sess_position(&sess, &rdr);
142143
return r;
143144
}
144145
@@ -155,7 +156,7 @@ fn parse_from_source_str<T>(f: fn (p: Parser) -> T,
155156
p.reader.fatal(~"expected end-of-string");
156157
}
157158
p.abort_if_errors();
158-
eval::update_parse_sess_position(&sess, &rdr);
159+
update_parse_sess_position(&sess, &rdr);
159160
move r
160161
}
161162

@@ -216,3 +217,10 @@ fn new_parser_from_tt(sess: parse_sess, cfg: ast::crate_cfg,
216217
None, tt);
217218
return Parser(sess, cfg, trdr as reader, parser::SOURCE_FILE)
218219
}
220+
221+
fn update_parse_sess_position(sess: &parse_sess, r: &lexer::string_reader) {
222+
sess.pos = FilePos {
223+
ch: r.last_pos.ch,
224+
byte: r.last_pos.byte
225+
};
226+
}

src/libsyntax/parse/comments.rs

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -131,7 +131,7 @@ fn consume_non_eol_whitespace(rdr: string_reader) {
131131
fn push_blank_line_comment(rdr: string_reader, comments: &mut ~[cmnt]) {
132132
debug!(">>> blank-line comment");
133133
let v: ~[~str] = ~[];
134-
comments.push({style: blank_line, lines: v, pos: rdr.chpos});
134+
comments.push({style: blank_line, lines: v, pos: rdr.last_pos.ch});
135135
}
136136
137137
fn consume_whitespace_counting_blank_lines(rdr: string_reader,
@@ -148,7 +148,7 @@ fn consume_whitespace_counting_blank_lines(rdr: string_reader,
148148
fn read_shebang_comment(rdr: string_reader, code_to_the_left: bool,
149149
comments: &mut ~[cmnt]) {
150150
debug!(">>> shebang comment");
151-
let p = rdr.chpos;
151+
let p = rdr.last_pos.ch;
152152
debug!("<<< shebang comment");
153153
comments.push({
154154
style: if code_to_the_left { trailing } else { isolated },
@@ -160,7 +160,7 @@ fn read_shebang_comment(rdr: string_reader, code_to_the_left: bool,
160160
fn read_line_comments(rdr: string_reader, code_to_the_left: bool,
161161
comments: &mut ~[cmnt]) {
162162
debug!(">>> line comments");
163-
let p = rdr.chpos;
163+
let p = rdr.last_pos.ch;
164164
let mut lines: ~[~str] = ~[];
165165
while rdr.curr == '/' && nextch(rdr) == '/' {
166166
let line = read_one_line_comment(rdr);
@@ -209,7 +209,7 @@ fn trim_whitespace_prefix_and_push_line(lines: &mut ~[~str],
209209
fn read_block_comment(rdr: string_reader, code_to_the_left: bool,
210210
comments: &mut ~[cmnt]) {
211211
debug!(">>> block comment");
212-
let p = rdr.chpos;
212+
let p = rdr.last_pos.ch;
213213
let mut lines: ~[~str] = ~[];
214214
let mut col: CharPos = rdr.col;
215215
bump(rdr);
@@ -319,7 +319,7 @@ fn gather_comments_and_literals(span_diagnostic: diagnostic::span_handler,
319319
}
320320
321321
322-
let bstart = rdr.pos;
322+
let bstart = rdr.pos.byte;
323323
rdr.next_token();
324324
//discard, and look ahead; we're working with internal state
325325
let {tok: tok, sp: sp} = rdr.peek();

src/libsyntax/parse/eval.rs

Lines changed: 1 addition & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
1+
use parse::update_parse_sess_position;
12
use parser::{Parser, SOURCE_FILE};
23
use attr::parser_attr;
34

45
export eval_crate_directives_to_mod;
5-
export update_parse_sess_position;
66

77
type ctx =
88
@{sess: parse::parse_sess,
@@ -74,13 +74,6 @@ fn parse_companion_mod(cx: ctx, prefix: &Path, suffix: &Option<Path>)
7474
}
7575
}
7676

77-
fn update_parse_sess_position(sess: &parse_sess, r: &lexer::string_reader) {
78-
sess.pos = FilePos {
79-
ch: r.chpos,
80-
byte: sess.pos.byte + r.pos
81-
};
82-
}
83-
8477
fn cdir_path_opt(default: ~str, attrs: ~[ast::attribute]) -> ~str {
8578
match ::attr::first_attr_value_str_by_name(attrs, ~"path") {
8679
Some(d) => d,

src/libsyntax/parse/lexer.rs

Lines changed: 43 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
use diagnostic::span_handler;
2-
use codemap::{span, CodeMap, CharPos, BytePos};
2+
use codemap::{span, CodeMap, CharPos, BytePos, FilePos};
33
use ext::tt::transcribe::{tt_reader, new_tt_reader, dup_tt_reader,
44
tt_next_token};
55

@@ -21,10 +21,10 @@ trait reader {
2121
type string_reader = @{
2222
span_diagnostic: span_handler,
2323
src: @~str,
24+
mut pos: FilePos,
25+
mut last_pos: FilePos,
2426
mut col: CharPos,
25-
mut pos: BytePos,
2627
mut curr: char,
27-
mut chpos: CharPos,
2828
filemap: @codemap::FileMap,
2929
interner: @token::ident_interner,
3030
/* cached: */
@@ -48,9 +48,10 @@ fn new_low_level_string_reader(span_diagnostic: span_handler,
4848
// Force the initial reader bump to start on a fresh line
4949
let initial_char = '\n';
5050
let r = @{span_diagnostic: span_diagnostic, src: filemap.src,
51-
mut col: CharPos(0), mut pos: BytePos(0),
51+
mut pos: filemap.start_pos,
52+
mut last_pos: filemap.start_pos,
53+
mut col: CharPos(0),
5254
mut curr: initial_char,
53-
mut chpos: filemap.start_pos.ch,
5455
filemap: filemap, interner: itr,
5556
/* dummy values; not read */
5657
mut peek_tok: token::EOF,
@@ -61,7 +62,9 @@ fn new_low_level_string_reader(span_diagnostic: span_handler,
6162

6263
fn dup_string_reader(&&r: string_reader) -> string_reader {
6364
@{span_diagnostic: r.span_diagnostic, src: r.src,
64-
mut col: r.col, mut pos: r.pos, mut curr: r.curr, mut chpos: r.chpos,
65+
mut pos: r.pos,
66+
mut last_pos: r.last_pos,
67+
mut col: r.col, mut curr: r.curr,
6568
filemap: r.filemap, interner: r.interner,
6669
mut peek_tok: r.peek_tok, mut peek_span: r.peek_span}
6770
}
@@ -116,34 +119,48 @@ fn string_advance_token(&&r: string_reader) {
116119
if is_eof(r) {
117120
r.peek_tok = token::EOF;
118121
} else {
119-
let start_chpos = r.chpos;
122+
let start_chpos = r.last_pos.ch;
120123
r.peek_tok = next_token_inner(r);
121-
r.peek_span = ast_util::mk_sp(start_chpos, r.chpos);
124+
r.peek_span = ast_util::mk_sp(start_chpos, r.last_pos.ch);
122125
};
123126

124127
}
125128

129+
fn byte_offset(rdr: string_reader) -> BytePos {
130+
(rdr.pos.byte - rdr.filemap.start_pos.byte)
131+
}
132+
126133
fn get_str_from(rdr: string_reader, start: BytePos) -> ~str unsafe {
127134
// I'm pretty skeptical about this subtraction. What if there's a
128135
// multi-byte character before the mark?
129-
return str::slice(*rdr.src, start.to_uint() - 1u, rdr.pos.to_uint() - 1u);
136+
return str::slice(*rdr.src, start.to_uint() - 1u,
137+
byte_offset(rdr).to_uint() - 1u);
130138
}
131139

132140
fn bump(rdr: string_reader) {
133-
if rdr.pos.to_uint() < (*rdr.src).len() {
141+
rdr.last_pos = rdr.pos;
142+
let current_byte_offset = byte_offset(rdr).to_uint();;
143+
if current_byte_offset < (*rdr.src).len() {
144+
let last_char = rdr.curr;
145+
let next = str::char_range_at(*rdr.src, current_byte_offset);
146+
let byte_offset_diff = next.next - current_byte_offset;
147+
rdr.pos = FilePos {
148+
ch: rdr.pos.ch + CharPos(1u),
149+
byte: rdr.pos.byte + BytePos(byte_offset_diff)
150+
};
151+
rdr.curr = next.ch;
134152
rdr.col += CharPos(1u);
135-
rdr.chpos += CharPos(1u);
136-
if rdr.curr == '\n' {
137-
rdr.filemap.next_line(rdr.chpos, rdr.pos);
153+
if last_char == '\n' {
154+
rdr.filemap.next_line(rdr.last_pos);
138155
rdr.col = CharPos(0u);
139156
}
140-
let next = str::char_range_at(*rdr.src, rdr.pos.to_uint());
141-
rdr.pos = BytePos(next.next);
142-
rdr.curr = next.ch;
143157
} else {
144158
// XXX: What does this accomplish?
145159
if (rdr.curr != -1 as char) {
146-
rdr.chpos += CharPos(1u);
160+
rdr.pos = FilePos {
161+
ch: rdr.pos.ch + CharPos(1u),
162+
byte: rdr.pos.byte + BytePos(1u)
163+
};
147164
rdr.col += CharPos(1u);
148165
rdr.curr = -1 as char;
149166
}
@@ -153,8 +170,9 @@ fn is_eof(rdr: string_reader) -> bool {
153170
rdr.curr == -1 as char
154171
}
155172
fn nextch(rdr: string_reader) -> char {
156-
if rdr.pos.to_uint() < (*rdr.src).len() {
157-
return str::char_at(*rdr.src, rdr.pos.to_uint());
173+
let offset = byte_offset(rdr).to_uint();
174+
if offset < (*rdr.src).len() {
175+
return str::char_at(*rdr.src, offset);
158176
} else { return -1 as char; }
159177
}
160178

@@ -211,15 +229,15 @@ fn consume_any_line_comment(rdr: string_reader)
211229
bump(rdr);
212230
// line comments starting with "///" or "//!" are doc-comments
213231
if rdr.curr == '/' || rdr.curr == '!' {
214-
let start_chpos = rdr.chpos - CharPos(2u);
232+
let start_chpos = rdr.pos.ch - CharPos(2u);
215233
let mut acc = ~"//";
216234
while rdr.curr != '\n' && !is_eof(rdr) {
217235
str::push_char(&mut acc, rdr.curr);
218236
bump(rdr);
219237
}
220238
return Some({
221239
tok: token::DOC_COMMENT(rdr.interner.intern(@acc)),
222-
sp: ast_util::mk_sp(start_chpos, rdr.chpos)
240+
sp: ast_util::mk_sp(start_chpos, rdr.pos.ch)
223241
});
224242
} else {
225243
while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); }
@@ -234,7 +252,7 @@ fn consume_any_line_comment(rdr: string_reader)
234252
if nextch(rdr) == '!' {
235253
let cmap = @CodeMap::new();
236254
(*cmap).files.push(rdr.filemap);
237-
let loc = cmap.lookup_char_pos_adj(rdr.chpos);
255+
let loc = cmap.lookup_char_pos_adj(rdr.last_pos.ch);
238256
if loc.line == 1u && loc.col == CharPos(0u) {
239257
while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); }
240258
return consume_whitespace_and_comments(rdr);
@@ -250,7 +268,7 @@ fn consume_block_comment(rdr: string_reader)
250268

251269
// block comments starting with "/**" or "/*!" are doc-comments
252270
if rdr.curr == '*' || rdr.curr == '!' {
253-
let start_chpos = rdr.chpos - CharPos(2u);
271+
let start_chpos = rdr.pos.ch - CharPos(2u);
254272
let mut acc = ~"/*";
255273
while !(rdr.curr == '*' && nextch(rdr) == '/') && !is_eof(rdr) {
256274
str::push_char(&mut acc, rdr.curr);
@@ -264,7 +282,7 @@ fn consume_block_comment(rdr: string_reader)
264282
bump(rdr);
265283
return Some({
266284
tok: token::DOC_COMMENT(rdr.interner.intern(@acc)),
267-
sp: ast_util::mk_sp(start_chpos, rdr.chpos)
285+
sp: ast_util::mk_sp(start_chpos, rdr.pos.ch)
268286
});
269287
}
270288
} else {
@@ -584,7 +602,7 @@ fn next_token_inner(rdr: string_reader) -> token::Token {
584602
return token::LIT_INT(c2 as i64, ast::ty_char);
585603
}
586604
'"' => {
587-
let n = rdr.pos;
605+
let n = byte_offset(rdr);
588606
bump(rdr);
589607
while rdr.curr != '"' {
590608
if is_eof(rdr) {

0 commit comments

Comments
 (0)