1
1
use diagnostic:: span_handler;
2
- use codemap:: { span, CodeMap , CharPos , BytePos } ;
2
+ use codemap:: { span, CodeMap , CharPos , BytePos , FilePos } ;
3
3
use ext:: tt:: transcribe:: { tt_reader, new_tt_reader, dup_tt_reader,
4
4
tt_next_token} ;
5
5
@@ -21,10 +21,10 @@ trait reader {
21
21
type string_reader = @{
22
22
span_diagnostic : span_handler ,
23
23
src : @~str ,
24
+ mut pos : FilePos ,
25
+ mut last_pos : FilePos ,
24
26
mut col : CharPos ,
25
- mut pos : BytePos ,
26
27
mut curr : char ,
27
- mut chpos : CharPos ,
28
28
filemap : @codemap:: FileMap ,
29
29
interner : @token:: ident_interner ,
30
30
/* cached: */
@@ -48,9 +48,10 @@ fn new_low_level_string_reader(span_diagnostic: span_handler,
48
48
// Force the initial reader bump to start on a fresh line
49
49
let initial_char = '\n' ;
50
50
let r = @{ span_diagnostic: span_diagnostic, src: filemap. src ,
51
- mut col: CharPos ( 0 ) , mut pos: BytePos ( 0 ) ,
51
+ mut pos: filemap. start_pos ,
52
+ mut last_pos: filemap. start_pos ,
53
+ mut col: CharPos ( 0 ) ,
52
54
mut curr: initial_char,
53
- mut chpos: filemap. start_pos . ch ,
54
55
filemap: filemap, interner: itr,
55
56
/* dummy values; not read */
56
57
mut peek_tok: token:: EOF ,
@@ -61,7 +62,9 @@ fn new_low_level_string_reader(span_diagnostic: span_handler,
61
62
// Builds a fresh `string_reader` record in which every field is copied
// straight from `r`: the diagnostic handler, source text, position and
// column state, current character, filemap, interner, and the cached
// peek token and span.
// In this diff the removed line copied `col`, `pos`, `curr` and `chpos`;
// the added lines copy `pos`, `last_pos`, `col` and `curr` (there is no
// `chpos` field any more on the added side).
62
63
fn dup_string_reader ( & & r: string_reader ) -> string_reader {
63
64
@{ span_diagnostic: r. span_diagnostic , src: r. src ,
64
- mut col: r. col , mut pos: r. pos , mut curr: r. curr , mut chpos: r. chpos ,
65
+ mut pos: r. pos ,
66
+ mut last_pos: r. last_pos ,
67
+ mut col: r. col , mut curr: r. curr ,
65
68
filemap: r. filemap , interner: r. interner ,
66
69
mut peek_tok: r. peek_tok , mut peek_span: r. peek_span }
67
70
}
@@ -116,34 +119,48 @@ fn string_advance_token(&&r: string_reader) {
116
119
if is_eof ( r) {
117
120
r. peek_tok = token:: EOF ;
118
121
} else {
119
- let start_chpos = r. chpos ;
122
+ let start_chpos = r. last_pos . ch ;
120
123
r. peek_tok = next_token_inner ( r) ;
121
- r. peek_span = ast_util:: mk_sp ( start_chpos, r. chpos ) ;
124
+ r. peek_span = ast_util:: mk_sp ( start_chpos, r. last_pos . ch ) ;
122
125
} ;
123
126
124
127
}
125
128
// Returns the reader's byte position relative to the start of its
// filemap: `rdr.pos.byte` minus `rdr.filemap.start_pos.byte`.
// Other functions in this file (`bump`, `nextch`, `get_str_from`) convert
// the result with `to_uint()` and use it to index into `*rdr.src`.
129
+ fn byte_offset ( rdr : string_reader ) -> BytePos {
130
+ ( rdr. pos . byte - rdr. filemap . start_pos . byte )
131
+ }
132
+
// Returns the slice of `*rdr.src` that runs from `start - 1` up to the
// reader's current byte offset minus one.
// `start` is a byte offset; the `'"'` arm of `next_token_inner` obtains
// its mark from `byte_offset(rdr)` on the added side of this diff.
// On the removed line the end bound was `rdr.pos` itself; on the added
// lines it is `byte_offset(rdr)`, i.e. relative to the filemap start.
// NOTE(review): the `- 1u` on both bounds presumably compensates for the
// reader already having advanced past `curr` -- confirm; as the comment
// below says, this looks wrong when a multi-byte character is involved.
126
133
fn get_str_from ( rdr : string_reader , start : BytePos ) -> ~str unsafe {
127
134
// I'm pretty skeptical about this subtraction. What if there's a
128
135
// multi-byte character before the mark?
129
- return str:: slice ( * rdr. src , start. to_uint ( ) - 1 u, rdr. pos . to_uint ( ) - 1 u) ;
136
+ return str:: slice ( * rdr. src , start. to_uint ( ) - 1 u,
137
+ byte_offset ( rdr) . to_uint ( ) - 1 u) ;
130
138
}
131
139
132
140
fn bump ( rdr : string_reader ) {
133
- if rdr. pos . to_uint ( ) < ( * rdr. src ) . len ( ) {
141
+ rdr. last_pos = rdr. pos ;
142
+ let current_byte_offset = byte_offset ( rdr) . to_uint ( ) ; ;
143
+ if current_byte_offset < ( * rdr. src ) . len ( ) {
144
+ let last_char = rdr. curr ;
145
+ let next = str:: char_range_at ( * rdr. src , current_byte_offset) ;
146
+ let byte_offset_diff = next. next - current_byte_offset;
147
+ rdr. pos = FilePos {
148
+ ch : rdr. pos . ch + CharPos ( 1 u) ,
149
+ byte : rdr. pos . byte + BytePos ( byte_offset_diff)
150
+ } ;
151
+ rdr. curr = next. ch ;
134
152
rdr. col += CharPos ( 1 u) ;
135
- rdr. chpos += CharPos ( 1 u) ;
136
- if rdr. curr == '\n' {
137
- rdr. filemap . next_line ( rdr. chpos , rdr. pos ) ;
153
+ if last_char == '\n' {
154
+ rdr. filemap . next_line ( rdr. last_pos ) ;
138
155
rdr. col = CharPos ( 0 u) ;
139
156
}
140
- let next = str:: char_range_at ( * rdr. src , rdr. pos . to_uint ( ) ) ;
141
- rdr. pos = BytePos ( next. next ) ;
142
- rdr. curr = next. ch ;
143
157
} else {
144
158
// XXX: What does this accomplish?
145
159
if ( rdr. curr != -1 as char ) {
146
- rdr. chpos += CharPos ( 1 u) ;
160
+ rdr. pos = FilePos {
161
+ ch : rdr. pos . ch + CharPos ( 1 u) ,
162
+ byte : rdr. pos . byte + BytePos ( 1 u)
163
+ } ;
147
164
rdr. col += CharPos ( 1 u) ;
148
165
rdr. curr = -1 as char ;
149
166
}
@@ -153,8 +170,9 @@ fn is_eof(rdr: string_reader) -> bool {
153
170
rdr. curr == -1 as char
154
171
}
// Peeks at the character located at the reader's current byte offset in
// `*rdr.src`; no field of the reader is assigned here.
// Returns `-1 as char` when the offset is at or beyond the end of the
// source -- the same sentinel value `is_eof` compares `rdr.curr` against.
// The removed lines indexed with `rdr.pos` directly; the added lines index
// with `byte_offset(rdr)`, which subtracts the filemap's starting byte.
155
172
fn nextch ( rdr : string_reader ) -> char {
156
- if rdr. pos . to_uint ( ) < ( * rdr. src ) . len ( ) {
157
- return str:: char_at ( * rdr. src , rdr. pos . to_uint ( ) ) ;
173
+ let offset = byte_offset ( rdr) . to_uint ( ) ;
174
+ if offset < ( * rdr. src ) . len ( ) {
175
+ return str:: char_at ( * rdr. src , offset) ;
158
176
} else { return -1 as char ; }
159
177
}
160
178
@@ -211,15 +229,15 @@ fn consume_any_line_comment(rdr: string_reader)
211
229
bump ( rdr) ;
212
230
// line comments starting with "///" or "//!" are doc-comments
213
231
if rdr. curr == '/' || rdr. curr == '!' {
214
- let start_chpos = rdr. chpos - CharPos ( 2 u) ;
232
+ let start_chpos = rdr. pos . ch - CharPos ( 2 u) ;
215
233
let mut acc = ~"//";
216
234
while rdr. curr != '\n' && !is_eof ( rdr) {
217
235
str:: push_char ( & mut acc, rdr. curr ) ;
218
236
bump ( rdr) ;
219
237
}
220
238
return Some ( {
221
239
tok: token:: DOC_COMMENT ( rdr. interner . intern ( @acc) ) ,
222
- sp: ast_util:: mk_sp ( start_chpos, rdr. chpos )
240
+ sp: ast_util:: mk_sp ( start_chpos, rdr. pos . ch )
223
241
} ) ;
224
242
} else {
225
243
while rdr. curr != '\n' && !is_eof ( rdr) { bump ( rdr) ; }
@@ -234,7 +252,7 @@ fn consume_any_line_comment(rdr: string_reader)
234
252
if nextch ( rdr) == '!' {
235
253
let cmap = @CodeMap :: new ( ) ;
236
254
( * cmap) . files . push ( rdr. filemap ) ;
237
- let loc = cmap. lookup_char_pos_adj ( rdr. chpos ) ;
255
+ let loc = cmap. lookup_char_pos_adj ( rdr. last_pos . ch ) ;
238
256
if loc. line == 1 u && loc. col == CharPos ( 0 u) {
239
257
while rdr. curr != '\n' && !is_eof ( rdr) { bump ( rdr) ; }
240
258
return consume_whitespace_and_comments ( rdr) ;
@@ -250,7 +268,7 @@ fn consume_block_comment(rdr: string_reader)
250
268
251
269
// block comments starting with "/**" or "/*!" are doc-comments
252
270
if rdr. curr == '*' || rdr. curr == '!' {
253
- let start_chpos = rdr. chpos - CharPos ( 2 u) ;
271
+ let start_chpos = rdr. pos . ch - CharPos ( 2 u) ;
254
272
let mut acc = ~"/* ";
255
273
while !(rdr.curr == '*' && nextch(rdr) == '/') && !is_eof(rdr) {
256
274
str::push_char(&mut acc, rdr.curr);
@@ -264,7 +282,7 @@ fn consume_block_comment(rdr: string_reader)
264
282
bump ( rdr) ;
265
283
return Some ( {
266
284
tok: token:: DOC_COMMENT ( rdr. interner . intern ( @acc) ) ,
267
- sp: ast_util:: mk_sp ( start_chpos, rdr. chpos )
285
+ sp: ast_util:: mk_sp ( start_chpos, rdr. pos . ch )
268
286
} ) ;
269
287
}
270
288
} else {
@@ -584,7 +602,7 @@ fn next_token_inner(rdr: string_reader) -> token::Token {
584
602
return token:: LIT_INT ( c2 as i64 , ast:: ty_char) ;
585
603
}
586
604
'"' => {
587
- let n = rdr. pos ;
605
+ let n = byte_offset ( rdr) ;
588
606
bump ( rdr) ;
589
607
while rdr. curr != '"' {
590
608
if is_eof ( rdr) {
0 commit comments