@@ -29,9 +29,11 @@ pub mod unescape;
29
29
#[ cfg( test) ]
30
30
mod tests;
31
31
32
+ pub use crate :: cursor:: Cursor ;
33
+
32
34
use self :: LiteralKind :: * ;
33
35
use self :: TokenKind :: * ;
34
- use crate :: cursor:: { Cursor , EOF_CHAR } ;
36
+ use crate :: cursor:: EOF_CHAR ;
35
37
use std:: convert:: TryFrom ;
36
38
37
39
/// Parsed token.
@@ -139,6 +141,9 @@ pub enum TokenKind {
139
141
140
142
/// Unknown token, not expected by the lexer, e.g. "№"
141
143
Unknown ,
144
+
145
+ /// End of input.
146
+ Eof ,
142
147
}
143
148
144
149
#[ derive( Clone , Copy , Debug , PartialEq , Eq ) ]
@@ -219,13 +224,6 @@ pub fn strip_shebang(input: &str) -> Option<usize> {
219
224
None
220
225
}
221
226
222
- /// Parses the first token from the provided input string.
/// The input is expected to be non-empty; that expectation is only
/// checked through a `debug_assert!`.
223
- #[ inline]
224
- pub fn first_token ( input : & str ) -> Token {
225
// Precondition check: the caller must not pass an empty string.
- debug_assert ! ( !input. is_empty( ) ) ;
226
// Build a fresh cursor over `input` and lex exactly one token from it.
- Cursor :: new ( input) . advance_token ( )
227
- }
228
-
229
227
/// Validates a raw string literal. Used for getting more information about a
230
228
/// problem with a `RawStr`/`RawByteStr` with a `None` field.
231
229
#[ inline]
@@ -243,12 +241,8 @@ pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError>
243
241
/// Creates an iterator that lexes `input` lazily, one token per step,
/// using a single `Cursor` that is moved into the iterator's closure.
/// On the added side of this change the end-of-input `Eof` token is
/// never yielded; it is turned into `None` instead.
pub fn tokenize ( input : & str ) -> impl Iterator < Item = Token > + ' _ {
244
242
let mut cursor = Cursor :: new ( input) ;
245
243
std:: iter:: from_fn ( move || {
246
// Removed side: check `is_eof()` first and return `None` there; otherwise
// reset the consumed-length counter and lex the next token.
- if cursor. is_eof ( ) {
247
- None
248
- } else {
249
- cursor. reset_len_consumed ( ) ;
250
- Some ( cursor. advance_token ( ) )
251
- }
244
// Added side: always ask the cursor for a token and map a token of kind
// `TokenKind::Eof` to `None`, which signals the end of iteration.
+ let token = cursor. advance_token ( ) ;
245
+ if token. kind != TokenKind :: Eof { Some ( token) } else { None }
252
246
} )
253
247
}
254
248
@@ -311,8 +305,11 @@ pub fn is_ident(string: &str) -> bool {
311
305
312
306
impl Cursor < ' _ > {
313
307
/// Parses a token from the input string; when no input remains, returns a zero-length `Eof` token.
314
- fn advance_token ( & mut self ) -> Token {
315
- let first_char = self . bump ( ) . unwrap ( ) ;
308
+ pub fn advance_token ( & mut self ) -> Token {
309
+ let first_char = match self . bump ( ) {
310
+ Some ( c) => c,
311
+ None => return Token :: new ( TokenKind :: Eof , 0 ) ,
312
+ } ;
316
313
let token_kind = match first_char {
317
314
// Slash, comment or block comment.
318
315
'/' => match self . first ( ) {
@@ -329,7 +326,7 @@ impl Cursor<'_> {
329
326
( '#' , c1) if is_id_start ( c1) => self . raw_ident ( ) ,
330
327
( '#' , _) | ( '"' , _) => {
331
328
let res = self . raw_double_quoted_string ( 1 ) ;
332
- let suffix_start = self . len_consumed ( ) ;
329
+ let suffix_start = self . pos_within_token ( ) ;
333
330
if res. is_ok ( ) {
334
331
self . eat_literal_suffix ( ) ;
335
332
}
@@ -344,7 +341,7 @@ impl Cursor<'_> {
344
341
( '\'' , _) => {
345
342
self . bump ( ) ;
346
343
let terminated = self . single_quoted_string ( ) ;
347
- let suffix_start = self . len_consumed ( ) ;
344
+ let suffix_start = self . pos_within_token ( ) ;
348
345
if terminated {
349
346
self . eat_literal_suffix ( ) ;
350
347
}
@@ -354,7 +351,7 @@ impl Cursor<'_> {
354
351
( '"' , _) => {
355
352
self . bump ( ) ;
356
353
let terminated = self . double_quoted_string ( ) ;
357
- let suffix_start = self . len_consumed ( ) ;
354
+ let suffix_start = self . pos_within_token ( ) ;
358
355
if terminated {
359
356
self . eat_literal_suffix ( ) ;
360
357
}
@@ -364,7 +361,7 @@ impl Cursor<'_> {
364
361
( 'r' , '"' ) | ( 'r' , '#' ) => {
365
362
self . bump ( ) ;
366
363
let res = self . raw_double_quoted_string ( 2 ) ;
367
- let suffix_start = self . len_consumed ( ) ;
364
+ let suffix_start = self . pos_within_token ( ) ;
368
365
if res. is_ok ( ) {
369
366
self . eat_literal_suffix ( ) ;
370
367
}
@@ -381,7 +378,7 @@ impl Cursor<'_> {
381
378
// Numeric literal.
382
379
c @ '0' ..='9' => {
383
380
let literal_kind = self . number ( c) ;
384
- let suffix_start = self . len_consumed ( ) ;
381
+ let suffix_start = self . pos_within_token ( ) ;
385
382
self . eat_literal_suffix ( ) ;
386
383
TokenKind :: Literal { kind : literal_kind, suffix_start }
387
384
}
@@ -420,7 +417,7 @@ impl Cursor<'_> {
420
417
// String literal.
421
418
'"' => {
422
419
let terminated = self . double_quoted_string ( ) ;
423
- let suffix_start = self . len_consumed ( ) ;
420
+ let suffix_start = self . pos_within_token ( ) ;
424
421
if terminated {
425
422
self . eat_literal_suffix ( ) ;
426
423
}
@@ -433,7 +430,9 @@ impl Cursor<'_> {
433
430
}
434
431
_ => Unknown ,
435
432
} ;
436
- Token :: new ( token_kind, self . len_consumed ( ) )
433
+ let res = Token :: new ( token_kind, self . pos_within_token ( ) ) ;
434
+ self . reset_pos_within_token ( ) ;
435
+ res
437
436
}
438
437
439
438
fn line_comment ( & mut self ) -> TokenKind {
@@ -618,7 +617,7 @@ impl Cursor<'_> {
618
617
619
618
if !can_be_a_lifetime {
620
619
let terminated = self . single_quoted_string ( ) ;
621
- let suffix_start = self . len_consumed ( ) ;
620
+ let suffix_start = self . pos_within_token ( ) ;
622
621
if terminated {
623
622
self . eat_literal_suffix ( ) ;
624
623
}
@@ -643,7 +642,7 @@ impl Cursor<'_> {
643
642
if self . first ( ) == '\'' {
644
643
self . bump ( ) ;
645
644
let kind = Char { terminated : true } ;
646
- Literal { kind, suffix_start : self . len_consumed ( ) }
645
+ Literal { kind, suffix_start : self . pos_within_token ( ) }
647
646
} else {
648
647
Lifetime { starts_with_number }
649
648
}
@@ -724,7 +723,7 @@ impl Cursor<'_> {
724
723
725
724
fn raw_string_unvalidated ( & mut self , prefix_len : u32 ) -> Result < u32 , RawStrError > {
726
725
debug_assert ! ( self . prev( ) == 'r' ) ;
727
- let start_pos = self . len_consumed ( ) ;
726
+ let start_pos = self . pos_within_token ( ) ;
728
727
let mut possible_terminator_offset = None ;
729
728
let mut max_hashes = 0 ;
730
729
@@ -778,7 +777,7 @@ impl Cursor<'_> {
778
777
// Keep track of possible terminators to give a hint about
779
778
// where there might be a missing terminator
780
779
possible_terminator_offset =
781
- Some ( self . len_consumed ( ) - start_pos - n_end_hashes + prefix_len) ;
780
+ Some ( self . pos_within_token ( ) - start_pos - n_end_hashes + prefix_len) ;
782
781
max_hashes = n_end_hashes;
783
782
}
784
783
}
0 commit comments