@@ -38,18 +38,17 @@ use std::convert::TryFrom;
38
38
#[ derive( Debug ) ]
39
39
pub struct Token {
40
40
pub kind : TokenKind ,
41
- pub len : usize ,
41
+ pub len : u32 ,
42
42
}
43
43
44
44
impl Token {
45
- fn new ( kind : TokenKind , len : usize ) -> Token {
45
+ fn new ( kind : TokenKind , len : u32 ) -> Token {
46
46
Token { kind, len }
47
47
}
48
48
}
49
49
50
50
/// Enum representing common lexeme types.
51
- // perf note: Changing all `usize` to `u32` doesn't change performance. See #77629
52
- #[ derive( Clone , Copy , Debug , PartialEq , Eq , PartialOrd , Ord ) ]
51
+ #[ derive( Clone , Copy , Debug , PartialEq , Eq ) ]
53
52
pub enum TokenKind {
54
53
// Multi-char tokens:
55
54
/// "// comment"
@@ -76,7 +75,7 @@ pub enum TokenKind {
76
75
/// tokens.
77
76
UnknownPrefix ,
78
77
/// "12_u8", "1.0e-40", "b"123"". See `LiteralKind` for more details.
79
- Literal { kind : LiteralKind , suffix_start : usize } ,
78
+ Literal { kind : LiteralKind , suffix_start : u32 } ,
80
79
/// "'a"
81
80
Lifetime { starts_with_number : bool } ,
82
81
@@ -160,26 +159,24 @@ pub enum LiteralKind {
160
159
Str { terminated : bool } ,
161
160
/// "b"abc"", "b"abc"
162
161
ByteStr { terminated : bool } ,
163
- /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a"
164
- RawStr { n_hashes : u8 , err : Option < RawStrError > } ,
165
- /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a"
166
- RawByteStr { n_hashes : u8 , err : Option < RawStrError > } ,
162
+ /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a". `None` indicates
163
+ /// an invalid literal.
164
+ RawStr { n_hashes : Option < u8 > } ,
165
+ /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a". `None`
166
+ /// indicates an invalid literal.
167
+ RawByteStr { n_hashes : Option < u8 > } ,
167
168
}
168
169
169
/// Error produced validating a raw string literal.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum RawStrError {
    /// Non `#` characters exist between `r` and `"`, e.g. `r##~"abcde"##`
    InvalidStarter { bad_char: char },
    /// The string was not terminated, e.g. `r###"abcde"##`.
    /// `possible_terminator_offset` is the number of characters after `r` or
    /// `br` where they may have intended to terminate it.
    NoTerminator { expected: u32, found: u32, possible_terminator_offset: Option<u32> },
    /// More than 255 `#`s exist.
    TooManyDelimiters { found: u32 },
}
184
181
185
182
/// Base of numeric literal encoding according to its prefix.
@@ -221,11 +218,25 @@ pub fn strip_shebang(input: &str) -> Option<usize> {
221
218
}
222
219
223
220
/// Parses the first token from the provided input string.
221
+ #[ inline]
224
222
pub fn first_token ( input : & str ) -> Token {
225
223
debug_assert ! ( !input. is_empty( ) ) ;
226
224
Cursor :: new ( input) . advance_token ( )
227
225
}
228
226
227
+ /// Validates a raw string literal. Used for getting more information about a
228
+ /// problem with a `RawStr`/`RawByteStr` with a `None` field.
229
+ #[ inline]
230
+ pub fn validate_raw_str ( input : & str , prefix_len : u32 ) -> Result < ( ) , RawStrError > {
231
+ debug_assert ! ( !input. is_empty( ) ) ;
232
+ let mut cursor = Cursor :: new ( input) ;
233
+ // Move past the leading `r` or `br`.
234
+ for _ in 0 ..prefix_len {
235
+ cursor. bump ( ) . unwrap ( ) ;
236
+ }
237
+ cursor. raw_double_quoted_string ( prefix_len) . map ( |_| ( ) )
238
+ }
239
+
229
240
/// Creates an iterator that produces tokens from the input string.
230
241
pub fn tokenize ( input : & str ) -> impl Iterator < Item = Token > + ' _ {
231
242
let mut cursor = Cursor :: new ( input) ;
@@ -315,12 +326,12 @@ impl Cursor<'_> {
315
326
'r' => match ( self . first ( ) , self . second ( ) ) {
316
327
( '#' , c1) if is_id_start ( c1) => self . raw_ident ( ) ,
317
328
( '#' , _) | ( '"' , _) => {
318
- let ( n_hashes , err ) = self . raw_double_quoted_string ( 1 ) ;
329
+ let res = self . raw_double_quoted_string ( 1 ) ;
319
330
let suffix_start = self . len_consumed ( ) ;
320
- if err . is_none ( ) {
331
+ if res . is_ok ( ) {
321
332
self . eat_literal_suffix ( ) ;
322
333
}
323
- let kind = RawStr { n_hashes, err } ;
334
+ let kind = RawStr { n_hashes : res . ok ( ) } ;
324
335
Literal { kind, suffix_start }
325
336
}
326
337
_ => self . ident_or_unknown_prefix ( ) ,
@@ -350,12 +361,12 @@ impl Cursor<'_> {
350
361
}
351
362
( 'r' , '"' ) | ( 'r' , '#' ) => {
352
363
self . bump ( ) ;
353
- let ( n_hashes , err ) = self . raw_double_quoted_string ( 2 ) ;
364
+ let res = self . raw_double_quoted_string ( 2 ) ;
354
365
let suffix_start = self . len_consumed ( ) ;
355
- if err . is_none ( ) {
366
+ if res . is_ok ( ) {
356
367
self . eat_literal_suffix ( ) ;
357
368
}
358
- let kind = RawByteStr { n_hashes, err } ;
369
+ let kind = RawByteStr { n_hashes : res . ok ( ) } ;
359
370
Literal { kind, suffix_start }
360
371
}
361
372
_ => self . ident_or_unknown_prefix ( ) ,
@@ -698,19 +709,18 @@ impl Cursor<'_> {
698
709
}
699
710
700
711
/// Eats the double-quoted string and returns `n_hashes` and an error if encountered.
701
- fn raw_double_quoted_string ( & mut self , prefix_len : usize ) -> ( u8 , Option < RawStrError > ) {
712
+ fn raw_double_quoted_string ( & mut self , prefix_len : u32 ) -> Result < u8 , RawStrError > {
702
713
// Wrap the actual function to handle the error with too many hashes.
703
714
// This way, it eats the whole raw string.
704
- let ( n_hashes, err ) = self . raw_string_unvalidated ( prefix_len) ;
715
+ let n_hashes = self . raw_string_unvalidated ( prefix_len) ? ;
705
716
// Only up to 255 `#`s are allowed in raw strings
706
717
match u8:: try_from ( n_hashes) {
707
- Ok ( num) => ( num, err) ,
708
- // We lie about the number of hashes here :P
709
- Err ( _) => ( 0 , Some ( RawStrError :: TooManyDelimiters { found : n_hashes } ) ) ,
718
+ Ok ( num) => Ok ( num) ,
719
+ Err ( _) => Err ( RawStrError :: TooManyDelimiters { found : n_hashes } ) ,
710
720
}
711
721
}
712
722
713
- fn raw_string_unvalidated ( & mut self , prefix_len : usize ) -> ( usize , Option < RawStrError > ) {
723
+ fn raw_string_unvalidated ( & mut self , prefix_len : u32 ) -> Result < u32 , RawStrError > {
714
724
debug_assert ! ( self . prev( ) == 'r' ) ;
715
725
let start_pos = self . len_consumed ( ) ;
716
726
let mut possible_terminator_offset = None ;
@@ -729,7 +739,7 @@ impl Cursor<'_> {
729
739
Some ( '"' ) => ( ) ,
730
740
c => {
731
741
let c = c. unwrap_or ( EOF_CHAR ) ;
732
- return ( n_start_hashes , Some ( RawStrError :: InvalidStarter { bad_char : c } ) ) ;
742
+ return Err ( RawStrError :: InvalidStarter { bad_char : c } ) ;
733
743
}
734
744
}
735
745
@@ -739,14 +749,11 @@ impl Cursor<'_> {
739
749
self . eat_while ( |c| c != '"' ) ;
740
750
741
751
if self . is_eof ( ) {
742
- return (
743
- n_start_hashes,
744
- Some ( RawStrError :: NoTerminator {
745
- expected : n_start_hashes,
746
- found : max_hashes,
747
- possible_terminator_offset,
748
- } ) ,
749
- ) ;
752
+ return Err ( RawStrError :: NoTerminator {
753
+ expected : n_start_hashes,
754
+ found : max_hashes,
755
+ possible_terminator_offset,
756
+ } ) ;
750
757
}
751
758
752
759
// Eat closing double quote.
@@ -764,7 +771,7 @@ impl Cursor<'_> {
764
771
}
765
772
766
773
if n_end_hashes == n_start_hashes {
767
- return ( n_start_hashes, None ) ;
774
+ return Ok ( n_start_hashes) ;
768
775
} else if n_end_hashes > max_hashes {
769
776
// Keep track of possible terminators to give a hint about
770
777
// where there might be a missing terminator
0 commit comments