@@ -38,18 +38,17 @@ use std::convert::TryFrom;
38
38
#[ derive( Debug ) ]
39
39
/// A lexed token: its `TokenKind` paired with its length.
pub struct Token {
40
40
/// Which kind of lexeme this token is.
pub kind : TokenKind ,
/// Length of the token. This change narrows the field from `usize` to `u32`.
41
- pub len : usize ,
41
+ pub len : u32 ,
42
42
}
43
43
44
44
impl Token {
/// Builds a `Token` from the given kind and length; both are stored as-is.
/// The `len` parameter follows the field's type change from `usize` to `u32`.
45
- fn new ( kind : TokenKind , len : usize ) -> Token {
45
+ fn new ( kind : TokenKind , len : u32 ) -> Token {
46
46
Token { kind, len }
47
47
}
48
48
}
49
49
50
50
/// Enum representing common lexeme types.
51
- // perf note: Changing all `usize` to `u32` doesn't change performance. See #77629
52
- #[ derive( Clone , Copy , Debug , PartialEq , Eq , PartialOrd , Ord ) ]
51
+ #[ derive( Clone , Copy , Debug , PartialEq , Eq ) ]
53
52
pub enum TokenKind {
54
53
// Multi-char tokens:
55
54
/// "// comment"
@@ -76,7 +75,7 @@ pub enum TokenKind {
76
75
/// tokens.
77
76
UnknownPrefix ,
78
77
/// "12_u8", "1.0e-40", "b"123"". See `LiteralKind` for more details.
79
- Literal { kind : LiteralKind , suffix_start : usize } ,
78
+ Literal { kind : LiteralKind , suffix_start : u32 } ,
80
79
/// "'a"
81
80
Lifetime { starts_with_number : bool } ,
82
81
@@ -160,26 +159,24 @@ pub enum LiteralKind {
160
159
Str { terminated : bool } ,
161
160
/// "b"abc"", "b"abc"
162
161
ByteStr { terminated : bool } ,
163
- /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a"
164
- RawStr { n_hashes : u8 , err : Option < RawStrError > } ,
165
- /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a"
166
- RawByteStr { n_hashes : u8 , err : Option < RawStrError > } ,
162
+ /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a". `None` indicates
163
+ /// an invalid literal.
164
+ RawStr { n_hashes : Option < u8 > } ,
165
+ /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a". `None`
166
+ /// indicates an invalid literal.
167
+ RawByteStr { n_hashes : Option < u8 > } ,
167
168
}
168
169
169
- /// Error produced validating a raw string. Represents cases like:
170
- /// - `r##~"abcde"##`: `InvalidStarter`
171
- /// - `r###"abcde"##`: `NoTerminator { expected: 3, found: 2, possible_terminator_offset: Some(11)`
172
- /// - Too many `#`s (>255): `TooManyDelimiters`
173
- // perf note: It doesn't matter that this makes `Token` 36 bytes bigger. See #77629
174
170
#[ derive( Clone , Copy , Debug , PartialEq , Eq , PartialOrd , Ord ) ]
175
171
/// Error produced when lexing/validating a raw string literal fails.
pub enum RawStrError {
176
- /// Non `#` characters exist between `r` and `"` eg. `r#~".. `
172
+ /// Non `#` characters exist between `r` and `"`, e.g. `r##~"abcde"##`. `bad_char` is the offending character, or `EOF_CHAR` when no character was available.
177
173
InvalidStarter { bad_char : char } ,
178
- /// The string was never terminated. `possible_terminator_offset` is the number of characters after `r` or `br` where they
179
- /// may have intended to terminate it.
180
- NoTerminator { expected : usize , found : usize , possible_terminator_offset : Option < usize > } ,
174
+ /// The string was not terminated, e.g. `r###"abcde"##`.
175
+ /// `expected` carries the lexer's `n_start_hashes` and `found` its `max_hashes`. `possible_terminator_offset` is the number of characters after `r` or
176
+ /// `br` where the author may have intended to terminate it.
177
+ NoTerminator { expected : u32 , found : u32 , possible_terminator_offset : Option < u32 > } ,
181
178
/// More than 255 `#`s exist; `found` is the hash count that did not fit in a `u8`.
182
- TooManyDelimiters { found : usize } ,
179
+ TooManyDelimiters { found : u32 } ,
183
180
}
184
181
185
182
/// Base of numeric literal encoding according to its prefix.
@@ -227,6 +224,19 @@ pub fn first_token(input: &str) -> Token {
227
224
Cursor :: new ( input) . advance_token ( )
228
225
}
229
226
227
+ /// Validates a raw string literal: returns `Ok(())` when it lexes cleanly, otherwise the `RawStrError` describing the problem. Used for getting more information about a
228
+ /// problem with a `RawStr`/`RawByteStr` whose `n_hashes` field is `None`. `input` is expected to be non-empty and to start with the `prefix_len`-character `r`/`br` prefix; the `unwrap` below panics if `bump` yields `None` (presumably when `input` is shorter than `prefix_len` — confirm against `Cursor::bump`).
229
+ #[ inline]
230
+ pub fn validate_raw_str ( input : & str , prefix_len : u32 ) -> Result < ( ) , RawStrError > {
231
+ debug_assert ! ( !input. is_empty( ) ) ;
232
+ let mut cursor = Cursor :: new ( input) ;
233
+ // Move past the leading `r` or `br`; a missing prefix character is treated as a caller bug (hence `unwrap`).
234
+ for _ in 0 ..prefix_len {
235
+ cursor. bump ( ) . unwrap ( ) ;
236
+ }
// The hash count from the `Ok` case is discarded; callers only need to know whether lexing failed and why.
237
+ cursor. raw_double_quoted_string ( prefix_len) . map ( |_| ( ) )
238
+ }
239
+
230
240
/// Creates an iterator that produces tokens from the input string.
231
241
pub fn tokenize ( input : & str ) -> impl Iterator < Item = Token > + ' _ {
232
242
let mut cursor = Cursor :: new ( input) ;
@@ -316,12 +326,12 @@ impl Cursor<'_> {
316
326
'r' => match ( self . first ( ) , self . second ( ) ) {
317
327
( '#' , c1) if is_id_start ( c1) => self . raw_ident ( ) ,
318
328
( '#' , _) | ( '"' , _) => {
319
- let ( n_hashes , err ) = self . raw_double_quoted_string ( 1 ) ;
329
+ let res = self . raw_double_quoted_string ( 1 ) ;
320
330
let suffix_start = self . len_consumed ( ) ;
321
- if err . is_none ( ) {
331
+ if res . is_ok ( ) {
322
332
self . eat_literal_suffix ( ) ;
323
333
}
324
- let kind = RawStr { n_hashes, err } ;
334
+ let kind = RawStr { n_hashes : res . ok ( ) } ;
325
335
Literal { kind, suffix_start }
326
336
}
327
337
_ => self . ident_or_unknown_prefix ( ) ,
@@ -351,12 +361,12 @@ impl Cursor<'_> {
351
361
}
352
362
( 'r' , '"' ) | ( 'r' , '#' ) => {
353
363
self . bump ( ) ;
354
- let ( n_hashes , err ) = self . raw_double_quoted_string ( 2 ) ;
364
+ let res = self . raw_double_quoted_string ( 2 ) ;
355
365
let suffix_start = self . len_consumed ( ) ;
356
- if err . is_none ( ) {
366
+ if res . is_ok ( ) {
357
367
self . eat_literal_suffix ( ) ;
358
368
}
359
- let kind = RawByteStr { n_hashes, err } ;
369
+ let kind = RawByteStr { n_hashes : res . ok ( ) } ;
360
370
Literal { kind, suffix_start }
361
371
}
362
372
_ => self . ident_or_unknown_prefix ( ) ,
@@ -699,19 +709,18 @@ impl Cursor<'_> {
699
709
}
700
710
701
711
/// Eats the double-quoted raw string and returns its number of `#`s (`n_hashes`), or the error encountered while lexing it.
702
- fn raw_double_quoted_string ( & mut self , prefix_len : usize ) -> ( u8 , Option < RawStrError > ) {
712
+ fn raw_double_quoted_string ( & mut self , prefix_len : u32 ) -> Result < u8 , RawStrError > {
703
713
// Wrap the actual function to handle the error with too many hashes.
704
714
// This way, it eats the whole raw string before the hash count is checked.
705
- let ( n_hashes, err ) = self . raw_string_unvalidated ( prefix_len) ;
715
+ let n_hashes = self . raw_string_unvalidated ( prefix_len) ? ;
706
716
// Only up to 255 `#`s are allowed in raw strings, so the count must fit in a `u8`.
707
717
match u8:: try_from ( n_hashes) {
708
- Ok ( num) => ( num, err) ,
709
- // We lie about the number of hashes here :P
710
- Err ( _) => ( 0 , Some ( RawStrError :: TooManyDelimiters { found : n_hashes } ) ) ,
718
+ Ok ( num) => Ok ( num) ,
719
+ Err ( _) => Err ( RawStrError :: TooManyDelimiters { found : n_hashes } ) ,
711
720
}
712
721
}
713
722
714
- fn raw_string_unvalidated ( & mut self , prefix_len : usize ) -> ( usize , Option < RawStrError > ) {
723
+ fn raw_string_unvalidated ( & mut self , prefix_len : u32 ) -> Result < u32 , RawStrError > {
715
724
debug_assert ! ( self . prev( ) == 'r' ) ;
716
725
let start_pos = self . len_consumed ( ) ;
717
726
let mut possible_terminator_offset = None ;
@@ -730,7 +739,7 @@ impl Cursor<'_> {
730
739
Some ( '"' ) => ( ) ,
731
740
c => {
732
741
let c = c. unwrap_or ( EOF_CHAR ) ;
733
- return ( n_start_hashes , Some ( RawStrError :: InvalidStarter { bad_char : c } ) ) ;
742
+ return Err ( RawStrError :: InvalidStarter { bad_char : c } ) ;
734
743
}
735
744
}
736
745
@@ -740,14 +749,11 @@ impl Cursor<'_> {
740
749
self . eat_while ( |c| c != '"' ) ;
741
750
742
751
if self . is_eof ( ) {
743
- return (
744
- n_start_hashes,
745
- Some ( RawStrError :: NoTerminator {
746
- expected : n_start_hashes,
747
- found : max_hashes,
748
- possible_terminator_offset,
749
- } ) ,
750
- ) ;
752
+ return Err ( RawStrError :: NoTerminator {
753
+ expected : n_start_hashes,
754
+ found : max_hashes,
755
+ possible_terminator_offset,
756
+ } ) ;
751
757
}
752
758
753
759
// Eat closing double quote.
@@ -765,7 +771,7 @@ impl Cursor<'_> {
765
771
}
766
772
767
773
if n_end_hashes == n_start_hashes {
768
- return ( n_start_hashes, None ) ;
774
+ return Ok ( n_start_hashes) ;
769
775
} else if n_end_hashes > max_hashes {
770
776
// Keep track of possible terminators to give a hint about
771
777
// where there might be a missing terminator
0 commit comments