28
28
//!
29
29
//! [Lexical analysis]: https://docs.python.org/3/reference/lexical_analysis.html
30
30
31
- use std:: borrow:: Cow ;
32
31
use std:: iter:: FusedIterator ;
33
32
use std:: { char, cmp:: Ordering , str:: FromStr } ;
34
33
@@ -263,9 +262,10 @@ impl<'source> Lexer<'source> {
263
262
'x' | 'o' | 'b'
264
263
) ) ;
265
264
266
- let value_text = self . radix_run ( None , radix) ;
265
+ let mut number = LexedText :: new ( self . offset ( ) , self . source ) ;
266
+ self . radix_run ( & mut number, radix) ;
267
267
let value =
268
- BigInt :: from_str_radix ( & value_text , radix. as_u32 ( ) ) . map_err ( |e| LexicalError {
268
+ BigInt :: from_str_radix ( number . as_str ( ) , radix. as_u32 ( ) ) . map_err ( |e| LexicalError {
269
269
error : LexicalErrorType :: OtherError ( format ! ( "{e:?}" ) ) ,
270
270
location : self . token_range ( ) . start ( ) ,
271
271
} ) ?;
@@ -278,15 +278,14 @@ impl<'source> Lexer<'source> {
278
278
debug_assert ! ( self . cursor. previous( ) . is_ascii_digit( ) || self . cursor. previous( ) == '.' ) ;
279
279
let start_is_zero = first_digit_or_dot == '0' ;
280
280
281
- let mut value_text = if first_digit_or_dot == '.' {
282
- String :: new ( )
283
- } else {
284
- self . radix_run ( Some ( first_digit_or_dot) , Radix :: Decimal )
285
- . into_owned ( )
281
+ let mut number = LexedText :: new ( self . token_start ( ) , self . source ) ;
282
+ if first_digit_or_dot != '.' {
283
+ number. push ( first_digit_or_dot) ;
284
+ self . radix_run ( & mut number, Radix :: Decimal ) ;
286
285
} ;
287
286
288
287
let is_float = if first_digit_or_dot == '.' || self . cursor . eat_char ( '.' ) {
289
- value_text . push ( '.' ) ;
288
+ number . push ( '.' ) ;
290
289
291
290
if self . cursor . eat_char ( '_' ) {
292
291
return Err ( LexicalError {
@@ -295,7 +294,7 @@ impl<'source> Lexer<'source> {
295
294
} ) ;
296
295
}
297
296
298
- value_text . push_str ( & self . radix_run ( None , Radix :: Decimal ) ) ;
297
+ self . radix_run ( & mut number , Radix :: Decimal ) ;
299
298
true
300
299
} else {
301
300
// Normal number:
@@ -304,14 +303,14 @@ impl<'source> Lexer<'source> {
304
303
305
304
let is_float = match self . cursor . rest ( ) . as_bytes ( ) {
306
305
[ b'e' | b'E' , b'0' ..=b'9' , ..] | [ b'e' | b'E' , b'-' | b'+' , b'0' ..=b'9' , ..] => {
307
- value_text . push ( 'e' ) ;
308
- self . cursor . bump ( ) ; // e | E
306
+ // 'e' | 'E'
307
+ number . push ( self . cursor . bump ( ) . unwrap ( ) ) ;
309
308
310
309
if let Some ( sign) = self . cursor . eat_if ( |c| matches ! ( c, '+' | '-' ) ) {
311
- value_text . push ( sign) ;
310
+ number . push ( sign) ;
312
311
}
313
312
314
- value_text . push_str ( & self . radix_run ( None , Radix :: Decimal ) ) ;
313
+ self . radix_run ( & mut number , Radix :: Decimal ) ;
315
314
316
315
true
317
316
}
@@ -320,7 +319,7 @@ impl<'source> Lexer<'source> {
320
319
321
320
if is_float {
322
321
// Improvement: Use `Cow` instead of pushing to value text
323
- let value = f64:: from_str ( & value_text ) . map_err ( |_| LexicalError {
322
+ let value = f64:: from_str ( number . as_str ( ) ) . map_err ( |_| LexicalError {
324
323
error : LexicalErrorType :: OtherError ( "Invalid decimal literal" . to_owned ( ) ) ,
325
324
location : self . token_start ( ) ,
326
325
} ) ?;
@@ -337,10 +336,10 @@ impl<'source> Lexer<'source> {
337
336
} else {
338
337
// Parse trailing 'j':
339
338
if self . cursor . eat_if ( |c| matches ! ( c, 'j' | 'J' ) ) . is_some ( ) {
340
- let imag = f64:: from_str ( & value_text ) . unwrap ( ) ;
339
+ let imag = f64:: from_str ( number . as_str ( ) ) . unwrap ( ) ;
341
340
Ok ( Tok :: Complex { real : 0.0 , imag } )
342
341
} else {
343
- let value = value_text . parse :: < BigInt > ( ) . unwrap ( ) ;
342
+ let value = number . as_str ( ) . parse :: < BigInt > ( ) . unwrap ( ) ;
344
343
if start_is_zero && !value. is_zero ( ) {
345
344
// leading zeros in decimal integer literals are not permitted
346
345
return Err ( LexicalError {
@@ -356,34 +355,19 @@ impl<'source> Lexer<'source> {
356
355
/// Consume a sequence of numbers with the given radix,
357
356
/// the digits can be decorated with underscores
358
357
/// like this: '`1_2_3_4`' == '1234'
359
- fn radix_run ( & mut self , first : Option < char > , radix : Radix ) -> Cow < ' source , str > {
360
- let start = if let Some ( first) = first {
361
- self . offset ( ) - first. text_len ( )
362
- } else {
363
- self . offset ( )
364
- } ;
365
- self . cursor . eat_while ( |c| radix. is_digit ( c) ) ;
366
-
367
- let number = & self . source [ TextRange :: new ( start, self . offset ( ) ) ] ;
368
-
369
- // Number that contains `_` separators. Remove them from the parsed text.
370
- if radix. is_digit ( self . cursor . second ( ) ) && self . cursor . eat_char ( '_' ) {
371
- let mut value_text = number. to_string ( ) ;
372
-
373
- loop {
374
- if let Some ( c) = self . cursor . eat_if ( |c| radix. is_digit ( c) ) {
375
- value_text. push ( c) ;
376
- } else if self . cursor . first ( ) == '_' && radix. is_digit ( self . cursor . second ( ) ) {
377
- // Skip over `_`
378
- self . cursor . bump ( ) ;
379
- } else {
380
- break ;
381
- }
358
+ fn radix_run ( & mut self , number : & mut LexedText , radix : Radix ) {
359
+ loop {
360
+ if let Some ( c) = self . cursor . eat_if ( |c| radix. is_digit ( c) ) {
361
+ number. push ( c) ;
362
+ }
363
+ // Number that contains `_` separators. Remove them from the parsed text.
364
+ else if self . cursor . first ( ) == '_' && radix. is_digit ( self . cursor . second ( ) ) {
365
+ // Skip over `_`
366
+ self . cursor . bump ( ) ;
367
+ number. skip_char ( ) ;
368
+ } else {
369
+ break ;
382
370
}
383
-
384
- Cow :: Owned ( value_text)
385
- } else {
386
- Cow :: Borrowed ( number)
387
371
}
388
372
}
389
373
@@ -1236,6 +1220,49 @@ const fn is_python_whitespace(c: char) -> bool {
1236
1220
)
1237
1221
}
1238
1222
1223
+ enum LexedText < ' a > {
1224
+ Source { source : & ' a str , range : TextRange } ,
1225
+ Owned ( String ) ,
1226
+ }
1227
+
1228
+ impl < ' a > LexedText < ' a > {
1229
+ fn new ( start : TextSize , source : & ' a str ) -> Self {
1230
+ Self :: Source {
1231
+ range : TextRange :: empty ( start) ,
1232
+ source,
1233
+ }
1234
+ }
1235
+
1236
+ fn push ( & mut self , c : char ) {
1237
+ match self {
1238
+ LexedText :: Source { range, source } => {
1239
+ * range = range. add_end ( c. text_len ( ) ) ;
1240
+ debug_assert ! ( source[ * range] . ends_with( c) ) ;
1241
+ }
1242
+ LexedText :: Owned ( owned) => owned. push ( c) ,
1243
+ }
1244
+ }
1245
+
1246
+ fn as_str < ' b > ( & ' b self ) -> & ' b str
1247
+ where
1248
+ ' b : ' a ,
1249
+ {
1250
+ match self {
1251
+ LexedText :: Source { range, source } => & source[ * range] ,
1252
+ LexedText :: Owned ( owned) => owned,
1253
+ }
1254
+ }
1255
+
1256
+ fn skip_char ( & mut self ) {
1257
+ match self {
1258
+ LexedText :: Source { range, source } => {
1259
+ * self = LexedText :: Owned ( source[ * range] . to_string ( ) ) ;
1260
+ }
1261
+ LexedText :: Owned ( _) => { }
1262
+ }
1263
+ }
1264
+ }
1265
+
1239
1266
#[ cfg( test) ]
1240
1267
mod tests {
1241
1268
use num_bigint:: BigInt ;
0 commit comments