Skip to content

Commit 7440e54

Browse files
authored
Avoid allocating in lex_decimal (#7252)
1 parent 0357e80 commit 7440e54

File tree

1 file changed

+71
-44
lines changed

1 file changed

+71
-44
lines changed

crates/ruff_python_parser/src/lexer.rs

Lines changed: 71 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@
2828
//!
2929
//! [Lexical analysis]: https://docs.python.org/3/reference/lexical_analysis.html
3030
31-
use std::borrow::Cow;
3231
use std::iter::FusedIterator;
3332
use std::{char, cmp::Ordering, str::FromStr};
3433

@@ -263,9 +262,10 @@ impl<'source> Lexer<'source> {
263262
'x' | 'o' | 'b'
264263
));
265264

266-
let value_text = self.radix_run(None, radix);
265+
let mut number = LexedText::new(self.offset(), self.source);
266+
self.radix_run(&mut number, radix);
267267
let value =
268-
BigInt::from_str_radix(&value_text, radix.as_u32()).map_err(|e| LexicalError {
268+
BigInt::from_str_radix(number.as_str(), radix.as_u32()).map_err(|e| LexicalError {
269269
error: LexicalErrorType::OtherError(format!("{e:?}")),
270270
location: self.token_range().start(),
271271
})?;
@@ -278,15 +278,14 @@ impl<'source> Lexer<'source> {
278278
debug_assert!(self.cursor.previous().is_ascii_digit() || self.cursor.previous() == '.');
279279
let start_is_zero = first_digit_or_dot == '0';
280280

281-
let mut value_text = if first_digit_or_dot == '.' {
282-
String::new()
283-
} else {
284-
self.radix_run(Some(first_digit_or_dot), Radix::Decimal)
285-
.into_owned()
281+
let mut number = LexedText::new(self.token_start(), self.source);
282+
if first_digit_or_dot != '.' {
283+
number.push(first_digit_or_dot);
284+
self.radix_run(&mut number, Radix::Decimal);
286285
};
287286

288287
let is_float = if first_digit_or_dot == '.' || self.cursor.eat_char('.') {
289-
value_text.push('.');
288+
number.push('.');
290289

291290
if self.cursor.eat_char('_') {
292291
return Err(LexicalError {
@@ -295,7 +294,7 @@ impl<'source> Lexer<'source> {
295294
});
296295
}
297296

298-
value_text.push_str(&self.radix_run(None, Radix::Decimal));
297+
self.radix_run(&mut number, Radix::Decimal);
299298
true
300299
} else {
301300
// Normal number:
@@ -304,14 +303,14 @@ impl<'source> Lexer<'source> {
304303

305304
let is_float = match self.cursor.rest().as_bytes() {
306305
[b'e' | b'E', b'0'..=b'9', ..] | [b'e' | b'E', b'-' | b'+', b'0'..=b'9', ..] => {
307-
value_text.push('e');
308-
self.cursor.bump(); // e | E
306+
// 'e' | 'E'
307+
number.push(self.cursor.bump().unwrap());
309308

310309
if let Some(sign) = self.cursor.eat_if(|c| matches!(c, '+' | '-')) {
311-
value_text.push(sign);
310+
number.push(sign);
312311
}
313312

314-
value_text.push_str(&self.radix_run(None, Radix::Decimal));
313+
self.radix_run(&mut number, Radix::Decimal);
315314

316315
true
317316
}
@@ -320,7 +319,7 @@ impl<'source> Lexer<'source> {
320319

321320
if is_float {
322321
// Improvement: Use `Cow` instead of pushing to value text
323-
let value = f64::from_str(&value_text).map_err(|_| LexicalError {
322+
let value = f64::from_str(number.as_str()).map_err(|_| LexicalError {
324323
error: LexicalErrorType::OtherError("Invalid decimal literal".to_owned()),
325324
location: self.token_start(),
326325
})?;
@@ -337,10 +336,10 @@ impl<'source> Lexer<'source> {
337336
} else {
338337
// Parse trailing 'j':
339338
if self.cursor.eat_if(|c| matches!(c, 'j' | 'J')).is_some() {
340-
let imag = f64::from_str(&value_text).unwrap();
339+
let imag = f64::from_str(number.as_str()).unwrap();
341340
Ok(Tok::Complex { real: 0.0, imag })
342341
} else {
343-
let value = value_text.parse::<BigInt>().unwrap();
342+
let value = number.as_str().parse::<BigInt>().unwrap();
344343
if start_is_zero && !value.is_zero() {
345344
// leading zeros in decimal integer literals are not permitted
346345
return Err(LexicalError {
@@ -356,34 +355,19 @@ impl<'source> Lexer<'source> {
356355
/// Consume a sequence of numbers with the given radix,
357356
/// the digits can be decorated with underscores
358357
/// like this: '`1_2_3_4`' == '1234'
359-
fn radix_run(&mut self, first: Option<char>, radix: Radix) -> Cow<'source, str> {
360-
let start = if let Some(first) = first {
361-
self.offset() - first.text_len()
362-
} else {
363-
self.offset()
364-
};
365-
self.cursor.eat_while(|c| radix.is_digit(c));
366-
367-
let number = &self.source[TextRange::new(start, self.offset())];
368-
369-
// Number that contains `_` separators. Remove them from the parsed text.
370-
if radix.is_digit(self.cursor.second()) && self.cursor.eat_char('_') {
371-
let mut value_text = number.to_string();
372-
373-
loop {
374-
if let Some(c) = self.cursor.eat_if(|c| radix.is_digit(c)) {
375-
value_text.push(c);
376-
} else if self.cursor.first() == '_' && radix.is_digit(self.cursor.second()) {
377-
// Skip over `_`
378-
self.cursor.bump();
379-
} else {
380-
break;
381-
}
358+
fn radix_run(&mut self, number: &mut LexedText, radix: Radix) {
359+
loop {
360+
if let Some(c) = self.cursor.eat_if(|c| radix.is_digit(c)) {
361+
number.push(c);
362+
}
363+
// Number that contains `_` separators. Remove them from the parsed text.
364+
else if self.cursor.first() == '_' && radix.is_digit(self.cursor.second()) {
365+
// Skip over `_`
366+
self.cursor.bump();
367+
number.skip_char();
368+
} else {
369+
break;
382370
}
383-
384-
Cow::Owned(value_text)
385-
} else {
386-
Cow::Borrowed(number)
387371
}
388372
}
389373

@@ -1236,6 +1220,49 @@ const fn is_python_whitespace(c: char) -> bool {
12361220
)
12371221
}
12381222

1223+
enum LexedText<'a> {
1224+
Source { source: &'a str, range: TextRange },
1225+
Owned(String),
1226+
}
1227+
1228+
impl<'a> LexedText<'a> {
1229+
fn new(start: TextSize, source: &'a str) -> Self {
1230+
Self::Source {
1231+
range: TextRange::empty(start),
1232+
source,
1233+
}
1234+
}
1235+
1236+
fn push(&mut self, c: char) {
1237+
match self {
1238+
LexedText::Source { range, source } => {
1239+
*range = range.add_end(c.text_len());
1240+
debug_assert!(source[*range].ends_with(c));
1241+
}
1242+
LexedText::Owned(owned) => owned.push(c),
1243+
}
1244+
}
1245+
1246+
fn as_str<'b>(&'b self) -> &'b str
1247+
where
1248+
'b: 'a,
1249+
{
1250+
match self {
1251+
LexedText::Source { range, source } => &source[*range],
1252+
LexedText::Owned(owned) => owned,
1253+
}
1254+
}
1255+
1256+
fn skip_char(&mut self) {
1257+
match self {
1258+
LexedText::Source { range, source } => {
1259+
*self = LexedText::Owned(source[*range].to_string());
1260+
}
1261+
LexedText::Owned(_) => {}
1262+
}
1263+
}
1264+
}
1265+
12391266
#[cfg(test)]
12401267
mod tests {
12411268
use num_bigint::BigInt;

0 commit comments

Comments
 (0)