Skip to content

Commit cdc7c71

Browse files
authored
Avoid consuming trailing whitespace during re-lexing (#11933)
## Summary

This PR updates the re-lexing logic to avoid consuming the trailing whitespace and to instead move the lexer explicitly to the last newline character encountered while moving backwards.

Consider the following code snippet, taken from the test case, with whitespace (`.`) and newline (`\n`) characters highlighted:

```py
# There are trailing whitespace before the newline character but those whitespaces are
# part of the comment token
f"""hello {x # comment....\n
#                     ^
y = 1\n
```

The parser is at `y` when it tries to recover from an unclosed `{`, so it calls into the re-lexing logic, which tries to move the lexer back to the end of the previous line. But because that logic consumed all of the whitespace, it moved the lexer to the location marked by `^` in the above code snippet. Those whitespace characters are part of the comment token, so the ranges of the two tokens overlapped, which caused the panic.

Note that this is only a bug when there's a comment with trailing whitespace; otherwise it's fine to move the lexer to the whitespace character, because the lexer would just skip the whitespace. Nevertheless, this PR updates the logic to move it explicitly to the newline character in all cases.

fixes: #11929

## Test Plan

Add test cases and update the snapshot. Make sure that it doesn't panic on the code snippet in the linked issue.
1 parent ff3bf58 commit cdc7c71

File tree

3 files changed

+103
-14
lines changed

3 files changed

+103
-14
lines changed

crates/ruff_python_parser/resources/invalid/re_lex_logical_token.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -54,4 +54,10 @@ def bar():
5454

5555
if call(f"hello
5656
def bar():
57-
pass
57+
pass
58+
59+
60+
# There are trailing whitespace before the newline character but those whitespaces are
61+
# part of the comment token
62+
f"""hello {x # comment
63+
y = 1

crates/ruff_python_parser/src/lexer.rs

+7-8
Original file line numberDiff line numberDiff line change
@@ -1370,25 +1370,24 @@ impl<'src> Lexer<'src> {
13701370
// i.e., it recovered from an unclosed parenthesis (`(`, `[`, or `{`).
13711371
self.nesting -= 1;
13721372

1373-
let current_position = self.current_range().start();
1373+
let mut current_position = self.current_range().start();
13741374
let reverse_chars = self.source[..current_position.to_usize()].chars().rev();
1375-
let mut new_position = current_position;
1376-
let mut has_newline = false;
1375+
let mut newline_position = None;
13771376

13781377
for ch in reverse_chars {
13791378
if is_python_whitespace(ch) {
1380-
new_position -= ch.text_len();
1379+
current_position -= ch.text_len();
13811380
} else if matches!(ch, '\n' | '\r') {
1382-
has_newline |= true;
1383-
new_position -= ch.text_len();
1381+
current_position -= ch.text_len();
1382+
newline_position = Some(current_position);
13841383
} else {
13851384
break;
13861385
}
13871386
}
13881387

13891388
// The lexer should only be moved if there's a newline character which needs to be
13901389
// re-lexed.
1391-
if new_position != current_position && has_newline {
1390+
if let Some(newline_position) = newline_position {
13921391
// Earlier we reduced the nesting level unconditionally. Now that we know the lexer's
13931392
// position is going to be moved back, the lexer needs to be put back into a
13941393
// parenthesized context if the current token is a closing parenthesis.
@@ -1410,7 +1409,7 @@ impl<'src> Lexer<'src> {
14101409
}
14111410

14121411
self.cursor = Cursor::new(self.source);
1413-
self.cursor.skip_bytes(new_position.to_usize());
1412+
self.cursor.skip_bytes(newline_position.to_usize());
14141413
self.state = State::Other;
14151414
self.next_token();
14161415
true

crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lex_logical_token.py.snap

+89-5
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ input_file: crates/ruff_python_parser/resources/invalid/re_lex_logical_token.py
77
```
88
Module(
99
ModModule {
10-
range: 0..979,
10+
range: 0..1129,
1111
body: [
1212
If(
1313
StmtIf {
@@ -670,6 +670,53 @@ Module(
670670
],
671671
},
672672
),
673+
Expr(
674+
StmtExpr {
675+
range: 1097..1109,
676+
value: FString(
677+
ExprFString {
678+
range: 1097..1109,
679+
value: FStringValue {
680+
inner: Single(
681+
FString(
682+
FString {
683+
range: 1097..1109,
684+
elements: [
685+
Literal(
686+
FStringLiteralElement {
687+
range: 1101..1107,
688+
value: "hello ",
689+
},
690+
),
691+
Expression(
692+
FStringExpressionElement {
693+
range: 1107..1109,
694+
expression: Name(
695+
ExprName {
696+
range: 1108..1109,
697+
id: "x",
698+
ctx: Load,
699+
},
700+
),
701+
debug_text: None,
702+
conversion: None,
703+
format_spec: None,
704+
},
705+
),
706+
],
707+
flags: FStringFlags {
708+
quote_style: Double,
709+
prefix: Regular,
710+
triple_quoted: true,
711+
},
712+
},
713+
),
714+
),
715+
},
716+
},
717+
),
718+
},
719+
),
673720
],
674721
},
675722
)
@@ -831,8 +878,45 @@ Module(
831878

832879

833880
|
834-
55 | if call(f"hello
835-
56 | def bar():
836-
57 | pass
837-
| Syntax Error: Expected a statement
881+
60 | # There are trailing whitespace before the newline character but those whitespaces are
882+
61 | # part of the comment token
883+
62 | f"""hello {x # comment
884+
| Syntax Error: Expected a statement
885+
63 | y = 1
886+
|
887+
888+
889+
|
890+
60 | # There are trailing whitespace before the newline character but those whitespaces are
891+
61 | # part of the comment token
892+
62 | f"""hello {x # comment
893+
| ___________________________^
894+
63 | | y = 1
895+
| |_____^ Syntax Error: f-string: unterminated triple-quoted string
896+
|
897+
898+
899+
|
900+
61 | # part of the comment token
901+
62 | f"""hello {x # comment
902+
63 | y = 1
903+
| ^ Syntax Error: f-string: expecting '}'
904+
|
905+
906+
907+
|
908+
60 | # There are trailing whitespace before the newline character but those whitespaces are
909+
61 | # part of the comment token
910+
62 | f"""hello {x # comment
911+
| ___________________________^
912+
63 | | y = 1
913+
| |_____^ Syntax Error: Expected FStringEnd, found Unknown
914+
|
915+
916+
917+
|
918+
61 | # part of the comment token
919+
62 | f"""hello {x # comment
920+
63 | y = 1
921+
| Syntax Error: Expected a statement
838922
|

0 commit comments

Comments
 (0)