Skip to content

Commit d3cd61f

Browse files
authored
Use empty range when there's "gap" in token source (#11032)
## Summary

This fixes a bug where the parser would panic when there is a "gap" in the token source.

What's a gap?

The reason it's `<=` instead of just `==` is because there could be whitespace between the two tokens. For example:

```python
#     last token end
#     | current token (newline) start
#     v v
def foo \n
#      ^
#      assume there's trailing whitespace here
```

Or, there could be tokens that are considered "trivia" and thus aren't emitted by the token source. These are comments and non-logical newlines. For example:

```python
#     last token end
#     v
def foo # comment\n
#                ^ current token (newline) start
```

In either of the above cases, there's a "gap" between the end of the last token and start of the current token.

## Test Plan

Add test cases and update the snapshots.
1 parent 9b80cc0 commit d3cd61f

File tree

5 files changed

+195
-26
lines changed

5 files changed

+195
-26
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
def foo # comment
2+
def bar(): ...
3+
def baz

crates/ruff_python_parser/src/parser/mod.rs

+53-6
Original file line numberDiff line numberDiff line change
@@ -261,12 +261,59 @@ impl<'src> Parser<'src> {
261261
}
262262

263263
fn node_range(&self, start: TextSize) -> TextRange {
264-
// It's possible during error recovery that the parsing didn't consume any tokens. In that case,
265-
// `last_token_end` still points to the end of the previous token but `start` is the start of the current token.
266-
// Calling `TextRange::new(start, self.last_token_end)` would panic in that case because `start > end`.
267-
// This path "detects" this case and creates an empty range instead.
268-
if self.node_start() == start {
269-
TextRange::empty(start)
264+
// It's possible during error recovery that the parsing didn't consume any tokens. In that
265+
// case, `last_token_end` still points to the end of the previous token but `start` is the
266+
// start of the current token. Calling `TextRange::new(start, self.last_token_end)` would
267+
// panic in that case because `start > end`. This path "detects" this case and creates an
268+
// empty range instead.
269+
//
270+
// The reason it's `<=` instead of just `==` is because there could be whitespace between
271+
// the two tokens. For example:
272+
//
273+
// ```python
274+
// # last token end
275+
// # | current token (newline) start
276+
// # v v
277+
// def foo \n
278+
// # ^
279+
// # assume there's trailing whitespace here
280+
// ```
281+
//
282+
// Or, there could be tokens that are considered "trivia" and thus aren't emitted by the token
283+
// source. These are comments and non-logical newlines. For example:
284+
//
285+
// ```python
286+
// # last token end
287+
// # v
288+
// def foo # comment\n
289+
// # ^ current token (newline) start
290+
// ```
291+
//
292+
// In either of the above cases, there's a "gap" between the end of the last token and start
293+
// of the current token.
294+
if self.last_token_end <= start {
295+
// We need to create an empty range at the last token end instead of the start because
296+
// otherwise this node range will fall outside the range of its parent node. Taking
297+
// the above example:
298+
//
299+
// ```python
300+
// if True:
301+
// # function start
302+
// # | function end
303+
// # v v
304+
// def foo # comment
305+
// # ^ current token start
306+
// ```
307+
//
308+
// Here, the current token start is the start of parameter range but the function ends
309+
// at `foo`. Even if there's a function body, the range of parameters would still be
310+
// before the comment.
311+
312+
// test_err node_range_with_gaps
313+
// def foo # comment
314+
// def bar(): ...
315+
// def baz
316+
TextRange::empty(self.last_token_end)
270317
} else {
271318
TextRange::new(start, self.last_token_end)
272319
}

crates/ruff_python_parser/src/parser/statement.rs

+16-19
Original file line numberDiff line numberDiff line change
@@ -1663,23 +1663,19 @@ impl<'src> Parser<'src> {
16631663
// x = 10
16641664
let type_params = self.try_parse_type_params();
16651665

1666+
// test_ok function_def_parameter_range
1667+
// def foo(
1668+
// first: int,
1669+
// second: int,
1670+
// ) -> int: ...
1671+
16661672
// test_err function_def_unclosed_parameter_list
16671673
// def foo(a: int, b:
16681674
// def foo():
16691675
// return 42
16701676
// def foo(a: int, b: str
16711677
// x = 10
1672-
let parameters_start = self.node_start();
1673-
self.expect(TokenKind::Lpar);
1674-
let mut parameters = self.parse_parameters(FunctionKind::FunctionDef);
1675-
self.expect(TokenKind::Rpar);
1676-
1677-
// test_ok function_def_parameter_range
1678-
// def foo(
1679-
// first: int,
1680-
// second: int,
1681-
// ) -> int: ...
1682-
parameters.range = self.node_range(parameters_start);
1678+
let parameters = self.parse_parameters(FunctionKind::FunctionDef);
16831679

16841680
let returns = if self.eat(TokenKind::Rarrow) {
16851681
if self.at_expr() {
@@ -2844,19 +2840,16 @@ impl<'src> Parser<'src> {
28442840
pub(super) fn parse_parameters(&mut self, function_kind: FunctionKind) -> ast::Parameters {
28452841
let start = self.node_start();
28462842

2843+
if matches!(function_kind, FunctionKind::FunctionDef) {
2844+
self.expect(TokenKind::Lpar);
2845+
}
2846+
28472847
// TODO(dhruvmanila): This has the same problem as `parse_match_pattern_mapping`
28482848
// has where if there are multiple kwarg or vararg, the last one will win and
28492849
// the parser will drop the previous ones. Another thing is the vararg and kwarg
28502850
// uses `Parameter` (not `ParameterWithDefault`) which means that the parser cannot
28512851
// recover well from `*args=(1, 2)`.
2852-
let mut parameters = ast::Parameters {
2853-
range: TextRange::default(),
2854-
posonlyargs: vec![],
2855-
args: vec![],
2856-
kwonlyargs: vec![],
2857-
vararg: None,
2858-
kwarg: None,
2859-
};
2852+
let mut parameters = ast::Parameters::empty(TextRange::default());
28602853

28612854
let mut seen_default_param = false; // `a=10`
28622855
let mut seen_positional_only_separator = false; // `/`
@@ -3094,6 +3087,10 @@ impl<'src> Parser<'src> {
30943087
self.add_error(ParseErrorType::ExpectedKeywordParam, star_range);
30953088
}
30963089

3090+
if matches!(function_kind, FunctionKind::FunctionDef) {
3091+
self.expect(TokenKind::Rpar);
3092+
}
3093+
30973094
parameters.range = self.node_range(start);
30983095

30993096
// test_err params_duplicate_names
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
---
2+
source: crates/ruff_python_parser/tests/fixtures.rs
3+
input_file: crates/ruff_python_parser/resources/inline/err/node_range_with_gaps.py
4+
---
5+
## AST
6+
7+
```
8+
Module(
9+
ModModule {
10+
range: 0..41,
11+
body: [
12+
FunctionDef(
13+
StmtFunctionDef {
14+
range: 0..7,
15+
is_async: false,
16+
decorator_list: [],
17+
name: Identifier {
18+
id: "foo",
19+
range: 4..7,
20+
},
21+
type_params: None,
22+
parameters: Parameters {
23+
range: 7..7,
24+
posonlyargs: [],
25+
args: [],
26+
vararg: None,
27+
kwonlyargs: [],
28+
kwarg: None,
29+
},
30+
returns: None,
31+
body: [],
32+
},
33+
),
34+
FunctionDef(
35+
StmtFunctionDef {
36+
range: 18..32,
37+
is_async: false,
38+
decorator_list: [],
39+
name: Identifier {
40+
id: "bar",
41+
range: 22..25,
42+
},
43+
type_params: None,
44+
parameters: Parameters {
45+
range: 25..27,
46+
posonlyargs: [],
47+
args: [],
48+
vararg: None,
49+
kwonlyargs: [],
50+
kwarg: None,
51+
},
52+
returns: None,
53+
body: [
54+
Expr(
55+
StmtExpr {
56+
range: 29..32,
57+
value: EllipsisLiteral(
58+
ExprEllipsisLiteral {
59+
range: 29..32,
60+
},
61+
),
62+
},
63+
),
64+
],
65+
},
66+
),
67+
FunctionDef(
68+
StmtFunctionDef {
69+
range: 33..40,
70+
is_async: false,
71+
decorator_list: [],
72+
name: Identifier {
73+
id: "baz",
74+
range: 37..40,
75+
},
76+
type_params: None,
77+
parameters: Parameters {
78+
range: 40..40,
79+
posonlyargs: [],
80+
args: [],
81+
vararg: None,
82+
kwonlyargs: [],
83+
kwarg: None,
84+
},
85+
returns: None,
86+
body: [],
87+
},
88+
),
89+
],
90+
},
91+
)
92+
```
93+
## Errors
94+
95+
|
96+
1 | def foo # comment
97+
| ^ Syntax Error: Expected '(', found newline
98+
2 | def bar(): ...
99+
3 | def baz
100+
|
101+
102+
103+
|
104+
1 | def foo # comment
105+
2 | def bar(): ...
106+
| ^^^ Syntax Error: Expected ')', found 'def'
107+
3 | def baz
108+
|
109+
110+
111+
|
112+
1 | def foo # comment
113+
2 | def bar(): ...
114+
3 | def baz
115+
| ^ Syntax Error: Expected '(', found newline
116+
|
117+
118+
119+
|
120+
2 | def bar(): ...
121+
3 | def baz
122+
|

crates/ruff_python_parser/tests/snapshots/invalid_syntax@unterminated_fstring_newline_recovery.py.snap

+1-1
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ Module(
167167
conversion: None,
168168
format_spec: Some(
169169
FStringFormatSpec {
170-
range: 43..43,
170+
range: 42..42,
171171
elements: [],
172172
},
173173
),

0 commit comments

Comments
 (0)