Skip to content

Commit cf4ab7c

Browse files
GlyphackdhruvmanilaMichaReiser
authored
Parse triple quoted string annotations as if parenthesized (#15387)
## Summary Resolves #9467 Parse quoted annotations as if the string content is inside parentheses. With this logic `y` and `z` in this example are equal: ```python y: """ int | str """ z: """( int | str ) """ ``` Also, this rule only applies to triple quotes ([link](python/typing-council#9 (comment))). This PR is based on the [comments](#9467 (comment)) on the issue. I made one extra change: since we don't want any indentation tokens, I am setting `State::Other` as the initial state of the Lexer. Remaining work: - [x] Add a test case for red-knot. - [x] Add more tests. ## Test Plan Added a test which previously failed because the quoted annotation contained indentation. Added an mdtest for red-knot. Updated previous test. Co-authored-by: Dhruv Manilawala <[email protected]> Co-authored-by: Micha Reiser <[email protected]>
1 parent d2656e8 commit cf4ab7c

File tree

10 files changed

+295
-31
lines changed

10 files changed

+295
-31
lines changed

crates/red_knot_python_semantic/resources/mdtest/annotations/string.md

+37
Original file line numberDiff line numberDiff line change
@@ -173,3 +173,40 @@ p: "call()"
173173
r: "[1, 2]"
174174
s: "(1, 2)"
175175
```
176+
177+
## Multi line annotation
178+
179+
Quoted type annotations should be parsed as if surrounded by parentheses.
180+
181+
```py
182+
def valid(
183+
a1: """(
184+
int |
185+
str
186+
)
187+
""",
188+
a2: """
189+
int |
190+
str
191+
""",
192+
):
193+
reveal_type(a1) # revealed: int | str
194+
reveal_type(a2) # revealed: int | str
195+
196+
def invalid(
197+
# error: [invalid-syntax-in-forward-annotation]
198+
a1: """
199+
int |
200+
str)
201+
""",
202+
# error: [invalid-syntax-in-forward-annotation]
203+
a2: """
204+
int) |
205+
str
206+
""",
207+
# error: [invalid-syntax-in-forward-annotation]
208+
a3: """
209+
(int)) """,
210+
):
211+
pass
212+
```

crates/red_knot_python_semantic/src/types/string_annotation.rs

+5-15
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use ruff_db::source::source_text;
22
use ruff_python_ast::str::raw_contents;
3-
use ruff_python_ast::{self as ast, ModExpression, StringFlags};
4-
use ruff_python_parser::{parse_expression_range, Parsed};
3+
use ruff_python_ast::{self as ast, ModExpression};
4+
use ruff_python_parser::Parsed;
55
use ruff_text_size::Ranged;
66

77
use crate::declare_lint;
@@ -153,19 +153,9 @@ pub(crate) fn parse_string_annotation(
153153
} else if raw_contents(node_text)
154154
.is_some_and(|raw_contents| raw_contents == string_literal.as_str())
155155
{
156-
let range_excluding_quotes = string_literal
157-
.range()
158-
.add_start(string_literal.flags.opener_len())
159-
.sub_end(string_literal.flags.closer_len());
160-
161-
// TODO: Support multiline strings like:
162-
// ```py
163-
// x: """
164-
// int
165-
// | float
166-
// """ = 1
167-
// ```
168-
match parse_expression_range(source.as_str(), range_excluding_quotes) {
156+
let parsed =
157+
ruff_python_parser::parse_string_annotation(source.as_str(), string_literal);
158+
match parsed {
169159
Ok(parsed) => return Some(parsed),
170160
Err(parse_error) => context.report_lint(
171161
&INVALID_SYNTAX_IN_FORWARD_ANNOTATION,

crates/ruff_linter/resources/test/fixtures/pyflakes/F722.py

+31
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,34 @@ def g() -> "///":
1111

1212

1313
X: """List[int]"""'☃' = []
14+
15+
# Type annotations with triple quotes can contain newlines and indentation
16+
# https://github.com/python/typing-council/issues/9
17+
y: """
18+
19+
int |
20+
str
21+
"""
22+
23+
z: """(
24+
25+
int |
26+
str
27+
)
28+
"""
29+
30+
invalid1: """
31+
int |
32+
str)
33+
"""
34+
35+
invalid2: """
36+
int) |
37+
str
38+
"""
39+
invalid3: """
40+
((int)
41+
"""
42+
invalid4: """
43+
(int
44+
"""

crates/ruff_linter/resources/test/fixtures/pyupgrade/UP037_2.pyi

-3
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,6 @@ a: '''\\
3333
list[int]''' = [42]
3434

3535

36-
# TODO: These are valid too. String annotations are assumed to be enclosed in parentheses.
37-
# https://github.com/astral-sh/ruff/issues/9467
38-
3936
def f(a: '''
4037
list[int]
4138
''' = []): ...

crates/ruff_linter/src/rules/pyflakes/snapshots/ruff_linter__rules__pyflakes__tests__F722_F722.py.snap

+64
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,68 @@ F722.py:13:4: F722 Syntax error in forward annotation: `List[int]☃`
1313
|
1414
13 | X: """List[int]"""'' = []
1515
| ^^^^^^^^^^^^^^^^^^ F722
16+
14 |
17+
15 | # Type annotations with triple quotes can contain newlines and indentation
18+
|
19+
20+
F722.py:30:11: F722 Syntax error in forward annotation: `
21+
int |
22+
str)
23+
`
24+
|
25+
28 | """
26+
29 |
27+
30 | invalid1: """
28+
| ___________^
29+
31 | | int |
30+
32 | | str)
31+
33 | | """
32+
| |___^ F722
33+
34 |
34+
35 | invalid2: """
35+
|
36+
37+
F722.py:35:11: F722 Syntax error in forward annotation: `
38+
int) |
39+
str
40+
`
41+
|
42+
33 | """
43+
34 |
44+
35 | invalid2: """
45+
| ___________^
46+
36 | | int) |
47+
37 | | str
48+
38 | | """
49+
| |___^ F722
50+
39 | invalid3: """
51+
40 | ((int)
52+
|
53+
54+
F722.py:39:11: F722 Syntax error in forward annotation: `
55+
((int)
56+
`
57+
|
58+
37 | str
59+
38 | """
60+
39 | invalid3: """
61+
| ___________^
62+
40 | | ((int)
63+
41 | | """
64+
| |___^ F722
65+
42 | invalid4: """
66+
43 | (int
67+
|
68+
69+
F722.py:42:11: F722 Syntax error in forward annotation: `
70+
(int
71+
`
72+
|
73+
40 | ((int)
74+
41 | """
75+
42 | invalid4: """
76+
| ___________^
77+
43 | | (int
78+
44 | | """
79+
| |___^ F722
1680
|

crates/ruff_linter/src/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP037_2.pyi.snap

+72-1
Original file line numberDiff line numberDiff line change
@@ -158,4 +158,75 @@ UP037_2.pyi:32:4: UP037 [*] Remove quotes from type annotation
158158
33 |+list[int]) = [42]
159159
34 34 |
160160
35 35 |
161-
36 36 | # TODO: These are valid too. String annotations are assumed to be enclosed in parentheses.
161+
36 36 | def f(a: '''
162+
163+
UP037_2.pyi:36:10: UP037 [*] Remove quotes from type annotation
164+
|
165+
36 | def f(a: '''
166+
| __________^
167+
37 | | list[int]
168+
38 | | ''' = []): ...
169+
| |_______^ UP037
170+
|
171+
= help: Remove quotes
172+
173+
Safe fix
174+
33 33 | list[int]''' = [42]
175+
34 34 |
176+
35 35 |
177+
36 |-def f(a: '''
178+
36 |+def f(a:
179+
37 37 | list[int]
180+
38 |- ''' = []): ...
181+
38 |+ = []): ...
182+
39 39 |
183+
40 40 |
184+
41 41 | def f(a: Foo['''
185+
186+
UP037_2.pyi:41:14: UP037 [*] Remove quotes from type annotation
187+
|
188+
41 | def f(a: Foo['''
189+
| ______________^
190+
42 | | Bar
191+
43 | | [
192+
44 | | Multi |
193+
45 | | Line
194+
46 | | ] # Comment''']): ...
195+
| |___________________^ UP037
196+
|
197+
= help: Remove quotes
198+
199+
Safe fix
200+
38 38 | ''' = []): ...
201+
39 39 |
202+
40 40 |
203+
41 |-def f(a: Foo['''
204+
41 |+def f(a: Foo[(
205+
42 42 | Bar
206+
43 43 | [
207+
44 44 | Multi |
208+
45 45 | Line
209+
46 |- ] # Comment''']): ...
210+
46 |+ ] # Comment
211+
47 |+)]): ...
212+
47 48 |
213+
48 49 |
214+
49 50 | a: '''list
215+
216+
UP037_2.pyi:49:4: UP037 [*] Remove quotes from type annotation
217+
|
218+
49 | a: '''list
219+
| ____^
220+
50 | | [int]''' = [42]
221+
| |________^ UP037
222+
|
223+
= help: Remove quotes
224+
225+
Safe fix
226+
46 46 | ] # Comment''']): ...
227+
47 47 |
228+
48 48 |
229+
49 |-a: '''list
230+
50 |-[int]''' = [42]
231+
49 |+a: (list
232+
50 |+[int]) = [42]

crates/ruff_python_parser/src/lexer.rs

+13-3
Original file line numberDiff line numberDiff line change
@@ -84,15 +84,21 @@ impl<'src> Lexer<'src> {
8484
"Lexer only supports files with a size up to 4GB"
8585
);
8686

87+
let (state, nesting) = if mode == Mode::ParenthesizedExpression {
88+
(State::Other, 1)
89+
} else {
90+
(State::AfterNewline, 0)
91+
};
92+
8793
let mut lexer = Lexer {
8894
source,
8995
cursor: Cursor::new(source),
90-
state: State::AfterNewline,
96+
state,
9197
current_kind: TokenKind::EndOfFile,
9298
current_range: TextRange::empty(start_offset),
9399
current_value: TokenValue::None,
94100
current_flags: TokenFlags::empty(),
95-
nesting: 0,
101+
nesting,
96102
indentations: Indentations::default(),
97103
pending_indentation: None,
98104
mode,
@@ -1309,7 +1315,11 @@ impl<'src> Lexer<'src> {
13091315
fn consume_end(&mut self) -> TokenKind {
13101316
// We reached end of file.
13111317
// First of all, we need all nestings to be finished.
1312-
if self.nesting > 0 {
1318+
// For Mode::ParenthesizedExpression we start with nesting level 1.
1319+
// So we check if we end with that level.
1320+
let init_nesting = u32::from(self.mode == Mode::ParenthesizedExpression);
1321+
1322+
if self.nesting > init_nesting {
13131323
// Reset the nesting to avoid going into infinite loop.
13141324
self.nesting = 0;
13151325
return self.push_error(LexicalError::new(LexicalErrorType::Eof, self.token_range()));

crates/ruff_python_parser/src/lib.rs

+67-1
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,9 @@ pub use crate::token::{Token, TokenKind};
7272

7373
use crate::parser::Parser;
7474

75-
use ruff_python_ast::{Expr, Mod, ModExpression, ModModule, PySourceType, Suite};
75+
use ruff_python_ast::{
76+
Expr, Mod, ModExpression, ModModule, PySourceType, StringFlags, StringLiteral, Suite,
77+
};
7678
use ruff_python_trivia::CommentRanges;
7779
use ruff_text_size::{Ranged, TextRange, TextSize};
7880

@@ -166,6 +168,65 @@ pub fn parse_expression_range(
166168
.into_result()
167169
}
168170

171+
/// Parses a Python expression as if it is parenthesized.
172+
///
173+
/// It behaves similarly to [`parse_expression_range`] but allows what would be valid within parentheses.
174+
///
175+
/// # Example
176+
///
177+
/// Parsing an expression that would be valid within parentheses:
178+
///
179+
/// ```
180+
/// use ruff_python_parser::parse_parenthesized_expression_range;
181+
/// # use ruff_text_size::{TextRange, TextSize};
182+
///
183+
/// let parsed = parse_parenthesized_expression_range("'''\n int | str'''", TextRange::new(TextSize::new(3), TextSize::new(14)));
184+
/// assert!(parsed.is_ok());
185+
pub fn parse_parenthesized_expression_range(
186+
source: &str,
187+
range: TextRange,
188+
) -> Result<Parsed<ModExpression>, ParseError> {
189+
let source = &source[..range.end().to_usize()];
190+
let parsed =
191+
Parser::new_starts_at(source, Mode::ParenthesizedExpression, range.start()).parse();
192+
parsed.try_into_expression().unwrap().into_result()
193+
}
194+
195+
/// Parses a Python expression from a string annotation.
196+
///
197+
/// # Example
198+
///
199+
/// Parsing a string annotation:
200+
///
201+
/// ```
202+
/// use ruff_python_parser::parse_string_annotation;
203+
/// use ruff_python_ast::{StringLiteral, StringLiteralFlags};
204+
/// use ruff_text_size::{TextRange, TextSize};
205+
///
206+
/// let string = StringLiteral {
207+
/// value: "'''\n int | str'''".to_string().into_boxed_str(),
208+
/// flags: StringLiteralFlags::default(),
209+
/// range: TextRange::new(TextSize::new(0), TextSize::new(16)),
210+
/// };
211+
/// let parsed = parse_string_annotation("'''\n int | str'''", &string);
212+
/// assert!(!parsed.is_ok());
213+
/// ```
214+
pub fn parse_string_annotation(
215+
source: &str,
216+
string: &StringLiteral,
217+
) -> Result<Parsed<ModExpression>, ParseError> {
218+
let range = string
219+
.range()
220+
.add_start(string.flags.opener_len())
221+
.sub_end(string.flags.closer_len());
222+
let source = &source[..range.end().to_usize()];
223+
if string.flags.is_triple_quoted() {
224+
parse_parenthesized_expression_range(source, range)
225+
} else {
226+
parse_expression_range(source, range)
227+
}
228+
}
229+
169230
/// Parse the given Python source code using the specified [`Mode`].
170231
///
171232
/// This function is the most general function to parse Python code. Based on the [`Mode`] supplied,
@@ -582,6 +643,11 @@ pub enum Mode {
582643
/// The code consists of a single expression.
583644
Expression,
584645

646+
/// The code consists of a single expression and is parsed as if it is parenthesized. The parentheses themselves aren't required.
647+
/// This allows for valid multiline expressions without the need for parentheses
648+
/// and is specifically useful for parsing string annotations.
649+
ParenthesizedExpression,
650+
585651
/// The code consists of a sequence of statements which can include the
586652
/// escape commands that are part of IPython syntax.
587653
///

0 commit comments

Comments
 (0)