Skip to content

Commit c504d7a

Browse files
authored
Track quoting style in the tokenizer (#10256)
1 parent 72c9f7e commit c504d7a

File tree

55 files changed

+4063
-3268
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+4063
-3268
lines changed

crates/ruff_linter/resources/test/fixtures/flake8_quotes/singles_escaped_unnecessary.py

+4-1
Original file line number | Diff line number | Diff line change
@@ -40,4 +40,7 @@
4040

4141
# Make sure we do not unescape quotes
4242
this_is_fine = "This is an \\'escaped\\' quote"
43-
this_should_raise_Q004 = "This is an \\\'escaped\\\' quote with an extra backslash"
43+
this_should_raise_Q004 = "This is an \\\'escaped\\\' quote with an extra backslash" # Q004
44+
45+
# Invalid escapes in bytestrings are also triggered:
46+
x = b"\xe7\xeb\x0c\xa1\x1b\x83tN\xce=x\xe9\xbe\x01\xb9\x13B_\xba\xe7\x0c2\xce\'rm\x0e\xcd\xe9.\xf8\xd2" # Q004

crates/ruff_linter/src/directives.rs

+1-4
Original file line number | Diff line number | Diff line change
@@ -131,10 +131,7 @@ fn extract_noqa_line_for(lxr: &[LexResult], locator: &Locator, indexer: &Indexer
131131

132132
// For multi-line strings, we expect `noqa` directives on the last line of the
133133
// string.
134-
Tok::String {
135-
triple_quoted: true,
136-
..
137-
} => {
134+
Tok::String { kind, .. } if kind.is_triple_quoted() => {
138135
if locator.contains_line_break(*range) {
139136
string_mappings.push(TextRange::new(
140137
locator.line_start(range.start()),

crates/ruff_linter/src/rules/flake8_commas/rules/trailing_commas.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -243,7 +243,7 @@ pub(crate) fn trailing_commas(
243243
// F-strings are handled as `String` token type with the complete range
244244
// of the outermost f-string. This means that the expression inside the
245245
// f-string is not checked for trailing commas.
246-
Tok::FStringStart => {
246+
Tok::FStringStart(_) => {
247247
fstrings = fstrings.saturating_add(1);
248248
None
249249
}

crates/ruff_linter/src/rules/flake8_implicit_str_concat/rules/implicit.rs

+2-2
Original file line number | Diff line number | Diff line change
@@ -110,7 +110,7 @@ pub(crate) fn implicit(
110110
{
111111
let (a_range, b_range) = match (a_tok, b_tok) {
112112
(Tok::String { .. }, Tok::String { .. }) => (*a_range, *b_range),
113-
(Tok::String { .. }, Tok::FStringStart) => {
113+
(Tok::String { .. }, Tok::FStringStart(_)) => {
114114
match indexer.fstring_ranges().innermost(b_range.start()) {
115115
Some(b_range) => (*a_range, b_range),
116116
None => continue,
@@ -122,7 +122,7 @@ pub(crate) fn implicit(
122122
None => continue,
123123
}
124124
}
125-
(Tok::FStringEnd, Tok::FStringStart) => {
125+
(Tok::FStringEnd, Tok::FStringStart(_)) => {
126126
match (
127127
indexer.fstring_ranges().innermost(a_range.start()),
128128
indexer.fstring_ranges().innermost(b_range.start()),

crates/ruff_linter/src/rules/flake8_quotes/rules/avoidable_escaped_quote.rs

+17-34
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix};
22
use ruff_macros::{derive_message_formats, violation};
3-
use ruff_python_ast::str::{is_triple_quote, leading_quote};
43
use ruff_python_parser::lexer::LexResult;
54
use ruff_python_parser::Tok;
65
use ruff_source_file::Locator;
@@ -158,7 +157,7 @@ pub(crate) fn avoidable_escaped_quote(
158157
// ```python
159158
// f'"foo" {'nested'}'
160159
// ```
161-
if matches!(tok, Tok::String { .. } | Tok::FStringStart) {
160+
if matches!(tok, Tok::String { .. } | Tok::FStringStart(_)) {
162161
if let Some(fstring_context) = fstrings.last_mut() {
163162
fstring_context.ignore_escaped_quotes();
164163
continue;
@@ -170,16 +169,13 @@ pub(crate) fn avoidable_escaped_quote(
170169
Tok::String {
171170
value: string_contents,
172171
kind,
173-
triple_quoted,
174172
} => {
175-
if kind.is_raw() || *triple_quoted {
173+
if kind.is_raw_string() || kind.is_triple_quoted() {
176174
continue;
177175
}
178176

179177
// Check if we're using the preferred quotation style.
180-
if !leading_quote(locator.slice(tok_range)).is_some_and(|text| {
181-
contains_quote(text, quotes_settings.inline_quotes.as_char())
182-
}) {
178+
if Quote::from(kind.quote_style()) != quotes_settings.inline_quotes {
183179
continue;
184180
}
185181

@@ -192,7 +188,7 @@ pub(crate) fn avoidable_escaped_quote(
192188
let mut diagnostic = Diagnostic::new(AvoidableEscapedQuote, tok_range);
193189
let fixed_contents = format!(
194190
"{prefix}{quote}{value}{quote}",
195-
prefix = kind.as_str(),
191+
prefix = kind.prefix_str(),
196192
quote = quotes_settings.inline_quotes.opposite().as_char(),
197193
value = unescape_string(
198194
string_contents,
@@ -206,12 +202,11 @@ pub(crate) fn avoidable_escaped_quote(
206202
diagnostics.push(diagnostic);
207203
}
208204
}
209-
Tok::FStringStart => {
210-
let text = locator.slice(tok_range);
205+
Tok::FStringStart(kind) => {
211206
// Check for escaped quote only if we're using the preferred quotation
212207
// style and it isn't a triple-quoted f-string.
213-
let check_for_escaped_quote = !is_triple_quote(text)
214-
&& contains_quote(text, quotes_settings.inline_quotes.as_char());
208+
let check_for_escaped_quote = !kind.is_triple_quoted()
209+
&& Quote::from(kind.quote_style()) == quotes_settings.inline_quotes;
215210
fstrings.push(FStringContext::new(
216211
check_for_escaped_quote,
217212
tok_range,
@@ -220,9 +215,8 @@ pub(crate) fn avoidable_escaped_quote(
220215
}
221216
Tok::FStringMiddle {
222217
value: string_contents,
223-
is_raw,
224-
triple_quoted: _,
225-
} if !is_raw => {
218+
kind,
219+
} if !kind.is_raw_string() => {
226220
let Some(context) = fstrings.last_mut() else {
227221
continue;
228222
};
@@ -315,25 +309,20 @@ pub(crate) fn unnecessary_escaped_quote(
315309
Tok::String {
316310
value: string_contents,
317311
kind,
318-
triple_quoted,
319312
} => {
320-
if kind.is_raw() || *triple_quoted {
313+
if kind.is_raw_string() || kind.is_triple_quoted() {
321314
continue;
322315
}
323316

324-
let leading = match leading_quote(locator.slice(tok_range)) {
325-
Some("\"") => Quote::Double,
326-
Some("'") => Quote::Single,
327-
_ => continue,
328-
};
317+
let leading = kind.quote_style();
329318
if !contains_escaped_quote(string_contents, leading.opposite().as_char()) {
330319
continue;
331320
}
332321

333322
let mut diagnostic = Diagnostic::new(UnnecessaryEscapedQuote, tok_range);
334323
let fixed_contents = format!(
335324
"{prefix}{quote}{value}{quote}",
336-
prefix = kind.as_str(),
325+
prefix = kind.prefix_str(),
337326
quote = leading.as_char(),
338327
value = unescape_string(string_contents, leading.opposite().as_char())
339328
);
@@ -343,16 +332,11 @@ pub(crate) fn unnecessary_escaped_quote(
343332
)));
344333
diagnostics.push(diagnostic);
345334
}
346-
Tok::FStringStart => {
347-
let text = locator.slice(tok_range);
335+
Tok::FStringStart(kind) => {
348336
// Check for escaped quotes only if this isn't a triple-quoted f-string; the
349337
// f-string's own quote style (whichever it is) is recorded for the context.
350-
let check_for_escaped_quote = !is_triple_quote(text);
351-
let quote_style = if contains_quote(text, Quote::Single.as_char()) {
352-
Quote::Single
353-
} else {
354-
Quote::Double
355-
};
338+
let check_for_escaped_quote = !kind.is_triple_quoted();
339+
let quote_style = Quote::from(kind.quote_style());
356340
fstrings.push(FStringContext::new(
357341
check_for_escaped_quote,
358342
tok_range,
@@ -361,9 +345,8 @@ pub(crate) fn unnecessary_escaped_quote(
361345
}
362346
Tok::FStringMiddle {
363347
value: string_contents,
364-
is_raw,
365-
triple_quoted: _,
366-
} if !is_raw => {
348+
kind,
349+
} if !kind.is_raw_string() => {
367350
let Some(context) = fstrings.last_mut() else {
368351
continue;
369352
};

crates/ruff_linter/src/rules/flake8_quotes/rules/check_string_quotes.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -383,7 +383,7 @@ struct FStringRangeBuilder {
383383
impl FStringRangeBuilder {
384384
fn visit_token(&mut self, token: &Tok, range: TextRange) {
385385
match token {
386-
Tok::FStringStart => {
386+
Tok::FStringStart(_) => {
387387
if self.nesting == 0 {
388388
self.start_location = range.start();
389389
}

crates/ruff_linter/src/rules/flake8_quotes/settings.rs

+9
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,15 @@ impl Default for Quote {
2222
}
2323
}
2424

25+
impl From<ruff_python_parser::QuoteStyle> for Quote {
26+
fn from(value: ruff_python_parser::QuoteStyle) -> Self {
27+
match value {
28+
ruff_python_parser::QuoteStyle::Double => Self::Double,
29+
ruff_python_parser::QuoteStyle::Single => Self::Single,
30+
}
31+
}
32+
}
33+
2534
#[derive(Debug, CacheKey)]
2635
pub struct Settings {
2736
pub inline_quotes: Quote,

crates/ruff_linter/src/rules/flake8_quotes/snapshots/ruff_linter__rules__flake8_quotes__tests__require_doubles_over_singles_escaped_unnecessary.py.snap

+21-3
Original file line number | Diff line number | Diff line change
@@ -326,16 +326,34 @@ singles_escaped_unnecessary.py:43:26: Q004 [*] Unnecessary escape on inner quote
326326
|
327327
41 | # Make sure we do not unescape quotes
328328
42 | this_is_fine = "This is an \\'escaped\\' quote"
329-
43 | this_should_raise_Q004 = "This is an \\\'escaped\\\' quote with an extra backslash"
329+
43 | this_should_raise_Q004 = "This is an \\\'escaped\\\' quote with an extra backslash" # Q004
330330
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Q004
331+
44 |
332+
45 | # Invalid escapes in bytestrings are also triggered:
331333
|
332334
= help: Remove backslash
333335

334336
Safe fix
335337
40 40 |
336338
41 41 | # Make sure we do not unescape quotes
337339
42 42 | this_is_fine = "This is an \\'escaped\\' quote"
338-
43 |-this_should_raise_Q004 = "This is an \\\'escaped\\\' quote with an extra backslash"
339-
43 |+this_should_raise_Q004 = "This is an \\'escaped\\' quote with an extra backslash"
340+
43 |-this_should_raise_Q004 = "This is an \\\'escaped\\\' quote with an extra backslash" # Q004
341+
43 |+this_should_raise_Q004 = "This is an \\'escaped\\' quote with an extra backslash" # Q004
342+
44 44 |
343+
45 45 | # Invalid escapes in bytestrings are also triggered:
344+
46 46 | x = b"\xe7\xeb\x0c\xa1\x1b\x83tN\xce=x\xe9\xbe\x01\xb9\x13B_\xba\xe7\x0c2\xce\'rm\x0e\xcd\xe9.\xf8\xd2" # Q004
340345

346+
singles_escaped_unnecessary.py:46:5: Q004 [*] Unnecessary escape on inner quote character
347+
|
348+
45 | # Invalid escapes in bytestrings are also triggered:
349+
46 | x = b"\xe7\xeb\x0c\xa1\x1b\x83tN\xce=x\xe9\xbe\x01\xb9\x13B_\xba\xe7\x0c2\xce\'rm\x0e\xcd\xe9.\xf8\xd2" # Q004
350+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Q004
351+
|
352+
= help: Remove backslash
341353

354+
Safe fix
355+
43 43 | this_should_raise_Q004 = "This is an \\\'escaped\\\' quote with an extra backslash" # Q004
356+
44 44 |
357+
45 45 | # Invalid escapes in bytestrings are also triggered:
358+
46 |-x = b"\xe7\xeb\x0c\xa1\x1b\x83tN\xce=x\xe9\xbe\x01\xb9\x13B_\xba\xe7\x0c2\xce\'rm\x0e\xcd\xe9.\xf8\xd2" # Q004
359+
46 |+x = b"\xe7\xeb\x0c\xa1\x1b\x83tN\xce=x\xe9\xbe\x01\xb9\x13B_\xba\xe7\x0c2\xce'rm\x0e\xcd\xe9.\xf8\xd2" # Q004

crates/ruff_linter/src/rules/pycodestyle/rules/invalid_escape_sequence.rs

+8-14
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ use memchr::memchr_iter;
33
use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix};
44
use ruff_macros::{derive_message_formats, violation};
55
use ruff_python_index::Indexer;
6-
use ruff_python_parser::{StringKind, Tok};
6+
use ruff_python_parser::Tok;
77
use ruff_source_file::Locator;
88
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
99

@@ -66,21 +66,21 @@ pub(crate) fn invalid_escape_sequence(
6666
token: &Tok,
6767
token_range: TextRange,
6868
) {
69-
let (token_source_code, string_start_location) = match token {
70-
Tok::FStringMiddle { value, is_raw, .. } => {
71-
if *is_raw {
69+
let (token_source_code, string_start_location, kind) = match token {
70+
Tok::FStringMiddle { value, kind } => {
71+
if kind.is_raw_string() {
7272
return;
7373
}
7474
let Some(range) = indexer.fstring_ranges().innermost(token_range.start()) else {
7575
return;
7676
};
77-
(&**value, range.start())
77+
(&**value, range.start(), kind)
7878
}
7979
Tok::String { kind, .. } => {
80-
if kind.is_raw() {
80+
if kind.is_raw_string() {
8181
return;
8282
}
83-
(locator.slice(token_range), token_range.start())
83+
(locator.slice(token_range), token_range.start(), kind)
8484
}
8585
_ => return,
8686
};
@@ -207,13 +207,7 @@ pub(crate) fn invalid_escape_sequence(
207207
invalid_escape_char.range(),
208208
);
209209

210-
if matches!(
211-
token,
212-
Tok::String {
213-
kind: StringKind::Unicode,
214-
..
215-
}
216-
) {
210+
if kind.is_u_string() {
217211
// Replace the Unicode prefix with `r`.
218212
diagnostic.set_fix(Fix::safe_edit(Edit::replacement(
219213
"r".to_string(),

crates/ruff_linter/src/rules/pylint/rules/bad_string_format_character.rs

+9-12
Original file line number | Diff line number | Diff line change
@@ -2,15 +2,14 @@ use std::str::FromStr;
22

33
use ruff_diagnostics::{Diagnostic, Violation};
44
use ruff_macros::{derive_message_formats, violation};
5-
use ruff_python_ast::str::{leading_quote, trailing_quote};
65
use ruff_python_ast::Expr;
76
use ruff_python_literal::{
87
cformat::{CFormatErrorType, CFormatString},
98
format::FormatPart,
109
format::FromTemplate,
1110
format::{FormatSpec, FormatSpecError, FormatString},
1211
};
13-
use ruff_python_parser::{lexer, Mode};
12+
use ruff_python_parser::{lexer, Mode, StringKind, Tok};
1413
use ruff_text_size::{Ranged, TextRange};
1514

1615
use crate::checkers::ast::Checker;
@@ -93,15 +92,15 @@ pub(crate) fn call(checker: &mut Checker, string: &str, range: TextRange) {
9392
/// Ex) `"%z" % "1"`
9493
pub(crate) fn percent(checker: &mut Checker, expr: &Expr) {
9594
// Grab each string segment (in case there's an implicit concatenation).
96-
let mut strings: Vec<TextRange> = vec![];
95+
let mut strings: Vec<(TextRange, StringKind)> = vec![];
9796
for (tok, range) in
9897
lexer::lex_starts_at(checker.locator().slice(expr), Mode::Module, expr.start()).flatten()
9998
{
100-
if tok.is_string() {
101-
strings.push(range);
102-
} else if tok.is_percent() {
99+
match tok {
100+
Tok::String { kind, .. } => strings.push((range, kind)),
103101
// Break as soon as we find the modulo symbol.
104-
break;
102+
Tok::Percent => break,
103+
_ => {}
105104
}
106105
}
107106

@@ -110,12 +109,10 @@ pub(crate) fn percent(checker: &mut Checker, expr: &Expr) {
110109
return;
111110
}
112111

113-
for range in &strings {
112+
for (range, kind) in &strings {
114113
let string = checker.locator().slice(*range);
115-
let (Some(leader), Some(trailer)) = (leading_quote(string), trailing_quote(string)) else {
116-
return;
117-
};
118-
let string = &string[leader.len()..string.len() - trailer.len()];
114+
let string = &string
115+
[usize::from(kind.opener_len())..(string.len() - usize::from(kind.closer_len()))];
119116

120117
// Parse the format string (e.g. `"%s"`) into a list of `PercentFormat`.
121118
if let Err(format_error) = CFormatString::from_str(string) {

0 commit comments

Comments
 (0)