Skip to content

Commit ac81c72

Browse files
[ruff] Ambiguous pattern passed to pytest.raises() (RUF043) (#14966)
1 parent c0b7c36 commit ac81c72

File tree

11 files changed

+575
-3
lines changed

11 files changed

+575
-3
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
import re
2+
import pytest
3+
4+
def test_foo():
5+
6+
### Errors
7+
8+
with pytest.raises(FooAtTheEnd, match="foo."): ...
9+
with pytest.raises(PackageExtraSpecifier, match="Install `foo[bar]` to enjoy all features"): ...
10+
with pytest.raises(InnocentQuestion, match="Did you mean to use `Literal` instead?"): ...
11+
12+
with pytest.raises(StringConcatenation, match="Huh"
13+
"?"): ...
14+
with pytest.raises(ManuallyEscapedWindowsPathToDotFile, match="C:\\\\Users\\\\Foo\\\\.config"): ...
15+
16+
with pytest.raises(MiddleDot, match="foo.bar"): ...
17+
with pytest.raises(EndDot, match="foobar."): ...
18+
with pytest.raises(EscapedFollowedByUnescaped, match="foo\\.*bar"): ...
19+
with pytest.raises(UnescapedFollowedByEscaped, match="foo.\\*bar"): ...
20+
21+
22+
## Metasequences
23+
24+
with pytest.raises(StartOfInput, match="foo\\Abar"): ...
25+
with pytest.raises(WordBoundary, match="foo\\bbar"): ...
26+
with pytest.raises(NonWordBoundary, match="foo\\Bbar"): ...
27+
with pytest.raises(Digit, match="foo\\dbar"): ...
28+
with pytest.raises(NonDigit, match="foo\\Dbar"): ...
29+
with pytest.raises(Whitespace, match="foo\\sbar"): ...
30+
with pytest.raises(NonWhitespace, match="foo\\Sbar"): ...
31+
with pytest.raises(WordCharacter, match="foo\\wbar"): ...
32+
with pytest.raises(NonWordCharacter, match="foo\\Wbar"): ...
33+
with pytest.raises(EndOfInput, match="foo\\zbar"): ...
34+
35+
with pytest.raises(StartOfInput2, match="foobar\\A"): ...
36+
with pytest.raises(WordBoundary2, match="foobar\\b"): ...
37+
with pytest.raises(NonWordBoundary2, match="foobar\\B"): ...
38+
with pytest.raises(Digit2, match="foobar\\d"): ...
39+
with pytest.raises(NonDigit2, match="foobar\\D"): ...
40+
with pytest.raises(Whitespace2, match="foobar\\s"): ...
41+
with pytest.raises(NonWhitespace2, match="foobar\\S"): ...
42+
with pytest.raises(WordCharacter2, match="foobar\\w"): ...
43+
with pytest.raises(NonWordCharacter2, match="foobar\\W"): ...
44+
with pytest.raises(EndOfInput2, match="foobar\\z"): ...
45+
46+
47+
### Acceptable false positives
48+
49+
with pytest.raises(NameEscape, match="\\N{EN DASH}"): ...
50+
51+
52+
### No errors
53+
54+
with pytest.raises(NoMatch): ...
55+
with pytest.raises(NonLiteral, match=pattern): ...
56+
with pytest.raises(FunctionCall, match=frobnicate("qux")): ...
57+
with pytest.raises(ReEscaped, match=re.escape("foobar")): ...
58+
with pytest.raises(RawString, match=r"fo()bar"): ...
59+
with pytest.raises(RawStringPart, match=r"foo" '\bar'): ...
60+
with pytest.raises(NoMetacharacters, match="foobar"): ...
61+
with pytest.raises(EndBackslash, match="foobar\\"): ...
62+
63+
with pytest.raises(ManuallyEscaped, match="some\\.fully\\.qualified\\.name"): ...
64+
with pytest.raises(ManuallyEscapedWindowsPath, match="C:\\\\Users\\\\Foo\\\\file\\.py"): ...
65+
66+
with pytest.raises(MiddleEscapedDot, match="foo\\.bar"): ...
67+
with pytest.raises(MiddleEscapedBackslash, match="foo\\\\bar"): ...
68+
with pytest.raises(EndEscapedDot, match="foobar\\."): ...
69+
with pytest.raises(EndEscapedBackslash, match="foobar\\\\"): ...
70+
71+
72+
## Not-so-special metasequences
73+
74+
with pytest.raises(Alert, match="\\f"): ...
75+
with pytest.raises(FormFeed, match="\\f"): ...
76+
with pytest.raises(Newline, match="\\n"): ...
77+
with pytest.raises(CarriageReturn, match="\\r"): ...
78+
with pytest.raises(Tab, match="\\t"): ...
79+
with pytest.raises(VerticalTab, match="\\v"): ...
80+
with pytest.raises(HexEscape, match="\\xFF"): ...
81+
with pytest.raises(_16BitUnicodeEscape, match="\\FFFF"): ...
82+
with pytest.raises(_32BitUnicodeEscape, match="\\0010FFFF"): ...
83+
84+
## Escaped metasequences
85+
86+
with pytest.raises(Whitespace, match="foo\\\\sbar"): ...
87+
with pytest.raises(NonWhitespace, match="foo\\\\Sbar"): ...
88+
89+
## Work by accident
90+
91+
with pytest.raises(OctalEscape, match="\\042"): ...

crates/ruff_linter/src/checkers/ast/analyze/expression.rs

+3
Original file line numberDiff line numberDiff line change
@@ -1105,6 +1105,9 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) {
11051105
if checker.enabled(Rule::BatchedWithoutExplicitStrict) {
11061106
flake8_bugbear::rules::batched_without_explicit_strict(checker, call);
11071107
}
1108+
if checker.enabled(Rule::PytestRaisesAmbiguousPattern) {
1109+
ruff::rules::pytest_raises_ambiguous_pattern(checker, call);
1110+
}
11081111
}
11091112
Expr::Dict(dict) => {
11101113
if checker.any_enabled(&[

crates/ruff_linter/src/codes.rs

+1
Original file line numberDiff line numberDiff line change
@@ -985,6 +985,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
985985
(Ruff, "039") => (RuleGroup::Preview, rules::ruff::rules::UnrawRePattern),
986986
(Ruff, "040") => (RuleGroup::Preview, rules::ruff::rules::InvalidAssertMessageLiteralArgument),
987987
(Ruff, "041") => (RuleGroup::Preview, rules::ruff::rules::UnnecessaryNestedLiteral),
988+
(Ruff, "043") => (RuleGroup::Preview, rules::ruff::rules::PytestRaisesAmbiguousPattern),
988989
(Ruff, "046") => (RuleGroup::Preview, rules::ruff::rules::UnnecessaryCastToInt),
989990
(Ruff, "048") => (RuleGroup::Preview, rules::ruff::rules::MapIntVersionParsing),
990991
(Ruff, "051") => (RuleGroup::Preview, rules::ruff::rules::IfKeyInDictDel),

crates/ruff_linter/src/rules/flake8_pytest_style/rules/assertion.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ impl Violation for PytestAssertInExcept {
144144
/// ...
145145
/// ```
146146
///
147-
/// References
147+
/// ## References
148148
/// - [`pytest` documentation: `pytest.fail`](https://docs.pytest.org/en/latest/reference/reference.html#pytest-fail)
149149
#[derive(ViolationMetadata)]
150150
pub(crate) struct PytestAssertAlwaysFalse;

crates/ruff_linter/src/rules/flake8_pytest_style/rules/marks.rs

-1
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,6 @@ impl AlwaysFixableViolation for PytestIncorrectMarkParenthesesStyle {
104104
///
105105
/// ## References
106106
/// - [`pytest` documentation: `pytest.mark.usefixtures`](https://docs.pytest.org/en/latest/reference/reference.html#pytest-mark-usefixtures)
107-
108107
#[derive(ViolationMetadata)]
109108
pub(crate) struct PytestUseFixturesWithoutParameters;
110109

crates/ruff_linter/src/rules/flake8_pytest_style/rules/raises.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ impl Violation for PytestRaisesWithoutException {
151151
}
152152
}
153153

154-
fn is_pytest_raises(func: &Expr, semantic: &SemanticModel) -> bool {
154+
pub(crate) fn is_pytest_raises(func: &Expr, semantic: &SemanticModel) -> bool {
155155
semantic
156156
.resolve_qualified_name(func)
157157
.is_some_and(|qualified_name| matches!(qualified_name.segments(), ["pytest", "raises"]))

crates/ruff_linter/src/rules/ruff/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,7 @@ mod tests {
415415
#[test_case(Rule::UnnecessaryRegularExpression, Path::new("RUF055_0.py"))]
416416
#[test_case(Rule::UnnecessaryRegularExpression, Path::new("RUF055_1.py"))]
417417
#[test_case(Rule::UnnecessaryCastToInt, Path::new("RUF046.py"))]
418+
#[test_case(Rule::PytestRaisesAmbiguousPattern, Path::new("RUF043.py"))]
418419
fn preview_rules(rule_code: Rule, path: &Path) -> Result<()> {
419420
let snapshot = format!(
420421
"preview__{}_{}",

crates/ruff_linter/src/rules/ruff/rules/mod.rs

+2
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ pub(crate) use never_union::*;
2323
pub(crate) use none_not_at_end_of_union::*;
2424
pub(crate) use parenthesize_chained_operators::*;
2525
pub(crate) use post_init_default::*;
26+
pub(crate) use pytest_raises_ambiguous_pattern::*;
2627
pub(crate) use quadratic_list_summation::*;
2728
pub(crate) use redirected_noqa::*;
2829
pub(crate) use redundant_bool_literal::*;
@@ -71,6 +72,7 @@ mod never_union;
7172
mod none_not_at_end_of_union;
7273
mod parenthesize_chained_operators;
7374
mod post_init_default;
75+
mod pytest_raises_ambiguous_pattern;
7476
mod quadratic_list_summation;
7577
mod redirected_noqa;
7678
mod redundant_bool_literal;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
use crate::checkers::ast::Checker;
2+
use crate::rules::flake8_pytest_style::rules::is_pytest_raises;
3+
use ruff_diagnostics::{Diagnostic, Violation};
4+
use ruff_macros::{derive_message_formats, ViolationMetadata};
5+
use ruff_python_ast as ast;
6+
7+
/// ## What it does
8+
/// Checks for non-raw literal string arguments passed to the `match` parameter
9+
/// of `pytest.raises()` where the string contains at least one unescaped
10+
/// regex metacharacter.
11+
///
12+
/// ## Why is this bad?
13+
/// The `match` argument is implicitly converted to a regex under the hood.
14+
/// It should be made explicit whether the string is meant to be a regex or a "plain" pattern
15+
/// by prefixing the string with the `r` suffix, escaping the metacharacter(s)
16+
/// in the string using backslashes, or wrapping the entire string in a call to
17+
/// `re.escape()`.
18+
///
19+
/// ## Example
20+
///
21+
/// ```python
22+
/// import pytest
23+
///
24+
///
25+
/// with pytest.raises(Exception, match="A full sentence."):
26+
/// do_thing_that_raises()
27+
/// ```
28+
///
29+
/// Use instead:
30+
///
31+
/// ```python
32+
/// import pytest
33+
///
34+
///
35+
/// with pytest.raises(Exception, match=r"A full sentence."):
36+
/// do_thing_that_raises()
37+
/// ```
38+
///
39+
/// Alternatively:
40+
///
41+
/// ```python
42+
/// import pytest
43+
/// import re
44+
///
45+
///
46+
/// with pytest.raises(Exception, match=re.escape("A full sentence.")):
47+
/// do_thing_that_raises()
48+
/// ```
49+
///
50+
/// or:
51+
///
52+
/// ```python
53+
/// import pytest
54+
/// import re
55+
///
56+
///
57+
/// with pytest.raises(Exception, "A full sentence\\."):
58+
/// do_thing_that_raises()
59+
/// ```
60+
///
61+
/// ## References
62+
/// - [Python documentation: `re.escape`](https://docs.python.org/3/library/re.html#re.escape)
63+
/// - [`pytest` documentation: `pytest.raises`](https://docs.pytest.org/en/latest/reference/reference.html#pytest-raises)
64+
#[derive(ViolationMetadata)]
65+
pub(crate) struct PytestRaisesAmbiguousPattern;
66+
67+
impl Violation for PytestRaisesAmbiguousPattern {
68+
#[derive_message_formats]
69+
fn message(&self) -> String {
70+
"Pattern passed to `match=` contains metacharacters but is neither escaped nor raw"
71+
.to_string()
72+
}
73+
74+
fn fix_title(&self) -> Option<String> {
75+
Some("Use a raw string or `re.escape()` to make the intention explicit".to_string())
76+
}
77+
}
78+
79+
/// RUF043
80+
pub(crate) fn pytest_raises_ambiguous_pattern(checker: &mut Checker, call: &ast::ExprCall) {
81+
if !is_pytest_raises(&call.func, checker.semantic()) {
82+
return;
83+
}
84+
85+
// It *can* be passed as a positional argument if you try very hard,
86+
// but pytest only documents it as a keyword argument, and it's quite hard pass it positionally
87+
let Some(ast::Keyword { value, .. }) = call.arguments.find_keyword("match") else {
88+
return;
89+
};
90+
91+
let ast::Expr::StringLiteral(string) = value else {
92+
return;
93+
};
94+
95+
let any_part_is_raw = string.value.iter().any(|part| part.flags.prefix().is_raw());
96+
97+
if any_part_is_raw || !string_has_unescaped_metacharacters(&string.value) {
98+
return;
99+
}
100+
101+
let diagnostic = Diagnostic::new(PytestRaisesAmbiguousPattern, string.range);
102+
103+
checker.diagnostics.push(diagnostic);
104+
}
105+
106+
fn string_has_unescaped_metacharacters(value: &ast::StringLiteralValue) -> bool {
107+
let mut escaped = false;
108+
109+
for character in value.chars() {
110+
if escaped {
111+
if escaped_char_is_regex_metasequence(character) {
112+
return true;
113+
}
114+
115+
escaped = false;
116+
continue;
117+
}
118+
119+
if character == '\\' {
120+
escaped = true;
121+
continue;
122+
}
123+
124+
if char_is_regex_metacharacter(character) {
125+
return true;
126+
}
127+
}
128+
129+
false
130+
}
131+
132+
/// Whether the sequence `\<c>` means anything special:
133+
///
134+
/// * `\A`: Start of input
135+
/// * `\b`, `\B`: Word boundary and non-word-boundary
136+
/// * `\d`, `\D`: Digit and non-digit
137+
/// * `\s`, `\S`: Whitespace and non-whitespace
138+
/// * `\w`, `\W`: Word and non-word character
139+
/// * `\z`: End of input
140+
///
141+
/// `\u`, `\U`, `\N`, `\x`, `\a`, `\f`, `\n`, `\r`, `\t`, `\v`
142+
/// are also valid in normal strings and thus do not count.
143+
/// `\b` means backspace only in character sets,
144+
/// while backreferences (e.g., `\1`) are not valid without groups,
145+
/// both of which should be caught in [`string_has_unescaped_metacharacters`].
146+
const fn escaped_char_is_regex_metasequence(c: char) -> bool {
147+
matches!(c, 'A' | 'b' | 'B' | 'd' | 'D' | 's' | 'S' | 'w' | 'W' | 'z')
148+
}
149+
150+
const fn char_is_regex_metacharacter(c: char) -> bool {
151+
matches!(
152+
c,
153+
'.' | '^' | '$' | '*' | '+' | '?' | '{' | '[' | '\\' | '|' | '('
154+
)
155+
}

0 commit comments

Comments
 (0)