Skip to content

Commit 27743ef

Browse files
vjurczeniantBre
andauthored
[pylint] Implement missing-maxsplit-arg (PLC0207) (#17454)
## Summary Implements `use-maxsplit-arg` (`PLC0207`) https://pylint.readthedocs.io/en/latest/user_guide/messages/convention/use-maxsplit-arg.html > Emitted when accessing only the first or last element of str.split(). The first and last element can be accessed by using str.split(sep, maxsplit=1)[0] or str.rsplit(sep, maxsplit=1)[-1] instead. This is part of #970 ## Test Plan `cargo test` Additionally compared Ruff output to Pylint: ``` pylint --disable=all --enable=use-maxsplit-arg crates/ruff_linter/resources/test/fixtures/pylint/missing_maxsplit_arg.py cargo run -p ruff -- check crates/ruff_linter/resources/test/fixtures/pylint/missing_maxsplit_arg.py --no-cache --select PLC0207 ``` --------- Co-authored-by: Brent Westbrook <[email protected]>
1 parent c60b4d7 commit 27743ef

File tree

8 files changed

+650
-0
lines changed

8 files changed

+650
-0
lines changed
Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
SEQ = "1,2,3"
2+
3+
class Foo(str):
4+
class_str = "1,2,3"
5+
6+
def split(self, sep=None, maxsplit=-1) -> list[str]:
7+
return super().split(sep, maxsplit)
8+
9+
class Bar():
10+
split = "1,2,3"
11+
12+
# Errors
13+
## Test split called directly on string literal
14+
"1,2,3".split(",")[0] # [missing-maxsplit-arg]
15+
"1,2,3".split(",")[-1] # [missing-maxsplit-arg]
16+
"1,2,3".rsplit(",")[0] # [missing-maxsplit-arg]
17+
"1,2,3".rsplit(",")[-1] # [missing-maxsplit-arg]
18+
19+
## Test split called on string variable
20+
SEQ.split(",")[0] # [missing-maxsplit-arg]
21+
SEQ.split(",")[-1] # [missing-maxsplit-arg]
22+
SEQ.rsplit(",")[0] # [missing-maxsplit-arg]
23+
SEQ.rsplit(",")[-1] # [missing-maxsplit-arg]
24+
25+
## Test split called on class attribute
26+
Foo.class_str.split(",")[0] # [missing-maxsplit-arg]
27+
Foo.class_str.split(",")[-1] # [missing-maxsplit-arg]
28+
Foo.class_str.rsplit(",")[0] # [missing-maxsplit-arg]
29+
Foo.class_str.rsplit(",")[-1] # [missing-maxsplit-arg]
30+
31+
## Test split called on sliced string
32+
"1,2,3"[::-1].split(",")[0] # [missing-maxsplit-arg]
33+
"1,2,3"[::-1][::-1].split(",")[0] # [missing-maxsplit-arg]
34+
SEQ[:3].split(",")[0] # [missing-maxsplit-arg]
35+
Foo.class_str[1:3].split(",")[-1] # [missing-maxsplit-arg]
36+
"1,2,3"[::-1].rsplit(",")[0] # [missing-maxsplit-arg]
37+
SEQ[:3].rsplit(",")[0] # [missing-maxsplit-arg]
38+
Foo.class_str[1:3].rsplit(",")[-1] # [missing-maxsplit-arg]
39+
40+
## Test sep given as named argument
41+
"1,2,3".split(sep=",")[0] # [missing-maxsplit-arg]
42+
"1,2,3".split(sep=",")[-1] # [missing-maxsplit-arg]
43+
"1,2,3".rsplit(sep=",")[0] # [missing-maxsplit-arg]
44+
"1,2,3".rsplit(sep=",")[-1] # [missing-maxsplit-arg]
45+
46+
## Special cases
47+
"1,2,3".split("\n")[0] # [missing-maxsplit-arg]
48+
"1,2,3".split("split")[-1] # [missing-maxsplit-arg]
49+
"1,2,3".rsplit("rsplit")[0] # [missing-maxsplit-arg]
50+
51+
## Test class attribute named split
52+
Bar.split.split(",")[0] # [missing-maxsplit-arg]
53+
Bar.split.split(",")[-1] # [missing-maxsplit-arg]
54+
Bar.split.rsplit(",")[0] # [missing-maxsplit-arg]
55+
Bar.split.rsplit(",")[-1] # [missing-maxsplit-arg]
56+
57+
## Test unpacked dict literal kwargs
58+
"1,2,3".split(**{"sep": ","})[0] # [missing-maxsplit-arg]
59+
60+
61+
# OK
62+
## Test not accessing the first or last element
63+
### Test split called directly on string literal
64+
"1,2,3".split(",")[1]
65+
"1,2,3".split(",")[-2]
66+
"1,2,3".rsplit(",")[1]
67+
"1,2,3".rsplit(",")[-2]
68+
69+
### Test split called on string variable
70+
SEQ.split(",")[1]
71+
SEQ.split(",")[-2]
72+
SEQ.rsplit(",")[1]
73+
SEQ.rsplit(",")[-2]
74+
75+
### Test split called on class attribute
76+
Foo.class_str.split(",")[1]
77+
Foo.class_str.split(",")[-2]
78+
Foo.class_str.rsplit(",")[1]
79+
Foo.class_str.rsplit(",")[-2]
80+
81+
### Test split called on sliced string
82+
"1,2,3"[::-1].split(",")[1]
83+
SEQ[:3].split(",")[1]
84+
Foo.class_str[1:3].split(",")[-2]
85+
"1,2,3"[::-1].rsplit(",")[1]
86+
SEQ[:3].rsplit(",")[1]
87+
Foo.class_str[1:3].rsplit(",")[-2]
88+
89+
### Test sep given as named argument
90+
"1,2,3".split(sep=",")[1]
91+
"1,2,3".split(sep=",")[-2]
92+
"1,2,3".rsplit(sep=",")[1]
93+
"1,2,3".rsplit(sep=",")[-2]
94+
95+
## Test varying maxsplit argument
96+
### str.split() tests
97+
"1,2,3".split(sep=",", maxsplit=1)[-1]
98+
"1,2,3".split(sep=",", maxsplit=1)[0]
99+
"1,2,3".split(sep=",", maxsplit=2)[-1]
100+
"1,2,3".split(sep=",", maxsplit=2)[0]
101+
"1,2,3".split(sep=",", maxsplit=2)[1]
102+
103+
### str.rsplit() tests
104+
"1,2,3".rsplit(sep=",", maxsplit=1)[-1]
105+
"1,2,3".rsplit(sep=",", maxsplit=1)[0]
106+
"1,2,3".rsplit(sep=",", maxsplit=2)[-1]
107+
"1,2,3".rsplit(sep=",", maxsplit=2)[0]
108+
"1,2,3".rsplit(sep=",", maxsplit=2)[1]
109+
110+
## Test user-defined split
111+
Foo("1,2,3").split(",")[0]
112+
Foo("1,2,3").split(",")[-1]
113+
Foo("1,2,3").rsplit(",")[0]
114+
Foo("1,2,3").rsplit(",")[-1]
115+
116+
## Test split called on sliced list
117+
["1", "2", "3"][::-1].split(",")[0]
118+
119+
## Test class attribute named split
120+
Bar.split[0]
121+
Bar.split[-1]
122+
Bar.split[0]
123+
Bar.split[-1]
124+
125+
## Test unpacked dict literal kwargs
126+
"1,2,3".split(",", **{"maxsplit": 1})[0]
127+
"1,2,3".split(**{"sep": ",", "maxsplit": 1})[0]
128+
129+
130+
# TODO
131+
132+
## Test variable split result index
133+
## TODO: These require the ability to resolve a variable name to a value
134+
# Errors
135+
result_index = 0
136+
"1,2,3".split(",")[result_index] # TODO: [missing-maxsplit-arg]
137+
result_index = -1
138+
"1,2,3".split(",")[result_index] # TODO: [missing-maxsplit-arg]
139+
# OK
140+
result_index = 1
141+
"1,2,3".split(",")[result_index]
142+
result_index = -2
143+
"1,2,3".split(",")[result_index]
144+
145+
146+
## Test split result index modified in loop
147+
## TODO: These require the ability to recognize being in a loop where:
148+
## - the result of split called on a string is indexed by a variable
149+
## - the variable index above is modified
150+
# OK
151+
result_index = 0
152+
for j in range(3):
153+
print(SEQ.split(",")[result_index])
154+
result_index = result_index + 1
155+
156+
157+
## Test accessor
158+
## TODO: These require the ability to get the return type of a method
159+
## (possibly via `typing::is_string`)
160+
class Baz():
161+
def __init__(self):
162+
self.my_str = "1,2,3"
163+
164+
def get_string(self) -> str:
165+
return self.my_str
166+
167+
# Errors
168+
Baz().get_string().split(",")[0] # TODO: [missing-maxsplit-arg]
169+
Baz().get_string().split(",")[-1] # TODO: [missing-maxsplit-arg]
170+
# OK
171+
Baz().get_string().split(",")[1]
172+
Baz().get_string().split(",")[-2]
173+
174+
175+
## Test unpacked dict instance kwargs
176+
## TODO: These require the ability to resolve a dict variable name to a value
177+
# Errors
178+
kwargs_without_maxsplit = {"seq": ","}
179+
"1,2,3".split(**kwargs_without_maxsplit)[0] # TODO: [missing-maxsplit-arg]
180+
# OK
181+
kwargs_with_maxsplit = {"maxsplit": 1}
182+
"1,2,3".split(",", **kwargs_with_maxsplit)[0] # TODO: false positive
183+
kwargs_with_maxsplit = {"sep": ",", "maxsplit": 1}
184+
"1,2,3".split(**kwargs_with_maxsplit)[0] # TODO: false positive

crates/ruff_linter/src/checkers/ast/analyze/expression.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,9 @@ pub(crate) fn expression(expr: &Expr, checker: &Checker) {
176176
if checker.enabled(Rule::Airflow3Removal) {
177177
airflow::rules::airflow_3_removal_expr(checker, expr);
178178
}
179+
if checker.enabled(Rule::MissingMaxsplitArg) {
180+
pylint::rules::missing_maxsplit_arg(checker, value, slice, expr);
181+
}
179182
pandas_vet::rules::subscript(checker, value, expr);
180183
}
181184
Expr::Tuple(ast::ExprTuple {

crates/ruff_linter/src/codes.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
198198
(Pylint, "C0132") => (RuleGroup::Stable, rules::pylint::rules::TypeParamNameMismatch),
199199
(Pylint, "C0205") => (RuleGroup::Stable, rules::pylint::rules::SingleStringSlots),
200200
(Pylint, "C0206") => (RuleGroup::Stable, rules::pylint::rules::DictIndexMissingItems),
201+
(Pylint, "C0207") => (RuleGroup::Preview, rules::pylint::rules::MissingMaxsplitArg),
201202
(Pylint, "C0208") => (RuleGroup::Stable, rules::pylint::rules::IterationOverSet),
202203
(Pylint, "C0414") => (RuleGroup::Stable, rules::pylint::rules::UselessImportAlias),
203204
(Pylint, "C0415") => (RuleGroup::Preview, rules::pylint::rules::ImportOutsideTopLevel),

crates/ruff_linter/src/rules/pylint/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,7 @@ mod tests {
231231
Path::new("bad_staticmethod_argument.py")
232232
)]
233233
#[test_case(Rule::LenTest, Path::new("len_as_condition.py"))]
234+
#[test_case(Rule::MissingMaxsplitArg, Path::new("missing_maxsplit_arg.py"))]
234235
fn rules(rule_code: Rule, path: &Path) -> Result<()> {
235236
let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
236237
let diagnostics = test_path(
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
use ruff_macros::{ViolationMetadata, derive_message_formats};
2+
use ruff_python_ast::{
3+
DictItem, Expr, ExprAttribute, ExprCall, ExprDict, ExprNumberLiteral, ExprStringLiteral,
4+
ExprSubscript, ExprUnaryOp, Keyword, Number, UnaryOp,
5+
};
6+
use ruff_python_semantic::{SemanticModel, analyze::typing};
7+
use ruff_text_size::Ranged;
8+
9+
use crate::Violation;
10+
use crate::checkers::ast::Checker;
11+
12+
/// ## What it does
13+
/// Checks for access to the first or last element of `str.split()` without
14+
/// `maxsplit=1`
15+
///
16+
/// ## Why is this bad?
17+
/// Calling `str.split()` without `maxsplit` set splits on every delimiter in the
18+
/// string. When accessing only the first or last element of the result, it
19+
/// would be more efficient to only split once.
20+
///
21+
/// ## Example
22+
/// ```python
23+
/// url = "www.example.com"
24+
/// prefix = url.split(".")[0]
25+
/// ```
26+
///
27+
/// Use instead:
28+
/// ```python
29+
/// url = "www.example.com"
30+
/// prefix = url.split(".", maxsplit=1)[0]
31+
/// ```
32+
33+
#[derive(ViolationMetadata)]
34+
pub(crate) struct MissingMaxsplitArg;
35+
36+
impl Violation for MissingMaxsplitArg {
37+
#[derive_message_formats]
38+
fn message(&self) -> String {
39+
"Accessing only the first or last element of `str.split()` without setting `maxsplit=1`"
40+
.to_string()
41+
}
42+
}
43+
44+
fn is_string(expr: &Expr, semantic: &SemanticModel) -> bool {
45+
if let Expr::Name(name) = expr {
46+
semantic
47+
.only_binding(name)
48+
.is_some_and(|binding_id| typing::is_string(semantic.binding(binding_id), semantic))
49+
} else if let Some(binding_id) = semantic.lookup_attribute(expr) {
50+
typing::is_string(semantic.binding(binding_id), semantic)
51+
} else {
52+
expr.is_string_literal_expr()
53+
}
54+
}
55+
56+
/// PLC0207
57+
pub(crate) fn missing_maxsplit_arg(checker: &Checker, value: &Expr, slice: &Expr, expr: &Expr) {
58+
// Check the sliced expression is a function
59+
let Expr::Call(ExprCall {
60+
func, arguments, ..
61+
}) = value
62+
else {
63+
return;
64+
};
65+
66+
// Check the slice index is either 0 or -1 (first or last value)
67+
let index = match slice {
68+
Expr::NumberLiteral(ExprNumberLiteral {
69+
value: Number::Int(number_value),
70+
..
71+
}) => number_value.as_i64(),
72+
Expr::UnaryOp(ExprUnaryOp {
73+
op: UnaryOp::USub,
74+
operand,
75+
..
76+
}) => match operand.as_ref() {
77+
Expr::NumberLiteral(ExprNumberLiteral {
78+
value: Number::Int(number_value),
79+
..
80+
}) => number_value.as_i64().map(|number| -number),
81+
_ => return,
82+
},
83+
_ => return,
84+
};
85+
86+
if !matches!(index, Some(0 | -1)) {
87+
return;
88+
}
89+
90+
let Expr::Attribute(ExprAttribute { attr, value, .. }) = func.as_ref() else {
91+
return;
92+
};
93+
94+
// Check the function is "split" or "rsplit"
95+
let attr = attr.as_str();
96+
if !matches!(attr, "split" | "rsplit") {
97+
return;
98+
}
99+
100+
let mut target_instance = value;
101+
// a subscripted value could technically be subscripted further ad infinitum, so we
102+
// recurse into the subscript expressions until we find the value being subscripted
103+
while let Expr::Subscript(ExprSubscript { value, .. }) = target_instance.as_ref() {
104+
target_instance = value;
105+
}
106+
107+
// Check the function is called on a string
108+
if !is_string(target_instance, checker.semantic()) {
109+
return;
110+
}
111+
112+
// Check the function does not have maxsplit set
113+
if arguments.find_argument_value("maxsplit", 1).is_some() {
114+
return;
115+
}
116+
117+
// Check maxsplit kwarg not set via unpacked dict literal
118+
for keyword in &*arguments.keywords {
119+
let Keyword { value, .. } = keyword;
120+
121+
if let Expr::Dict(ExprDict { items, .. }) = value {
122+
for item in items {
123+
let DictItem { key, .. } = item;
124+
if let Some(Expr::StringLiteral(ExprStringLiteral { value, .. })) = key {
125+
if value.to_str() == "maxsplit" {
126+
return;
127+
}
128+
}
129+
}
130+
}
131+
}
132+
133+
checker.report_diagnostic(MissingMaxsplitArg, expr.range());
134+
}

crates/ruff_linter/src/rules/pylint/rules/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ pub(crate) use logging::*;
4646
pub(crate) use magic_value_comparison::*;
4747
pub(crate) use manual_import_from::*;
4848
pub(crate) use misplaced_bare_raise::*;
49+
pub(crate) use missing_maxsplit_arg::*;
4950
pub(crate) use modified_iterating_set::*;
5051
pub(crate) use named_expr_without_context::*;
5152
pub(crate) use nan_comparison::*;
@@ -155,6 +156,7 @@ mod logging;
155156
mod magic_value_comparison;
156157
mod manual_import_from;
157158
mod misplaced_bare_raise;
159+
mod missing_maxsplit_arg;
158160
mod modified_iterating_set;
159161
mod named_expr_without_context;
160162
mod nan_comparison;

0 commit comments

Comments
 (0)