Skip to content

Commit ffea1bb

Browse files
[refurb] Implement write-whole-file (FURB103) (#10802)
## Summary Implement `write-whole-file` (`FURB103`), part of #1348. This is largely a copy and paste of `read-whole-file` #7682. ## Test Plan Text fixture added. --------- Co-authored-by: Dhruv Manilawala <[email protected]>
1 parent ac14d18 commit ffea1bb

File tree

11 files changed

+640
-189
lines changed

11 files changed

+640
-189
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
def foo():
2+
...
3+
4+
5+
def bar(x):
6+
...
7+
8+
9+
# Errors.
10+
11+
# FURB103
12+
with open("file.txt", "w") as f:
13+
f.write("test")
14+
15+
# FURB103
16+
with open("file.txt", "wb") as f:
17+
f.write(foobar)
18+
19+
# FURB103
20+
with open("file.txt", mode="wb") as f:
21+
f.write(b"abc")
22+
23+
# FURB103
24+
with open("file.txt", "w", encoding="utf8") as f:
25+
f.write(foobar)
26+
27+
# FURB103
28+
with open("file.txt", "w", errors="ignore") as f:
29+
f.write(foobar)
30+
31+
# FURB103
32+
with open("file.txt", mode="w") as f:
33+
f.write(foobar)
34+
35+
# FURB103
36+
with open(foo(), "wb") as f:
37+
# The body of `with` is non-trivial, but the recommendation holds.
38+
bar("pre")
39+
f.write(bar())
40+
bar("post")
41+
print("Done")
42+
43+
# FURB103
44+
with open("a.txt", "w") as a, open("b.txt", "wb") as b:
45+
a.write(x)
46+
b.write(y)
47+
48+
# FURB103
49+
with foo() as a, open("file.txt", "w") as b, foo() as c:
50+
# We have other things in here, multiple with items, but the user
51+
# writes a single time to file and that bit they can replace.
52+
bar(a)
53+
b.write(bar(bar(a + x)))
54+
bar(c)
55+
56+
57+
# FURB103
58+
with open("file.txt", "w", newline="\r\n") as f:
59+
f.write(foobar)
60+
61+
62+
# Non-errors.
63+
64+
with open("file.txt", errors="ignore", mode="wb") as f:
65+
# Path.write_bytes() does not support errors
66+
f.write(foobar)
67+
68+
f2 = open("file2.txt", "w")
69+
with open("file.txt", "w") as f:
70+
f2.write(x)
71+
72+
# mode is dynamic
73+
with open("file.txt", foo()) as f:
74+
f.write(x)
75+
76+
# keyword mode is incorrect
77+
with open("file.txt", mode="a+") as f:
78+
f.write(x)
79+
80+
# enables line buffering, not supported in write_text()
81+
with open("file.txt", buffering=1) as f:
82+
f.write(x)
83+
84+
# dont mistake "newline" for "mode"
85+
with open("file.txt", newline="wb") as f:
86+
f.write(x)
87+
88+
# I guess we can possibly also report this case, but the question
89+
# is why the user would put "w+" here in the first place.
90+
with open("file.txt", "w+") as f:
91+
f.write(x)
92+
93+
# Even though we write the whole file, we do other things.
94+
with open("file.txt", "w") as f:
95+
f.write(x)
96+
f.seek(0)
97+
x += f.read(100)
98+
99+
# This shouldn't error, since it could contain unsupported arguments, like `buffering`.
100+
with open(*filename, mode="w") as f:
101+
f.write(x)
102+
103+
# This shouldn't error, since it could contain unsupported arguments, like `buffering`.
104+
with open(**kwargs) as f:
105+
f.write(x)
106+
107+
# This shouldn't error, since it could contain unsupported arguments, like `buffering`.
108+
with open("file.txt", **kwargs) as f:
109+
f.write(x)
110+
111+
# This shouldn't error, since it could contain unsupported arguments, like `buffering`.
112+
with open("file.txt", mode="w", **kwargs) as f:
113+
f.write(x)
114+
115+
# This could error (but doesn't), since it can't contain unsupported arguments, like
116+
# `buffering`.
117+
with open(*filename, mode="w") as f:
118+
f.write(x)
119+
120+
# This could error (but doesn't), since it can't contain unsupported arguments, like
121+
# `buffering`.
122+
with open(*filename, file="file.txt", mode="w") as f:
123+
f.write(x)
124+
125+
# Loops imply multiple writes
126+
with open("file.txt", "w") as f:
127+
while x < 0:
128+
f.write(foobar)
129+
130+
with open("file.txt", "w") as f:
131+
for line in text:
132+
f.write(line)

crates/ruff_linter/src/checkers/ast/analyze/statement.rs

+3
Original file line numberDiff line numberDiff line change
@@ -1225,6 +1225,9 @@ pub(crate) fn statement(stmt: &Stmt, checker: &mut Checker) {
12251225
if checker.enabled(Rule::ReadWholeFile) {
12261226
refurb::rules::read_whole_file(checker, with_stmt);
12271227
}
1228+
if checker.enabled(Rule::WriteWholeFile) {
1229+
refurb::rules::write_whole_file(checker, with_stmt);
1230+
}
12281231
if checker.enabled(Rule::UselessWithLock) {
12291232
pylint::rules::useless_with_lock(checker, with_stmt);
12301233
}

crates/ruff_linter/src/codes.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1037,6 +1037,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
10371037

10381038
// refurb
10391039
(Refurb, "101") => (RuleGroup::Preview, rules::refurb::rules::ReadWholeFile),
1040+
(Refurb, "103") => (RuleGroup::Preview, rules::refurb::rules::WriteWholeFile),
10401041
(Refurb, "105") => (RuleGroup::Preview, rules::refurb::rules::PrintEmptyString),
10411042
(Refurb, "110") => (RuleGroup::Preview, rules::refurb::rules::IfExpInsteadOfOrOperator),
10421043
#[allow(deprecated)]

crates/ruff_linter/src/rules/refurb/helpers.rs

+217-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
use ruff_python_ast as ast;
1+
use ruff_python_ast::{self as ast, Expr};
22
use ruff_python_codegen::Generator;
3-
use ruff_text_size::TextRange;
3+
use ruff_python_semantic::{BindingId, ResolvedReference, SemanticModel};
4+
use ruff_text_size::{Ranged, TextRange};
45

56
/// Format a code snippet to call `name.method()`.
67
pub(super) fn generate_method_call(name: &str, method: &str, generator: Generator) -> String {
@@ -61,3 +62,217 @@ pub(super) fn generate_none_identity_comparison(
6162
};
6263
generator.expr(&compare.into())
6364
}
65+
66+
// Helpers for read-whole-file and write-whole-file
67+
#[derive(Debug, Copy, Clone)]
68+
pub(super) enum OpenMode {
69+
/// "r"
70+
ReadText,
71+
/// "rb"
72+
ReadBytes,
73+
/// "w"
74+
WriteText,
75+
/// "wb"
76+
WriteBytes,
77+
}
78+
79+
impl OpenMode {
80+
pub(super) fn pathlib_method(self) -> String {
81+
match self {
82+
OpenMode::ReadText => "read_text".to_string(),
83+
OpenMode::ReadBytes => "read_bytes".to_string(),
84+
OpenMode::WriteText => "write_text".to_string(),
85+
OpenMode::WriteBytes => "write_bytes".to_string(),
86+
}
87+
}
88+
}
89+
90+
/// A grab bag struct that joins together every piece of information we need to track
91+
/// about a file open operation.
92+
#[derive(Debug)]
93+
pub(super) struct FileOpen<'a> {
94+
/// With item where the open happens, we use it for the reporting range.
95+
pub(super) item: &'a ast::WithItem,
96+
/// Filename expression used as the first argument in `open`, we use it in the diagnostic message.
97+
pub(super) filename: &'a Expr,
98+
/// The file open mode.
99+
pub(super) mode: OpenMode,
100+
/// The file open keywords.
101+
pub(super) keywords: Vec<&'a ast::Keyword>,
102+
/// We only check `open` operations whose file handles are used exactly once.
103+
pub(super) reference: &'a ResolvedReference,
104+
}
105+
106+
impl<'a> FileOpen<'a> {
107+
/// Determine whether an expression is a reference to the file handle, by comparing
108+
/// their ranges. If two expressions have the same range, they must be the same expression.
109+
pub(super) fn is_ref(&self, expr: &Expr) -> bool {
110+
expr.range() == self.reference.range()
111+
}
112+
}
113+
114+
/// Find and return all `open` operations in the given `with` statement.
115+
pub(super) fn find_file_opens<'a>(
116+
with: &'a ast::StmtWith,
117+
semantic: &'a SemanticModel<'a>,
118+
read_mode: bool,
119+
) -> Vec<FileOpen<'a>> {
120+
with.items
121+
.iter()
122+
.filter_map(|item| find_file_open(item, with, semantic, read_mode))
123+
.collect()
124+
}
125+
126+
/// Find `open` operation in the given `with` item.
127+
fn find_file_open<'a>(
128+
item: &'a ast::WithItem,
129+
with: &'a ast::StmtWith,
130+
semantic: &'a SemanticModel<'a>,
131+
read_mode: bool,
132+
) -> Option<FileOpen<'a>> {
133+
// We want to match `open(...) as var`.
134+
let ast::ExprCall {
135+
func,
136+
arguments: ast::Arguments { args, keywords, .. },
137+
..
138+
} = item.context_expr.as_call_expr()?;
139+
140+
if func.as_name_expr()?.id != "open" {
141+
return None;
142+
}
143+
144+
let var = item.optional_vars.as_deref()?.as_name_expr()?;
145+
146+
// Ignore calls with `*args` and `**kwargs`. In the exact case of `open(*filename, mode="w")`,
147+
// it could be a match; but in all other cases, the call _could_ contain unsupported keyword
148+
// arguments, like `buffering`.
149+
if args.iter().any(Expr::is_starred_expr)
150+
|| keywords.iter().any(|keyword| keyword.arg.is_none())
151+
{
152+
return None;
153+
}
154+
155+
// Match positional arguments, get filename and mode.
156+
let (filename, pos_mode) = match_open_args(args)?;
157+
158+
// Match keyword arguments, get keyword arguments to forward and possibly mode.
159+
let (keywords, kw_mode) = match_open_keywords(keywords, read_mode)?;
160+
161+
let mode = kw_mode.unwrap_or(pos_mode);
162+
163+
match mode {
164+
OpenMode::ReadText | OpenMode::ReadBytes => {
165+
if !read_mode {
166+
return None;
167+
}
168+
}
169+
OpenMode::WriteText | OpenMode::WriteBytes => {
170+
if read_mode {
171+
return None;
172+
}
173+
}
174+
}
175+
176+
// Path.read_bytes and Path.write_bytes do not support any kwargs.
177+
if matches!(mode, OpenMode::ReadBytes | OpenMode::WriteBytes) && !keywords.is_empty() {
178+
return None;
179+
}
180+
181+
// Now we need to find what is this variable bound to...
182+
let scope = semantic.current_scope();
183+
let bindings: Vec<BindingId> = scope.get_all(var.id.as_str()).collect();
184+
185+
let binding = bindings
186+
.iter()
187+
.map(|x| semantic.binding(*x))
188+
// We might have many bindings with the same name, but we only care
189+
// for the one we are looking at right now.
190+
.find(|binding| binding.range() == var.range())?;
191+
192+
// Since many references can share the same binding, we can limit our attention span
193+
// exclusively to the body of the current `with` statement.
194+
let references: Vec<&ResolvedReference> = binding
195+
.references
196+
.iter()
197+
.map(|id| semantic.reference(*id))
198+
.filter(|reference| with.range().contains_range(reference.range()))
199+
.collect();
200+
201+
// And even with all these restrictions, if the file handle gets used not exactly once,
202+
// it doesn't fit the bill.
203+
let [reference] = references.as_slice() else {
204+
return None;
205+
};
206+
207+
Some(FileOpen {
208+
item,
209+
filename,
210+
mode,
211+
keywords,
212+
reference,
213+
})
214+
}
215+
216+
/// Match positional arguments. Return expression for the file name and open mode.
217+
fn match_open_args(args: &[Expr]) -> Option<(&Expr, OpenMode)> {
218+
match args {
219+
[filename] => Some((filename, OpenMode::ReadText)),
220+
[filename, mode_literal] => match_open_mode(mode_literal).map(|mode| (filename, mode)),
221+
// The third positional argument is `buffering` and the pathlib methods don't support it.
222+
_ => None,
223+
}
224+
}
225+
226+
/// Match keyword arguments. Return keyword arguments to forward and mode.
227+
fn match_open_keywords(
228+
keywords: &[ast::Keyword],
229+
read_mode: bool,
230+
) -> Option<(Vec<&ast::Keyword>, Option<OpenMode>)> {
231+
let mut result: Vec<&ast::Keyword> = vec![];
232+
let mut mode: Option<OpenMode> = None;
233+
234+
for keyword in keywords {
235+
match keyword.arg.as_ref()?.as_str() {
236+
"encoding" | "errors" => result.push(keyword),
237+
// newline is only valid for write_text
238+
"newline" => {
239+
if read_mode {
240+
return None;
241+
}
242+
result.push(keyword);
243+
}
244+
245+
// This might look bizarre, - why do we re-wrap this optional?
246+
//
247+
// The answer is quite simple, in the result of the current function
248+
// mode being `None` is a possible and correct option meaning that there
249+
// was NO "mode" keyword argument.
250+
//
251+
// The result of `match_open_mode` on the other hand is None
252+
// in the cases when the mode is not compatible with `write_text`/`write_bytes`.
253+
//
254+
// So, here we return None from this whole function if the mode
255+
// is incompatible.
256+
"mode" => mode = Some(match_open_mode(&keyword.value)?),
257+
258+
// All other keywords cannot be directly forwarded.
259+
_ => return None,
260+
};
261+
}
262+
Some((result, mode))
263+
}
264+
265+
/// Match open mode to see if it is supported.
266+
fn match_open_mode(mode: &Expr) -> Option<OpenMode> {
267+
let ast::ExprStringLiteral { value, .. } = mode.as_string_literal_expr()?;
268+
if value.is_implicit_concatenated() {
269+
return None;
270+
}
271+
match value.to_str() {
272+
"r" => Some(OpenMode::ReadText),
273+
"rb" => Some(OpenMode::ReadBytes),
274+
"w" => Some(OpenMode::WriteText),
275+
"wb" => Some(OpenMode::WriteBytes),
276+
_ => None,
277+
}
278+
}

crates/ruff_linter/src/rules/refurb/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ mod tests {
4141
#[test_case(Rule::MetaClassABCMeta, Path::new("FURB180.py"))]
4242
#[test_case(Rule::HashlibDigestHex, Path::new("FURB181.py"))]
4343
#[test_case(Rule::ListReverseCopy, Path::new("FURB187.py"))]
44+
#[test_case(Rule::WriteWholeFile, Path::new("FURB103.py"))]
4445
fn rules(rule_code: Rule, path: &Path) -> Result<()> {
4546
let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
4647
let diagnostics = test_path(

crates/ruff_linter/src/rules/refurb/rules/mod.rs

+2
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ pub(crate) use type_none_comparison::*;
2525
pub(crate) use unnecessary_enumerate::*;
2626
pub(crate) use unnecessary_from_float::*;
2727
pub(crate) use verbose_decimal_constructor::*;
28+
pub(crate) use write_whole_file::*;
2829

2930
mod bit_count;
3031
mod check_and_remove_from_set;
@@ -53,3 +54,4 @@ mod type_none_comparison;
5354
mod unnecessary_enumerate;
5455
mod unnecessary_from_float;
5556
mod verbose_decimal_constructor;
57+
mod write_whole_file;

0 commit comments

Comments
 (0)