Skip to content

Commit de68caa

Browse files
KetuPatel806
authored and
im-vinicius
committed
STYLE: specify encodings when opening files (pandas-dev#52999)
* Changes Confirmed * Encoding Completed * Spaces Are Completed * Pre-commit manually completed * b.pre-commit removed * Final Commit * Some Changes reflected * test_xml Updated * Pre-commit check passed * Mode changed in xml file * mode reverted * Try to fix errors * error-checks * Fix Some errors * Unspecified-encoding Fixed * final committed * simplify --------- Co-authored-by: MarcoGorelli <>
1 parent 3eae27d commit de68caa

39 files changed

+113
-92
lines changed

.pre-commit-config.yaml

+5-3
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,6 @@ repos:
8383
hooks:
8484
- id: pylint
8585
stages: [manual]
86-
- repo: https://github.com/pycqa/pylint
87-
rev: v2.16.2
88-
hooks:
8986
- id: pylint
9087
alias: redefined-outer-name
9188
name: Redefining name from outer scope
@@ -99,6 +96,11 @@ repos:
9996
|^pandas/conftest\.py # keep excluded
10097
args: [--disable=all, --enable=redefined-outer-name]
10198
stages: [manual]
99+
- id: pylint
100+
alias: unspecified-encoding
101+
name: Using open without explicitly specifying an encoding
102+
args: [--disable=all, --enable=unspecified-encoding]
103+
stages: [manual]
102104
- repo: https://github.com/PyCQA/isort
103105
rev: 5.12.0
104106
hooks:

asv_bench/benchmarks/io/csv.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -497,7 +497,7 @@ class ReadCSVMemoryGrowth(BaseIO):
497497
param_names = ["engine"]
498498

499499
def setup(self, engine):
500-
with open(self.fname, "w") as f:
500+
with open(self.fname, "w", encoding="utf-8") as f:
501501
for i in range(self.num_rows):
502502
f.write(f"{i}\n")
503503

doc/make.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -163,12 +163,12 @@ def _get_page_title(self, page):
163163
components=(docutils.parsers.rst.Parser,)
164164
)
165165
doc = docutils.utils.new_document("<doc>", option_parser.get_default_values())
166-
with open(fname) as f:
166+
with open(fname, encoding="utf-8") as f:
167167
data = f.read()
168168

169169
parser = docutils.parsers.rst.Parser()
170170
# do not generate any warning when parsing the rst
171-
with open(os.devnull, "a") as f:
171+
with open(os.devnull, "a", encoding="utf-8") as f:
172172
doc.reporter.stream = f
173173
parser.parse(data, doc)
174174

@@ -186,7 +186,7 @@ def _add_redirects(self):
186186
Create in the build directory an html file with a redirect,
187187
for every row in REDIRECTS_FILE.
188188
"""
189-
with open(REDIRECTS_FILE) as mapping_fd:
189+
with open(REDIRECTS_FILE, encoding="utf-8") as mapping_fd:
190190
reader = csv.reader(mapping_fd)
191191
for row in reader:
192192
if not row or row[0].strip().startswith("#"):
@@ -209,7 +209,7 @@ def _add_redirects(self):
209209
# sphinx specific stuff
210210
title = "this page"
211211

212-
with open(path, "w") as moved_page_fd:
212+
with open(path, "w", encoding="utf-8") as moved_page_fd:
213213
html = f"""\
214214
<html>
215215
<head>

doc/source/conf.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -117,9 +117,9 @@
117117
elif single_doc and rel_fname != pattern:
118118
exclude_patterns.append(rel_fname)
119119

120-
with open(os.path.join(source_path, "index.rst.template")) as f:
120+
with open(os.path.join(source_path, "index.rst.template"), encoding="utf-8") as f:
121121
t = jinja2.Template(f.read())
122-
with open(os.path.join(source_path, "index.rst"), "w") as f:
122+
with open(os.path.join(source_path, "index.rst"), "w", encoding="utf-8") as f:
123123
f.write(
124124
t.render(
125125
include_api=include_api,

pandas/_testing/contexts.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -124,9 +124,10 @@ def ensure_clean(
124124
path.touch()
125125

126126
handle_or_str: str | IO = str(path)
127+
encoding = kwargs.pop("encoding", None)
127128
if return_filelike:
128129
kwargs.setdefault("mode", "w+b")
129-
handle_or_str = open(path, **kwargs)
130+
handle_or_str = open(path, encoding=encoding, **kwargs)
130131

131132
try:
132133
yield handle_or_str

pandas/_version.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ def git_get_keywords(versionfile_abs):
159159
# _version.py.
160160
keywords = {}
161161
try:
162-
with open(versionfile_abs) as fobj:
162+
with open(versionfile_abs, encoding="utf-8") as fobj:
163163
for line in fobj:
164164
if line.strip().startswith("git_refnames ="):
165165
mo = re.search(r'=\s*"(.*)"', line)

pandas/core/series.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1699,7 +1699,7 @@ def to_string(
16991699
if hasattr(buf, "write"):
17001700
buf.write(result)
17011701
else:
1702-
with open(buf, "w") as f:
1702+
with open(buf, "w", encoding="utf-8") as f:
17031703
f.write(result)
17041704
return None
17051705

pandas/io/clipboard/__init__.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -282,11 +282,11 @@ def copy_dev_clipboard(text):
282282
stacklevel=find_stack_level(),
283283
)
284284

285-
with open("/dev/clipboard", "w") as fd:
285+
with open("/dev/clipboard", "w", encoding="utf-8") as fd:
286286
fd.write(text)
287287

288288
def paste_dev_clipboard() -> str:
289-
with open("/dev/clipboard") as fd:
289+
with open("/dev/clipboard", encoding="utf-8") as fd:
290290
content = fd.read()
291291
return content
292292

pandas/tests/frame/methods/test_to_csv.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -626,7 +626,7 @@ def test_to_csv_float32_nanrep(self):
626626
with tm.ensure_clean("__tmp_to_csv_float32_nanrep__.csv") as path:
627627
df.to_csv(path, na_rep=999)
628628

629-
with open(path) as f:
629+
with open(path, encoding="utf-8") as f:
630630
lines = f.readlines()
631631
assert lines[1].split(",")[2] == "999"
632632

pandas/tests/io/excel/test_readers.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1708,7 +1708,7 @@ def test_corrupt_files_closed(self, engine, read_ext):
17081708
errors = (BadZipFile, xlrd.biffh.XLRDError)
17091709

17101710
with tm.ensure_clean(f"corrupt{read_ext}") as file:
1711-
Path(file).write_text("corrupt")
1711+
Path(file).write_text("corrupt", encoding="utf-8")
17121712
with tm.assert_produces_warning(False):
17131713
try:
17141714
pd.ExcelFile(file, engine=engine)

pandas/tests/io/formats/style/test_html.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def tpl_table():
4343
def test_html_template_extends_options():
4444
# make sure if templates are edited tests are updated as are setup fixtures
4545
# to understand the dependency
46-
with open("pandas/io/formats/templates/html.tpl") as file:
46+
with open("pandas/io/formats/templates/html.tpl", encoding="utf-8") as file:
4747
result = file.read()
4848
assert "{% include html_style_tpl %}" in result
4949
assert "{% include html_table_tpl %}" in result

pandas/tests/io/formats/test_to_csv.py

+12-12
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def test_to_csv_with_single_column(self):
3232
"""
3333
with tm.ensure_clean("test.csv") as path:
3434
df1.to_csv(path, header=None, index=None)
35-
with open(path) as f:
35+
with open(path, encoding="utf-8") as f:
3636
assert f.read() == expected1
3737

3838
df2 = DataFrame([1, None])
@@ -42,7 +42,7 @@ def test_to_csv_with_single_column(self):
4242
"""
4343
with tm.ensure_clean("test.csv") as path:
4444
df2.to_csv(path, header=None, index=None)
45-
with open(path) as f:
45+
with open(path, encoding="utf-8") as f:
4646
assert f.read() == expected2
4747

4848
def test_to_csv_default_encoding(self):
@@ -64,7 +64,7 @@ def test_to_csv_quotechar(self):
6464

6565
with tm.ensure_clean("test.csv") as path:
6666
df.to_csv(path, quoting=1) # 1=QUOTE_ALL
67-
with open(path) as f:
67+
with open(path, encoding="utf-8") as f:
6868
assert f.read() == expected
6969

7070
expected = """\
@@ -75,7 +75,7 @@ def test_to_csv_quotechar(self):
7575

7676
with tm.ensure_clean("test.csv") as path:
7777
df.to_csv(path, quoting=1, quotechar="$")
78-
with open(path) as f:
78+
with open(path, encoding="utf-8") as f:
7979
assert f.read() == expected
8080

8181
with tm.ensure_clean("test.csv") as path:
@@ -92,7 +92,7 @@ def test_to_csv_doublequote(self):
9292

9393
with tm.ensure_clean("test.csv") as path:
9494
df.to_csv(path, quoting=1, doublequote=True) # QUOTE_ALL
95-
with open(path) as f:
95+
with open(path, encoding="utf-8") as f:
9696
assert f.read() == expected
9797

9898
with tm.ensure_clean("test.csv") as path:
@@ -109,7 +109,7 @@ def test_to_csv_escapechar(self):
109109

110110
with tm.ensure_clean("test.csv") as path: # QUOTE_ALL
111111
df.to_csv(path, quoting=1, doublequote=False, escapechar="\\")
112-
with open(path) as f:
112+
with open(path, encoding="utf-8") as f:
113113
assert f.read() == expected
114114

115115
df = DataFrame({"col": ["a,a", ",bb,"]})
@@ -121,7 +121,7 @@ def test_to_csv_escapechar(self):
121121

122122
with tm.ensure_clean("test.csv") as path:
123123
df.to_csv(path, quoting=3, escapechar="\\") # QUOTE_NONE
124-
with open(path) as f:
124+
with open(path, encoding="utf-8") as f:
125125
assert f.read() == expected
126126

127127
def test_csv_to_string(self):
@@ -401,7 +401,7 @@ def test_to_csv_string_array_ascii(self):
401401
"""
402402
with tm.ensure_clean("str_test.csv") as path:
403403
df.to_csv(path, encoding="ascii")
404-
with open(path) as f:
404+
with open(path, encoding="utf-8") as f:
405405
assert f.read() == expected_ascii
406406

407407
def test_to_csv_string_array_utf8(self):
@@ -415,7 +415,7 @@ def test_to_csv_string_array_utf8(self):
415415
"""
416416
with tm.ensure_clean("unicode_test.csv") as path:
417417
df.to_csv(path, encoding="utf-8")
418-
with open(path) as f:
418+
with open(path, encoding="utf-8") as f:
419419
assert f.read() == expected_utf8
420420

421421
def test_to_csv_string_with_lf(self):
@@ -521,10 +521,10 @@ def test_to_csv_write_to_open_file(self):
521521
z
522522
"""
523523
with tm.ensure_clean("test.txt") as path:
524-
with open(path, "w") as f:
524+
with open(path, "w", encoding="utf-8") as f:
525525
f.write("manual header\n")
526526
df.to_csv(f, header=None, index=None)
527-
with open(path) as f:
527+
with open(path, encoding="utf-8") as f:
528528
assert f.read() == expected
529529

530530
def test_to_csv_write_to_open_file_with_newline_py3(self):
@@ -534,7 +534,7 @@ def test_to_csv_write_to_open_file_with_newline_py3(self):
534534
expected_rows = ["x", "y", "z"]
535535
expected = "manual header\n" + tm.convert_rows_list_to_csv_str(expected_rows)
536536
with tm.ensure_clean("test.txt") as path:
537-
with open(path, "w", newline="") as f:
537+
with open(path, "w", newline="", encoding="utf-8") as f:
538538
f.write("manual header\n")
539539
df.to_csv(f, header=None, index=None)
540540

pandas/tests/io/formats/test_to_latex.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ class TestToLatex:
3434
def test_to_latex_to_file(self, float_frame):
3535
with tm.ensure_clean("test.tex") as path:
3636
float_frame.to_latex(path)
37-
with open(path) as f:
37+
with open(path, encoding="utf-8") as f:
3838
assert float_frame.to_latex() == f.read()
3939

4040
def test_to_latex_to_file_utf8_with_encoding(self):

pandas/tests/io/json/test_pandas.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1214,7 +1214,7 @@ def test_read_s3_jsonl(self, s3_resource, s3so):
12141214
def test_read_local_jsonl(self):
12151215
# GH17200
12161216
with tm.ensure_clean("tmp_items.json") as path:
1217-
with open(path, "w") as infile:
1217+
with open(path, "w", encoding="utf-8") as infile:
12181218
infile.write('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n')
12191219
result = read_json(path, lines=True)
12201220
expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])

pandas/tests/io/parser/common/test_chunksize.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,7 @@ def test_read_csv_memory_growth_chunksize(all_parsers):
228228
parser = all_parsers
229229

230230
with tm.ensure_clean() as path:
231-
with open(path, "w") as f:
231+
with open(path, "w", encoding="utf-8") as f:
232232
for i in range(1000):
233233
f.write(str(i) + "\n")
234234

pandas/tests/io/parser/common/test_file_buffer_url.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ def test_no_permission(all_parsers):
107107

108108
# verify that this process cannot open the file (not running as sudo)
109109
try:
110-
with open(path):
110+
with open(path, encoding="utf-8"):
111111
pass
112112
pytest.skip("Running as sudo.")
113113
except PermissionError:
@@ -285,7 +285,7 @@ def test_file_handles_with_open(all_parsers, csv1):
285285
parser = all_parsers
286286

287287
for mode in ["r", "rb"]:
288-
with open(csv1, mode) as f:
288+
with open(csv1, mode, encoding="utf-8" if mode == "r" else None) as f:
289289
parser.read_csv(f)
290290
assert not f.closed
291291

@@ -392,7 +392,7 @@ def test_context_manageri_user_provided(all_parsers, datapath):
392392
# make sure that user-provided handles are not closed
393393
parser = all_parsers
394394

395-
with open(datapath("io", "data", "csv", "iris.csv")) as path:
395+
with open(datapath("io", "data", "csv", "iris.csv"), encoding="utf-8") as path:
396396
reader = parser.read_csv(path, chunksize=1)
397397
assert not reader.handles.handle.closed
398398
try:

pandas/tests/io/parser/common/test_iterator.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -95,10 +95,10 @@ def test_iteration_open_handle(all_parsers):
9595
kwargs = {"header": None}
9696

9797
with tm.ensure_clean() as path:
98-
with open(path, "w") as f:
98+
with open(path, "w", encoding="utf-8") as f:
9999
f.write("AAA\nBBB\nCCC\nDDD\nEEE\nFFF\nGGG")
100100

101-
with open(path) as f:
101+
with open(path, encoding="utf-8") as f:
102102
for line in f:
103103
if "CCC" in line:
104104
break

pandas/tests/io/parser/test_c_parser_only.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -603,7 +603,7 @@ def test_file_handles_mmap(c_parser_only, csv1):
603603
# Don't close user provided file handles.
604604
parser = c_parser_only
605605

606-
with open(csv1) as f:
606+
with open(csv1, encoding="utf-8") as f:
607607
with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as m:
608608
parser.read_csv(m)
609609
assert not m.closed
@@ -615,7 +615,7 @@ def test_file_binary_mode(c_parser_only):
615615
expected = DataFrame([[1, 2, 3], [4, 5, 6]])
616616

617617
with tm.ensure_clean() as path:
618-
with open(path, "w") as f:
618+
with open(path, "w", encoding="utf-8") as f:
619619
f.write("1,2,3\n4,5,6")
620620

621621
with open(path, "rb") as f:
@@ -627,7 +627,7 @@ def test_unix_style_breaks(c_parser_only):
627627
# GH 11020
628628
parser = c_parser_only
629629
with tm.ensure_clean() as path:
630-
with open(path, "w", newline="\n") as f:
630+
with open(path, "w", newline="\n", encoding="utf-8") as f:
631631
f.write("blah\n\ncol_1,col_2,col_3\n\n")
632632
result = parser.read_csv(path, skiprows=2, encoding="utf-8", engine="c")
633633
expected = DataFrame(columns=["col_1", "col_2", "col_3"])

pandas/tests/io/parser/test_compression.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ def test_infer_compression(all_parsers, csv1, buffer, ext):
129129
kwargs["compression"] = "infer"
130130

131131
if buffer:
132-
with open(csv1) as f:
132+
with open(csv1, encoding="utf-8") as f:
133133
result = parser.read_csv(f, **kwargs)
134134
else:
135135
ext = "." + ext if ext else ""
@@ -183,7 +183,9 @@ def test_ignore_compression_extension(all_parsers):
183183
with tm.ensure_clean("test.csv.zip") as path_zip:
184184
# make sure to create un-compressed file with zip extension
185185
df.to_csv(path_csv, index=False)
186-
Path(path_zip).write_text(Path(path_csv).read_text())
186+
Path(path_zip).write_text(
187+
Path(path_csv).read_text(encoding="utf-8"), encoding="utf-8"
188+
)
187189

188190
tm.assert_frame_equal(parser.read_csv(path_zip, compression=None), df)
189191

pandas/tests/io/test_common.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -411,15 +411,15 @@ def test_constructor_bad_file(self, mmap_file):
411411
with pytest.raises(err, match=msg):
412412
icom._maybe_memory_map(non_file, True)
413413

414-
with open(mmap_file) as target:
414+
with open(mmap_file, encoding="utf-8") as target:
415415
pass
416416

417417
msg = "I/O operation on closed file"
418418
with pytest.raises(ValueError, match=msg):
419419
icom._maybe_memory_map(target, True)
420420

421421
def test_next(self, mmap_file):
422-
with open(mmap_file) as target:
422+
with open(mmap_file, encoding="utf-8") as target:
423423
lines = target.readlines()
424424

425425
with icom.get_handle(

pandas/tests/io/test_compression.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -299,10 +299,10 @@ def test_ambiguous_archive_zip():
299299

300300
def test_ambiguous_archive_tar(tmp_path):
301301
csvAPath = tmp_path / "a.csv"
302-
with open(csvAPath, "w") as a:
302+
with open(csvAPath, "w", encoding="utf-8") as a:
303303
a.write("foo,bar\n")
304304
csvBPath = tmp_path / "b.csv"
305-
with open(csvBPath, "w") as b:
305+
with open(csvBPath, "w", encoding="utf-8") as b:
306306
b.write("foo,bar\n")
307307

308308
tarpath = tmp_path / "archive.tar"

0 commit comments

Comments
 (0)