Skip to content

STYLE: specify encodings when opening files #52999

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 31 commits into from
May 7, 2023
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
5b482fe
Changes Confirmed
KetuPatel806 Apr 28, 2023
3302c87
Merge branch 'pandas-dev:main' into main
KetuPatel806 Apr 29, 2023
c20ac6d
Encoding Completed
KetuPatel806 Apr 29, 2023
682fdf8
Spaces Are Completed
KetuPatel806 Apr 29, 2023
a5fbd47
Pre-commit manually completed
KetuPatel806 Apr 29, 2023
e528292
b.pre-commit removed
KetuPatel806 Apr 29, 2023
3d72dc3
Final Commit
KetuPatel806 Apr 29, 2023
8890943
Some changes reflected
KetuPatel806 Apr 29, 2023
1647697
Merge branch 'pandas-dev:main' into main
KetuPatel806 Apr 30, 2023
dcbe4dc
test_xml Updated
KetuPatel806 Apr 30, 2023
8d4a14b
Merge branch 'main' of https://github.com/KetuPatel806/pandasContribu…
KetuPatel806 Apr 30, 2023
86b89ea
Merge branch 'main' into main
KetuPatel806 Apr 30, 2023
b4d107b
Pre-commit check passed
KetuPatel806 Apr 30, 2023
56fef4d
Merge branch 'main' of https://github.com/KetuPatel806/pandasContribu…
KetuPatel806 Apr 30, 2023
959dadc
Mode changed in xml file
KetuPatel806 Apr 30, 2023
7aebe05
mode reverted
KetuPatel806 Apr 30, 2023
ba15d83
Merge branch 'pandas-dev:main' into main
KetuPatel806 May 1, 2023
3e9ba30
Merge branch 'pandas-dev:main' into main
KetuPatel806 May 2, 2023
db8a984
Merge branch 'pandas-dev:main' into main
KetuPatel806 May 5, 2023
c5fb9d6
Try to fix errors
KetuPatel806 May 5, 2023
743c63b
Merge branch 'main' of https://github.com/KetuPatel806/pandasContribu…
KetuPatel806 May 5, 2023
d250a1d
error-checks
KetuPatel806 May 5, 2023
ea5c921
Merge branch 'pandas-dev:main' into main
KetuPatel806 May 6, 2023
640e8e0
Fix Some errors
KetuPatel806 May 6, 2023
27ff4bb
Merge branch 'pandas-dev:main' into main
KetuPatel806 May 6, 2023
98c004c
Unspecified-encodingFixed
KetuPatel806 May 6, 2023
c18c670
Merge branch 'main' of https://github.com/KetuPatel806/pandasContribu…
KetuPatel806 May 6, 2023
d3f045d
Merge branch 'main' into main
KetuPatel806 May 6, 2023
513aebd
final committed
KetuPatel806 May 7, 2023
19a1eac
Merge branch 'main' of https://github.com/KetuPatel806/pandasContribu…
KetuPatel806 May 7, 2023
9128769
simplify
May 7, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,6 @@ repos:
hooks:
- id: pylint
stages: [manual]
- repo: https://github.com/pycqa/pylint
rev: v2.16.2
hooks:
- id: pylint
alias: redefined-outer-name
name: Redefining name from outer scope
Expand All @@ -99,6 +96,11 @@ repos:
|^pandas/conftest\.py # keep excluded
args: [--disable=all, --enable=redefined-outer-name]
stages: [manual]
- id: pylint
alias: unspecified-encoding
name: Using open without explicitly specifying an encoding
args: [--disable=all, --enable=unspecified-encoding]
stages: [manual]
- repo: https://github.com/PyCQA/isort
rev: 5.12.0
hooks:
Expand Down
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/io/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ class ReadCSVMemoryGrowth(BaseIO):
param_names = ["engine"]

def setup(self, engine):
with open(self.fname, "w") as f:
with open(self.fname, "w", encoding="utf-8") as f:
for i in range(self.num_rows):
f.write(f"{i}\n")

Expand Down
8 changes: 4 additions & 4 deletions doc/make.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,12 +163,12 @@ def _get_page_title(self, page):
components=(docutils.parsers.rst.Parser,)
)
doc = docutils.utils.new_document("<doc>", option_parser.get_default_values())
with open(fname) as f:
with open(fname, encoding="utf-8") as f:
data = f.read()

parser = docutils.parsers.rst.Parser()
# do not generate any warning when parsing the rst
with open(os.devnull, "a") as f:
with open(os.devnull, "a", encoding="utf-8") as f:
doc.reporter.stream = f
parser.parse(data, doc)

Expand All @@ -186,7 +186,7 @@ def _add_redirects(self):
Create in the build directory an html file with a redirect,
for every row in REDIRECTS_FILE.
"""
with open(REDIRECTS_FILE) as mapping_fd:
with open(REDIRECTS_FILE, encoding="utf-8") as mapping_fd:
reader = csv.reader(mapping_fd)
for row in reader:
if not row or row[0].strip().startswith("#"):
Expand All @@ -209,7 +209,7 @@ def _add_redirects(self):
# sphinx specific stuff
title = "this page"

with open(path, "w") as moved_page_fd:
with open(path, "w", encoding="utf-8") as moved_page_fd:
html = f"""\
<html>
<head>
Expand Down
4 changes: 2 additions & 2 deletions doc/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,9 @@
elif single_doc and rel_fname != pattern:
exclude_patterns.append(rel_fname)

with open(os.path.join(source_path, "index.rst.template")) as f:
with open(os.path.join(source_path, "index.rst.template"), encoding="utf-8") as f:
t = jinja2.Template(f.read())
with open(os.path.join(source_path, "index.rst"), "w") as f:
with open(os.path.join(source_path, "index.rst"), "w", encoding="utf-8") as f:
f.write(
t.render(
include_api=include_api,
Expand Down
6 changes: 5 additions & 1 deletion pandas/_testing/contexts.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,11 @@ def ensure_clean(
handle_or_str: str | IO = str(path)
if return_filelike:
kwargs.setdefault("mode", "w+b")
handle_or_str = open(path, **kwargs)
handle_or_str = open(
path,
encoding=kwargs.get("encoding", None),
**{key: value for key, value in kwargs.items() if key != "encoding"},
)

try:
yield handle_or_str
Expand Down
2 changes: 1 addition & 1 deletion pandas/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def git_get_keywords(versionfile_abs):
# _version.py.
keywords = {}
try:
with open(versionfile_abs) as fobj:
with open(versionfile_abs, encoding="utf-8") as fobj:
for line in fobj:
if line.strip().startswith("git_refnames ="):
mo = re.search(r'=\s*"(.*)"', line)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1699,7 +1699,7 @@ def to_string(
if hasattr(buf, "write"):
buf.write(result)
else:
with open(buf, "w") as f:
with open(buf, "w", encoding="utf-8") as f:
f.write(result)
return None

Expand Down
4 changes: 2 additions & 2 deletions pandas/io/clipboard/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,11 +282,11 @@ def copy_dev_clipboard(text):
stacklevel=find_stack_level(),
)

with open("/dev/clipboard", "w") as fd:
with open("/dev/clipboard", "w", encoding="utf-8") as fd:
fd.write(text)

def paste_dev_clipboard() -> str:
with open("/dev/clipboard") as fd:
with open("/dev/clipboard", encoding="utf-8") as fd:
content = fd.read()
return content

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/methods/test_to_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -626,7 +626,7 @@ def test_to_csv_float32_nanrep(self):
with tm.ensure_clean("__tmp_to_csv_float32_nanrep__.csv") as path:
df.to_csv(path, na_rep=999)

with open(path) as f:
with open(path, encoding="utf-8") as f:
lines = f.readlines()
assert lines[1].split(",")[2] == "999"

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/excel/test_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1702,7 +1702,7 @@ def test_corrupt_files_closed(self, engine, read_ext):
errors = (BadZipFile, xlrd.biffh.XLRDError)

with tm.ensure_clean(f"corrupt{read_ext}") as file:
Path(file).write_text("corrupt")
Path(file).write_text("corrupt", encoding="utf-8")
with tm.assert_produces_warning(False):
try:
pd.ExcelFile(file, engine=engine)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/formats/style/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def tpl_table():
def test_html_template_extends_options():
# make sure if templates are edited tests are updated as are setup fixtures
# to understand the dependency
with open("pandas/io/formats/templates/html.tpl") as file:
with open("pandas/io/formats/templates/html.tpl", encoding="utf-8") as file:
result = file.read()
assert "{% include html_style_tpl %}" in result
assert "{% include html_table_tpl %}" in result
Expand Down
24 changes: 12 additions & 12 deletions pandas/tests/io/formats/test_to_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def test_to_csv_with_single_column(self):
"""
with tm.ensure_clean("test.csv") as path:
df1.to_csv(path, header=None, index=None)
with open(path) as f:
with open(path, encoding="utf-8") as f:
assert f.read() == expected1

df2 = DataFrame([1, None])
Expand All @@ -42,7 +42,7 @@ def test_to_csv_with_single_column(self):
"""
with tm.ensure_clean("test.csv") as path:
df2.to_csv(path, header=None, index=None)
with open(path) as f:
with open(path, encoding="utf-8") as f:
assert f.read() == expected2

def test_to_csv_default_encoding(self):
Expand All @@ -64,7 +64,7 @@ def test_to_csv_quotechar(self):

with tm.ensure_clean("test.csv") as path:
df.to_csv(path, quoting=1) # 1=QUOTE_ALL
with open(path) as f:
with open(path, encoding="utf-8") as f:
assert f.read() == expected

expected = """\
Expand All @@ -75,7 +75,7 @@ def test_to_csv_quotechar(self):

with tm.ensure_clean("test.csv") as path:
df.to_csv(path, quoting=1, quotechar="$")
with open(path) as f:
with open(path, encoding="utf-8") as f:
assert f.read() == expected

with tm.ensure_clean("test.csv") as path:
Expand All @@ -92,7 +92,7 @@ def test_to_csv_doublequote(self):

with tm.ensure_clean("test.csv") as path:
df.to_csv(path, quoting=1, doublequote=True) # QUOTE_ALL
with open(path) as f:
with open(path, encoding="utf-8") as f:
assert f.read() == expected

with tm.ensure_clean("test.csv") as path:
Expand All @@ -109,7 +109,7 @@ def test_to_csv_escapechar(self):

with tm.ensure_clean("test.csv") as path: # QUOTE_ALL
df.to_csv(path, quoting=1, doublequote=False, escapechar="\\")
with open(path) as f:
with open(path, encoding="utf-8") as f:
assert f.read() == expected

df = DataFrame({"col": ["a,a", ",bb,"]})
Expand All @@ -121,7 +121,7 @@ def test_to_csv_escapechar(self):

with tm.ensure_clean("test.csv") as path:
df.to_csv(path, quoting=3, escapechar="\\") # QUOTE_NONE
with open(path) as f:
with open(path, encoding="utf-8") as f:
assert f.read() == expected

def test_csv_to_string(self):
Expand Down Expand Up @@ -401,7 +401,7 @@ def test_to_csv_string_array_ascii(self):
"""
with tm.ensure_clean("str_test.csv") as path:
df.to_csv(path, encoding="ascii")
with open(path) as f:
with open(path, encoding="utf-8") as f:
assert f.read() == expected_ascii

def test_to_csv_string_array_utf8(self):
Expand All @@ -415,7 +415,7 @@ def test_to_csv_string_array_utf8(self):
"""
with tm.ensure_clean("unicode_test.csv") as path:
df.to_csv(path, encoding="utf-8")
with open(path) as f:
with open(path, encoding="utf-8") as f:
assert f.read() == expected_utf8

def test_to_csv_string_with_lf(self):
Expand Down Expand Up @@ -521,10 +521,10 @@ def test_to_csv_write_to_open_file(self):
z
"""
with tm.ensure_clean("test.txt") as path:
with open(path, "w") as f:
with open(path, "w", encoding="utf-8") as f:
f.write("manual header\n")
df.to_csv(f, header=None, index=None)
with open(path) as f:
with open(path, encoding="utf-8") as f:
assert f.read() == expected

def test_to_csv_write_to_open_file_with_newline_py3(self):
Expand All @@ -534,7 +534,7 @@ def test_to_csv_write_to_open_file_with_newline_py3(self):
expected_rows = ["x", "y", "z"]
expected = "manual header\n" + tm.convert_rows_list_to_csv_str(expected_rows)
with tm.ensure_clean("test.txt") as path:
with open(path, "w", newline="") as f:
with open(path, "w", newline="", encoding="utf-8") as f:
f.write("manual header\n")
df.to_csv(f, header=None, index=None)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/formats/test_to_latex.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class TestToLatex:
def test_to_latex_to_file(self, float_frame):
with tm.ensure_clean("test.tex") as path:
float_frame.to_latex(path)
with open(path) as f:
with open(path, encoding="utf-8") as f:
assert float_frame.to_latex() == f.read()

def test_to_latex_to_file_utf8_with_encoding(self):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1214,7 +1214,7 @@ def test_read_s3_jsonl(self, s3_resource, s3so):
def test_read_local_jsonl(self):
# GH17200
with tm.ensure_clean("tmp_items.json") as path:
with open(path, "w") as infile:
with open(path, "w", encoding="utf-8") as infile:
infile.write('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n')
result = read_json(path, lines=True)
expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/parser/common/test_chunksize.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ def test_read_csv_memory_growth_chunksize(all_parsers):
parser = all_parsers

with tm.ensure_clean() as path:
with open(path, "w") as f:
with open(path, "w", encoding="utf-8") as f:
for i in range(1000):
f.write(str(i) + "\n")

Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/io/parser/common/test_file_buffer_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def test_no_permission(all_parsers):

# verify that this process cannot open the file (not running as sudo)
try:
with open(path):
with open(path, encoding="utf-8"):
pass
pytest.skip("Running as sudo.")
except PermissionError:
Expand Down Expand Up @@ -285,7 +285,7 @@ def test_file_handles_with_open(all_parsers, csv1):
parser = all_parsers

for mode in ["r", "rb"]:
with open(csv1, mode) as f:
with open(csv1, mode, encoding="utf-8") as f:
parser.read_csv(f)
assert not f.closed

Expand Down Expand Up @@ -392,7 +392,7 @@ def test_context_manageri_user_provided(all_parsers, datapath):
# make sure that user-provided handles are not closed
parser = all_parsers

with open(datapath("io", "data", "csv", "iris.csv")) as path:
with open(datapath("io", "data", "csv", "iris.csv"), encoding="utf-8") as path:
reader = parser.read_csv(path, chunksize=1)
assert not reader.handles.handle.closed
try:
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/io/parser/common/test_iterator.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,10 @@ def test_iteration_open_handle(all_parsers):
kwargs = {"header": None}

with tm.ensure_clean() as path:
with open(path, "w") as f:
with open(path, "w", encoding="utf-8") as f:
f.write("AAA\nBBB\nCCC\nDDD\nEEE\nFFF\nGGG")

with open(path) as f:
with open(path, encoding="utf-8") as f:
for line in f:
if "CCC" in line:
break
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/io/parser/test_c_parser_only.py
Original file line number Diff line number Diff line change
Expand Up @@ -603,7 +603,7 @@ def test_file_handles_mmap(c_parser_only, csv1):
# Don't close user provided file handles.
parser = c_parser_only

with open(csv1) as f:
with open(csv1, encoding="utf-8") as f:
with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as m:
parser.read_csv(m)
assert not m.closed
Expand All @@ -615,7 +615,7 @@ def test_file_binary_mode(c_parser_only):
expected = DataFrame([[1, 2, 3], [4, 5, 6]])

with tm.ensure_clean() as path:
with open(path, "w") as f:
with open(path, "w", encoding="utf-8") as f:
f.write("1,2,3\n4,5,6")

with open(path, "rb") as f:
Expand All @@ -627,7 +627,7 @@ def test_unix_style_breaks(c_parser_only):
# GH 11020
parser = c_parser_only
with tm.ensure_clean() as path:
with open(path, "w", newline="\n") as f:
with open(path, "w", newline="\n", encoding="utf-8") as f:
f.write("blah\n\ncol_1,col_2,col_3\n\n")
result = parser.read_csv(path, skiprows=2, encoding="utf-8", engine="c")
expected = DataFrame(columns=["col_1", "col_2", "col_3"])
Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/io/parser/test_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def test_infer_compression(all_parsers, csv1, buffer, ext):
kwargs["compression"] = "infer"

if buffer:
with open(csv1) as f:
with open(csv1, encoding="utf-8") as f:
result = parser.read_csv(f, **kwargs)
else:
ext = "." + ext if ext else ""
Expand Down Expand Up @@ -183,7 +183,9 @@ def test_ignore_compression_extension(all_parsers):
with tm.ensure_clean("test.csv.zip") as path_zip:
# make sure to create un-compressed file with zip extension
df.to_csv(path_csv, index=False)
Path(path_zip).write_text(Path(path_csv).read_text())
Path(path_zip).write_text(
Path(path_csv).read_text(encoding="utf-8"), encoding="utf-8"
)

tm.assert_frame_equal(parser.read_csv(path_zip, compression=None), df)

Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/io/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,15 +411,15 @@ def test_constructor_bad_file(self, mmap_file):
with pytest.raises(err, match=msg):
icom._maybe_memory_map(non_file, True)

with open(mmap_file) as target:
with open(mmap_file, encoding="utf-8") as target:
pass

msg = "I/O operation on closed file"
with pytest.raises(ValueError, match=msg):
icom._maybe_memory_map(target, True)

def test_next(self, mmap_file):
with open(mmap_file) as target:
with open(mmap_file, encoding="utf-8") as target:
lines = target.readlines()

with icom.get_handle(
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/io/test_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,10 +299,10 @@ def test_ambiguous_archive_zip():

def test_ambiguous_archive_tar(tmp_path):
csvAPath = tmp_path / "a.csv"
with open(csvAPath, "w") as a:
with open(csvAPath, "w", encoding="utf-8") as a:
a.write("foo,bar\n")
csvBPath = tmp_path / "b.csv"
with open(csvBPath, "w") as b:
with open(csvBPath, "w", encoding="utf-8") as b:
b.write("foo,bar\n")

tarpath = tmp_path / "archive.tar"
Expand Down
Loading