Skip to content

BUG/CLN: Minimize number of ResourceWarnings #38168

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 1, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,9 @@ class ExcelWriter(metaclass=abc.ABCMeta):
Default is to use xlwt for xls, openpyxl for xlsx, odf for ods.
See DataFrame.to_excel for typical usage.

The writer should be used as a context manager. Otherwise, call `close()` to save
the workbook and close any opened file handles.

Parameters
----------
path : str or typing.BinaryIO
Expand Down
4 changes: 3 additions & 1 deletion pandas/io/excel/_xlwt.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ def save(self):
"""
Save workbook to disk.
"""
self.book.save(self.handles.handle)
if self.sheets:
# saving fails when no sheet has been written, e.g. when the
# ExcelWriter is just opened and then closed
self.book.save(self.handles.handle)

def write_cells(
self, cells, sheet_name=None, startrow=0, startcol=0, freeze_panes=None
Expand Down
6 changes: 5 additions & 1 deletion pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1881,7 +1881,11 @@ def __init__(self, src: FilePathOrBuffer, **kwds):
# no attribute "mmap" [union-attr]
self.handles.handle = self.handles.handle.mmap # type: ignore[union-attr]

self._reader = parsers.TextReader(self.handles.handle, **kwds)
try:
self._reader = parsers.TextReader(self.handles.handle, **kwds)
except Exception:
self.handles.close()
raise
self.unnamed_cols = self._reader.unnamed_cols

passed_names = self.names is None
Expand Down
7 changes: 5 additions & 2 deletions pandas/io/sas/sasreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from pandas._typing import FilePathOrBuffer, Label

from pandas.io.common import stringify_path
from pandas.io.common import IOHandles, stringify_path

if TYPE_CHECKING:
from pandas import DataFrame
Expand All @@ -18,6 +18,8 @@ class ReaderBase(metaclass=ABCMeta):
Protocol for XportReader and SAS7BDATReader classes.
"""

handles: IOHandles

@abstractmethod
def read(self, nrows=None):
pass
Expand Down Expand Up @@ -134,4 +136,5 @@ def read_sas(
if iterator or chunksize:
return reader

return reader.read()
with reader.handles:
return reader.read()
13 changes: 6 additions & 7 deletions pandas/tests/io/excel/test_openpyxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,11 @@ def test_write_cells_merge_styled(ext):
]

with tm.ensure_clean(ext) as path:
writer = _OpenpyxlWriter(path)
writer.write_cells(initial_cells, sheet_name=sheet_name)
writer.write_cells(merge_cells, sheet_name=sheet_name)
with _OpenpyxlWriter(path) as writer:
writer.write_cells(initial_cells, sheet_name=sheet_name)
writer.write_cells(merge_cells, sheet_name=sheet_name)

wks = writer.sheets[sheet_name]
wks = writer.sheets[sheet_name]
xcell_b1 = wks["B1"]
xcell_a2 = wks["A2"]
assert xcell_b1.font == openpyxl_sty_merged
Expand All @@ -93,9 +93,8 @@ def test_write_append_mode(ext, mode, expected):
wb.worksheets[1]["A1"].value = "bar"
wb.save(f)

writer = ExcelWriter(f, engine="openpyxl", mode=mode)
df.to_excel(writer, sheet_name="baz", index=False)
writer.save()
with ExcelWriter(f, engine="openpyxl", mode=mode) as writer:
df.to_excel(writer, sheet_name="baz", index=False)

wb2 = openpyxl.load_workbook(f)
result = [sheet.title for sheet in wb2.worksheets]
Expand Down
17 changes: 8 additions & 9 deletions pandas/tests/io/excel/test_style.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,14 @@ def custom_converter(css):

df = DataFrame(np.random.randn(11, 3))
with tm.ensure_clean(".xlsx" if engine != "xlwt" else ".xls") as path:
writer = ExcelWriter(path, engine=engine)
df.to_excel(writer, sheet_name="frame")
df.style.to_excel(writer, sheet_name="unstyled")
styled = df.style.apply(style, axis=None)
styled.to_excel(writer, sheet_name="styled")
ExcelFormatter(styled, style_converter=custom_converter).write(
writer, sheet_name="custom"
)
writer.save()
with ExcelWriter(path, engine=engine) as writer:
df.to_excel(writer, sheet_name="frame")
df.style.to_excel(writer, sheet_name="unstyled")
styled = df.style.apply(style, axis=None)
styled.to_excel(writer, sheet_name="styled")
ExcelFormatter(styled, style_converter=custom_converter).write(
writer, sheet_name="custom"
)

if engine not in ("openpyxl", "xlsxwriter"):
# For other engines, we only smoke test
Expand Down
44 changes: 21 additions & 23 deletions pandas/tests/io/excel/test_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,10 +522,9 @@ def test_sheets(self, frame, tsframe, path):
frame.to_excel(path, "test1", index=False)

# Test writing to separate sheets
writer = ExcelWriter(path)
frame.to_excel(writer, "test1")
tsframe.to_excel(writer, "test2")
writer.close()
with ExcelWriter(path) as writer:
frame.to_excel(writer, "test1")
tsframe.to_excel(writer, "test2")
reader = ExcelFile(path)
recons = pd.read_excel(reader, sheet_name="test1", index_col=0)
tm.assert_frame_equal(frame, recons)
Expand Down Expand Up @@ -1199,17 +1198,16 @@ def test_datetimes(self, path):

def test_bytes_io(self, engine):
# see gh-7074
bio = BytesIO()
df = DataFrame(np.random.randn(10, 2))
with BytesIO() as bio:
df = DataFrame(np.random.randn(10, 2))

# Pass engine explicitly, as there is no file path to infer from.
writer = ExcelWriter(bio, engine=engine)
df.to_excel(writer)
writer.save()
# Pass engine explicitly, as there is no file path to infer from.
with ExcelWriter(bio, engine=engine) as writer:
df.to_excel(writer)

bio.seek(0)
reread_df = pd.read_excel(bio, index_col=0)
tm.assert_frame_equal(df, reread_df)
bio.seek(0)
reread_df = pd.read_excel(bio, index_col=0)
tm.assert_frame_equal(df, reread_df)

def test_write_lists_dict(self, path):
# see gh-8188.
Expand Down Expand Up @@ -1317,12 +1315,12 @@ class TestExcelWriterEngineTests:
)
def test_ExcelWriter_dispatch(self, klass, ext):
with tm.ensure_clean(ext) as path:
writer = ExcelWriter(path)
if ext == ".xlsx" and td.safe_import("xlsxwriter"):
# xlsxwriter has preference over openpyxl if both installed
assert isinstance(writer, _XlsxWriter)
else:
assert isinstance(writer, klass)
with ExcelWriter(path) as writer:
if ext == ".xlsx" and td.safe_import("xlsxwriter"):
# xlsxwriter has preference over openpyxl if both installed
assert isinstance(writer, _XlsxWriter)
else:
assert isinstance(writer, klass)

def test_ExcelWriter_dispatch_raises(self):
with pytest.raises(ValueError, match="No engine"):
Expand Down Expand Up @@ -1356,8 +1354,8 @@ def check_called(func):
path = "something.xlsx"
with tm.ensure_clean(path) as filepath:
register_writer(DummyClass)
writer = ExcelWriter(filepath)
assert isinstance(writer, DummyClass)
with ExcelWriter(filepath) as writer:
assert isinstance(writer, DummyClass)
df = tm.makeCustomDataframe(1, 1)
check_called(lambda: df.to_excel(filepath))
with tm.ensure_clean("something.xls") as filepath:
Expand All @@ -1377,5 +1375,5 @@ def test_excelfile_fspath(self):

def test_excelwriter_fspath(self):
with tm.ensure_clean("foo.xlsx") as path:
writer = ExcelWriter(path)
assert os.fspath(writer) == str(path)
with ExcelWriter(path) as writer:
assert os.fspath(writer) == str(path)
19 changes: 9 additions & 10 deletions pandas/tests/io/excel/test_xlsxwriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,15 @@ def test_column_format(ext):
with tm.ensure_clean(ext) as path:
frame = DataFrame({"A": [123456, 123456], "B": [123456, 123456]})

writer = ExcelWriter(path)
frame.to_excel(writer)

# Add a number format to col B and ensure it is applied to cells.
num_format = "#,##0"
write_workbook = writer.book
write_worksheet = write_workbook.worksheets()[0]
col_format = write_workbook.add_format({"num_format": num_format})
write_worksheet.set_column("B:B", None, col_format)
writer.save()
with ExcelWriter(path) as writer:
frame.to_excel(writer)

# Add a number format to col B and ensure it is applied to cells.
num_format = "#,##0"
write_workbook = writer.book
write_worksheet = write_workbook.worksheets()[0]
col_format = write_workbook.add_format({"num_format": num_format})
write_worksheet.set_column("B:B", None, col_format)

read_workbook = openpyxl.load_workbook(path)
try:
Expand Down
19 changes: 11 additions & 8 deletions pandas/tests/io/parser/test_multi_thread.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Tests multithreading behaviour for reading and
parsing files for each parser defined in parsers.py
"""
from contextlib import ExitStack
from io import BytesIO
from multiprocessing.pool import ThreadPool

Expand Down Expand Up @@ -46,16 +47,18 @@ def test_multi_thread_string_io_read_csv(all_parsers):
"\n".join([f"{i:d},{i:d},{i:d}" for i in range(max_row_range)]).encode()
for _ in range(num_files)
]
files = [BytesIO(b) for b in bytes_to_df]

# Read all files in many threads.
pool = ThreadPool(8)
with ExitStack() as stack:
files = [stack.enter_context(BytesIO(b)) for b in bytes_to_df]

results = pool.map(parser.read_csv, files)
first_result = results[0]
pool = stack.enter_context(ThreadPool(8))

for result in results:
tm.assert_frame_equal(first_result, result)
results = pool.map(parser.read_csv, files)
first_result = results[0]

for result in results:
tm.assert_frame_equal(first_result, result)


def _generate_multi_thread_dataframe(parser, path, num_rows, num_tasks):
Expand Down Expand Up @@ -116,8 +119,8 @@ def reader(arg):
(num_rows * i // num_tasks, num_rows // num_tasks) for i in range(num_tasks)
]

pool = ThreadPool(processes=num_tasks)
results = pool.map(reader, tasks)
with ThreadPool(processes=num_tasks) as pool:
results = pool.map(reader, tasks)

header = results[0].columns

Expand Down