Skip to content

Commit d9b123f

Browse files
roberthdevries authored and SeeminSyed committed
BUG: pd.ExcelFile closes stream on destruction (pandas-dev#32544)
* FIX: pandas.ExcelFile should not close the stream passed as a parameter on destruction. Regression test added.
1 parent e8c5f85 commit d9b123f

File tree

4 files changed

+25
-12
lines changed

4 files changed

+25
-12
lines changed

doc/source/whatsnew/v1.0.2.rst

+1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ Fixed regressions
2929
- Fixed regression in the repr of an object-dtype :class:`Index` with bools and missing values (:issue:`32146`)
3030
- Fixed regression in :meth:`read_csv` in which the ``encoding`` option was not recognized with certain file-like objects (:issue:`31819`)
3131
- Fixed regression in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when reindexing with (tz-aware) index and ``method=nearest`` (:issue:`26683`)
32+
- Fixed regression in :class:`ExcelFile` where a stream passed in by the caller was closed by the destructor. (:issue:`31467`)
3233
- Fixed regression in :meth:`DataFrame.reindex_like` on a :class:`DataFrame` subclass raised an ``AssertionError`` (:issue:`31925`)
3334
- Fixed regression in :meth:`Series.shift` with ``datetime64`` dtype when passing an integer ``fill_value`` (:issue:`32591`)
3435

pandas/io/excel/_base.py

+4-8
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,9 @@ def _workbook_class(self):
366366
def load_workbook(self, filepath_or_buffer):
367367
pass
368368

369+
def close(self):
370+
pass
371+
369372
@property
370373
@abc.abstractmethod
371374
def sheet_names(self):
@@ -895,14 +898,7 @@ def sheet_names(self):
895898

896899
def close(self):
897900
"""close io if necessary"""
898-
if self.engine == "openpyxl":
899-
# https://stackoverflow.com/questions/31416842/
900-
# openpyxl-does-not-close-excel-workbook-in-read-only-mode
901-
wb = self.book
902-
wb._archive.close()
903-
904-
if hasattr(self.io, "close"):
905-
self.io.close()
901+
self._reader.close()
906902

907903
def __enter__(self):
908904
return self

pandas/io/excel/_openpyxl.py

+5
Original file line numberDiff line numberDiff line change
@@ -492,6 +492,11 @@ def load_workbook(self, filepath_or_buffer: FilePathOrBuffer):
492492
filepath_or_buffer, read_only=True, data_only=True, keep_links=False
493493
)
494494

495+
def close(self):
496+
# https://stackoverflow.com/questions/31416842/
497+
# openpyxl-does-not-close-excel-workbook-in-read-only-mode
498+
self.book.close()
499+
495500
@property
496501
def sheet_names(self) -> List[str]:
497502
return self.book.sheetnames

pandas/tests/io/excel/test_readers.py

+15-4
Original file line numberDiff line numberDiff line change
@@ -629,6 +629,17 @@ def test_read_from_py_localpath(self, read_ext):
629629

630630
tm.assert_frame_equal(expected, actual)
631631

632+
@td.check_file_leaks
633+
def test_close_from_py_localpath(self, read_ext):
634+
635+
# GH31467
636+
str_path = os.path.join("test1" + read_ext)
637+
with open(str_path, "rb") as f:
638+
x = pd.read_excel(f, "Sheet1", index_col=0)
639+
del x
640+
# should not throw an exception: the passed file must still be open here
641+
f.read()
642+
632643
def test_reader_seconds(self, read_ext):
633644
if pd.read_excel.keywords["engine"] == "pyxlsb":
634645
pytest.xfail("Sheets containing datetimes not supported by pyxlsb")
@@ -1020,10 +1031,10 @@ def test_excel_read_buffer(self, engine, read_ext):
10201031
tm.assert_frame_equal(expected, actual)
10211032

10221033
def test_reader_closes_file(self, engine, read_ext):
1023-
f = open("test1" + read_ext, "rb")
1024-
with pd.ExcelFile(f) as xlsx:
1025-
# parses okay
1026-
pd.read_excel(xlsx, "Sheet1", index_col=0, engine=engine)
1034+
with open("test1" + read_ext, "rb") as f:
1035+
with pd.ExcelFile(f) as xlsx:
1036+
# parses okay
1037+
pd.read_excel(xlsx, "Sheet1", index_col=0, engine=engine)
10271038

10281039
assert f.closed
10291040

0 commit comments

Comments
 (0)