Skip to content

Commit 8b40ae9

Browse files
Backport PR #32544: BUG: pd.ExcelFile closes stream on destruction (#32657)
Co-authored-by: Robert de Vries <[email protected]>
1 parent b38357b commit 8b40ae9

File tree

4 files changed

+25
-12
lines changed

4 files changed

+25
-12
lines changed

doc/source/whatsnew/v1.0.2.rst

+1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ Fixed regressions
2929
- Fixed regression in the repr of an object-dtype :class:`Index` with bools and missing values (:issue:`32146`)
3030
- Fixed regression in :meth:`read_csv` in which the ``encoding`` option was not recognized with certain file-like objects (:issue:`31819`)
3131
- Fixed regression in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when reindexing with (tz-aware) index and ``method=nearest`` (:issue:`26683`)
32+
- Fixed regression in :class:`ExcelFile` where the passed-in stream was closed by the destructor (:issue:`31467`)
3233
- Fixed regression in :meth:`DataFrame.reindex_like` on a :class:`DataFrame` subclass raised an ``AssertionError`` (:issue:`31925`)
3334
- Fixed regression in :meth:`Series.shift` with ``datetime64`` dtype when passing an integer ``fill_value`` (:issue:`32591`)
3435

pandas/io/excel/_base.py

+4-8
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,9 @@ def _workbook_class(self):
367367
def load_workbook(self, filepath_or_buffer):
368368
pass
369369

370+
def close(self):
371+
pass
372+
370373
@property
371374
@abc.abstractmethod
372375
def sheet_names(self):
@@ -895,14 +898,7 @@ def sheet_names(self):
895898

896899
def close(self):
897900
"""close io if necessary"""
898-
if self.engine == "openpyxl":
899-
# https://stackoverflow.com/questions/31416842/
900-
# openpyxl-does-not-close-excel-workbook-in-read-only-mode
901-
wb = self.book
902-
wb._archive.close()
903-
904-
if hasattr(self.io, "close"):
905-
self.io.close()
901+
self._reader.close()
906902

907903
def __enter__(self):
908904
return self

pandas/io/excel/_openpyxl.py

+5
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,11 @@ def load_workbook(self, filepath_or_buffer: FilePathOrBuffer):
497497
filepath_or_buffer, read_only=True, data_only=True, keep_links=False
498498
)
499499

500+
def close(self):
501+
# https://stackoverflow.com/questions/31416842/
502+
# openpyxl-does-not-close-excel-workbook-in-read-only-mode
503+
self.book.close()
504+
500505
@property
501506
def sheet_names(self) -> List[str]:
502507
return self.book.sheetnames

pandas/tests/io/excel/test_readers.py

+15-4
Original file line numberDiff line numberDiff line change
@@ -628,6 +628,17 @@ def test_read_from_py_localpath(self, read_ext):
628628

629629
tm.assert_frame_equal(expected, actual)
630630

631+
@td.check_file_leaks
632+
def test_close_from_py_localpath(self, read_ext):
633+
634+
# GH31467
635+
str_path = os.path.join("test1" + read_ext)
636+
with open(str_path, "rb") as f:
637+
x = pd.read_excel(f, "Sheet1", index_col=0)
638+
del x
639+
# should not throw an exception: the passed file must not have been closed
640+
f.read()
641+
631642
def test_reader_seconds(self, read_ext):
632643
if pd.read_excel.keywords["engine"] == "pyxlsb":
633644
pytest.xfail("Sheets containing datetimes not supported by pyxlsb")
@@ -1019,10 +1030,10 @@ def test_excel_read_buffer(self, engine, read_ext):
10191030
tm.assert_frame_equal(expected, actual)
10201031

10211032
def test_reader_closes_file(self, engine, read_ext):
1022-
f = open("test1" + read_ext, "rb")
1023-
with pd.ExcelFile(f) as xlsx:
1024-
# parses okay
1025-
pd.read_excel(xlsx, "Sheet1", index_col=0, engine=engine)
1033+
with open("test1" + read_ext, "rb") as f:
1034+
with pd.ExcelFile(f) as xlsx:
1035+
# parses okay
1036+
pd.read_excel(xlsx, "Sheet1", index_col=0, engine=engine)
10261037

10271038
assert f.closed
10281039

0 commit comments

Comments
 (0)