diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 211d0ec95f7ee..6eac25c661689 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -29,6 +29,7 @@ Fixed regressions - Fixed regression in the repr of an object-dtype :class:`Index` with bools and missing values (:issue:`32146`) - Fixed regression in :meth:`read_csv` in which the ``encoding`` option was not recognized with certain file-like objects (:issue:`31819`) - Fixed regression in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when reindexing with (tz-aware) index and ``method=nearest`` (:issue:`26683`) +- Fixed regression in :class:`ExcelFile` where the stream passed into the function was closed by the destructor. (:issue:`31467`) - Fixed regression in :meth:`DataFrame.reindex_like` on a :class:`DataFrame` subclass raised an ``AssertionError`` (:issue:`31925`) - Fixed regression in :meth:`Series.shift` with ``datetime64`` dtype when passing an integer ``fill_value`` (:issue:`32591`) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 2a91381b7fbeb..a8ae359509de2 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -367,6 +367,9 @@ def _workbook_class(self): def load_workbook(self, filepath_or_buffer): pass + def close(self): + pass + @property @abc.abstractmethod def sheet_names(self): @@ -895,14 +898,7 @@ def sheet_names(self): def close(self): """close io if necessary""" - if self.engine == "openpyxl": - # https://stackoverflow.com/questions/31416842/ - # openpyxl-does-not-close-excel-workbook-in-read-only-mode - wb = self.book - wb._archive.close() - - if hasattr(self.io, "close"): - self.io.close() + self._reader.close() def __enter__(self): return self diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index be52523e486af..c4327316d4784 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -497,6 +497,11 @@ def load_workbook(self, filepath_or_buffer: FilePathOrBuffer): filepath_or_buffer, read_only=True, data_only=True, keep_links=False ) + def close(self): + # https://stackoverflow.com/questions/31416842/ + # openpyxl-does-not-close-excel-workbook-in-read-only-mode + self.book.close() + @property def sheet_names(self) -> List[str]: return self.book.sheetnames diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 8d00ef1b7fe3e..140cb80dbf7f8 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -628,6 +628,17 @@ def test_read_from_py_localpath(self, read_ext): tm.assert_frame_equal(expected, actual) + @td.check_file_leaks + def test_close_from_py_localpath(self, read_ext): + + # GH31467 + str_path = os.path.join("test1" + read_ext) + with open(str_path, "rb") as f: + x = pd.read_excel(f, "Sheet1", index_col=0) + del x + # should not throw an exception because the passed file was closed + f.read() + def test_reader_seconds(self, read_ext): if pd.read_excel.keywords["engine"] == "pyxlsb": pytest.xfail("Sheets containing datetimes not supported by pyxlsb") @@ -1019,10 +1030,10 @@ def test_excel_read_buffer(self, engine, read_ext): tm.assert_frame_equal(expected, actual) def test_reader_closes_file(self, engine, read_ext): - f = open("test1" + read_ext, "rb") - with pd.ExcelFile(f) as xlsx: - # parses okay - pd.read_excel(xlsx, "Sheet1", index_col=0, engine=engine) + with open("test1" + read_ext, "rb") as f: + with pd.ExcelFile(f) as xlsx: + # parses okay + pd.read_excel(xlsx, "Sheet1", index_col=0, engine=engine) assert f.closed