From 9b018f68af06545d30b33caf91e5a2c29e901189 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sun, 8 Mar 2020 20:02:09 +0100 Subject: [PATCH 1/8] FIX: pandas.ExcelFile should not close stream passed as parameter on destruction Regression test added Bonus: refactor closing of openpyxl file into openpyxl class instead of anti-pattern check in ExcelFile.close() for openpyxl engine --- pandas/io/excel/_base.py | 13 +++++-------- pandas/io/excel/_odfreader.py | 3 +++ pandas/io/excel/_openpyxl.py | 5 +++++ pandas/io/excel/_pyxlsb.py | 3 +++ pandas/io/excel/_xlrd.py | 3 +++ pandas/tests/io/excel/test_readers.py | 22 ++++++++++++++++++---- 6 files changed, 37 insertions(+), 12 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index f98d9501f1f73..c224f2e46879a 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -366,6 +366,10 @@ def _workbook_class(self): def load_workbook(self, filepath_or_buffer): pass + @abc.abstractmethod + def close(self): + pass + @property @abc.abstractmethod def sheet_names(self): @@ -895,14 +899,7 @@ def sheet_names(self): def close(self): """close io if necessary""" - if self.engine == "openpyxl": - # https://stackoverflow.com/questions/31416842/ - # openpyxl-does-not-close-excel-workbook-in-read-only-mode - wb = self.book - wb._archive.close() - - if hasattr(self.io, "close"): - self.io.close() + self._reader.close() def __enter__(self): return self diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index 7af776dc1a10f..b1fb312cd6020 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -33,6 +33,9 @@ def load_workbook(self, filepath_or_buffer: FilePathOrBuffer): return load(filepath_or_buffer) + def close(self): + pass + @property def empty_value(self) -> str: """Property for compat with other readers.""" diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index a96c0f814e2d8..0696d82e51f34 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -492,6 +492,11 @@ def load_workbook(self, filepath_or_buffer: FilePathOrBuffer): filepath_or_buffer, read_only=True, data_only=True, keep_links=False ) + def close(self): + # https://stackoverflow.com/questions/31416842/ + # openpyxl-does-not-close-excel-workbook-in-read-only-mode + self.book.close() + @property def sheet_names(self) -> List[str]: return self.book.sheetnames diff --git a/pandas/io/excel/_pyxlsb.py b/pandas/io/excel/_pyxlsb.py index 0d96c8c4acdb8..13e41f364ac6d 100644 --- a/pandas/io/excel/_pyxlsb.py +++ b/pandas/io/excel/_pyxlsb.py @@ -36,6 +36,9 @@ def load_workbook(self, filepath_or_buffer: FilePathOrBuffer): return open_workbook(filepath_or_buffer) + def close(self): + pass + @property def sheet_names(self) -> List[str]: return self.book.sheets diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py index 8f7d3b1368fc7..ff9d9d3a448c6 100644 --- a/pandas/io/excel/_xlrd.py +++ b/pandas/io/excel/_xlrd.py @@ -36,6 +36,9 @@ def load_workbook(self, filepath_or_buffer): else: return open_workbook(filepath_or_buffer) + def close(self): + pass + @property def sheet_names(self): return self.book.sheet_names() diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index a59b409809eed..f1b621d228206 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -629,6 +629,20 @@ def test_read_from_py_localpath(self, read_ext): tm.assert_frame_equal(expected, actual) + @td.skip_if_no("py.path") + @td.check_file_leaks + def test_close_from_py_localpath(self, read_ext): + + # GH31467 + from py.path import local as LocalPath + + str_path = os.path.join("test1" + read_ext) + with open(str_path, 'rb') as f: + x = pd.read_excel(f, "Sheet1", index_col=0) + del x + # should not throw an exception because the passed file was closed + f.read() + def test_reader_seconds(self, read_ext): if pd.read_excel.keywords["engine"] == "pyxlsb": pytest.xfail("Sheets containing datetimes not supported by pyxlsb") @@ -1020,10 +1034,10 @@ def test_excel_read_buffer(self, engine, read_ext): tm.assert_frame_equal(expected, actual) def test_reader_closes_file(self, engine, read_ext): - f = open("test1" + read_ext, "rb") - with pd.ExcelFile(f) as xlsx: - # parses okay - pd.read_excel(xlsx, "Sheet1", index_col=0, engine=engine) + with open("test1" + read_ext, "rb") as f: + with pd.ExcelFile(f) as xlsx: + # parses okay + pd.read_excel(xlsx, "Sheet1", index_col=0, engine=engine) assert f.closed From 3c8104150bdb8df5d811d725c3fa6d2c2e3b80d7 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sun, 8 Mar 2020 20:06:58 +0100 Subject: [PATCH 2/8] Fix from black reformatter --- pandas/tests/io/excel/test_readers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index f1b621d228206..999cc2f990a9b 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -637,7 +637,7 @@ def test_close_from_py_localpath(self, read_ext): from py.path import local as LocalPath str_path = os.path.join("test1" + read_ext) - with open(str_path, 'rb') as f: + with open(str_path, "rb") as f: x = pd.read_excel(f, "Sheet1", index_col=0) del x # should not throw an exception because the passed file was closed From 22a06fce67d1b5ff3d5383f98e51658b3c253b32 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sun, 8 Mar 2020 20:13:27 +0100 Subject: [PATCH 3/8] Add whatsnew entry --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index c473500c205d8..26d5922b2dd23 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -305,6 +305,7 @@ I/O timestamps with ``version="2.0"`` (:issue:`31652`). - Bug in :meth:`read_csv` was raising `TypeError` when `sep=None` was used in combination with `comment` keyword (:issue:`31396`) - Bug in :class:`HDFStore` that caused it to set to ``int64`` the dtype of a ``datetime64`` column when reading a DataFrame in Python 3 from fixed format written in Python 2 (:issue:`31750`) +- Bug in :class:`ExcelFile` where the stream passed into the function was closed by the destructor. (:issue:`31467`) Plotting From 9b9fff9640216816e67c8e192ea0a4de67c37f42 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sun, 8 Mar 2020 20:37:40 +0100 Subject: [PATCH 4/8] Remove unused import --- pandas/tests/io/excel/test_readers.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 999cc2f990a9b..1b0701e721ee0 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -629,13 +629,10 @@ def test_read_from_py_localpath(self, read_ext): tm.assert_frame_equal(expected, actual) - @td.skip_if_no("py.path") @td.check_file_leaks def test_close_from_py_localpath(self, read_ext): # GH31467 - from py.path import local as LocalPath - str_path = os.path.join("test1" + read_ext) with open(str_path, "rb") as f: x = pd.read_excel(f, "Sheet1", index_col=0) From 443780de1691938c11a2bd6ad03297603507cd11 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Mon, 9 Mar 2020 21:30:37 +0100 Subject: [PATCH 5/8] Revert "Fix warning in unit test" This reverts commit 002f1e00ec639b8b3b36438326e0f7a7842201f5. --- pandas/tests/io/excel/test_openpyxl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py index 60c943d95e510..10ed192062d9c 100644 --- a/pandas/tests/io/excel/test_openpyxl.py +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -114,7 +114,7 @@ def test_to_excel_with_openpyxl_engine(ext, tmpdir): df2 = DataFrame({"B": np.linspace(1, 20, 10)}) df = pd.concat([df1, df2], axis=1) styled = df.style.applymap( - lambda val: "color: %s" % ("red" if val < 0 else "black") + lambda val: "color: %s" % "red" if val < 0 else "black" ).highlight_max() filename = tmpdir / "styled.xlsx" From 58e83fb45612586fcc6cc74f7a2daf7deb402bcd Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Mon, 9 Mar 2020 21:34:59 +0100 Subject: [PATCH 6/8] Change the close() method of the _BaseExcelReader class --- pandas/io/excel/_base.py | 1 - pandas/io/excel/_odfreader.py | 3 --- pandas/io/excel/_pyxlsb.py | 3 --- pandas/io/excel/_xlrd.py | 3 --- 4 files changed, 10 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index c224f2e46879a..d1139f640cef4 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -366,7 +366,6 @@ def _workbook_class(self): def load_workbook(self, filepath_or_buffer): pass - @abc.abstractmethod def close(self): pass diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index b1fb312cd6020..7af776dc1a10f 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -33,9 +33,6 @@ def load_workbook(self, filepath_or_buffer: FilePathOrBuffer): return load(filepath_or_buffer) - def close(self): - pass - @property def empty_value(self) -> str: """Property for compat with other readers.""" diff --git a/pandas/io/excel/_pyxlsb.py b/pandas/io/excel/_pyxlsb.py index 13e41f364ac6d..0d96c8c4acdb8 100644 --- a/pandas/io/excel/_pyxlsb.py +++ b/pandas/io/excel/_pyxlsb.py @@ -36,9 +36,6 @@ def load_workbook(self, filepath_or_buffer: FilePathOrBuffer): return open_workbook(filepath_or_buffer) - def close(self): - pass - @property def sheet_names(self) -> List[str]: return self.book.sheets diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py index ff9d9d3a448c6..8f7d3b1368fc7 100644 --- a/pandas/io/excel/_xlrd.py +++ b/pandas/io/excel/_xlrd.py @@ -36,9 +36,6 @@ def load_workbook(self, filepath_or_buffer): else: return open_workbook(filepath_or_buffer) - def close(self): - pass - @property def sheet_names(self): return self.book.sheet_names() From 760967572cb0d13533f7d786e93f5da1cf288a3e Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Mon, 9 Mar 2020 21:40:47 +0100 Subject: [PATCH 7/8] Moved whatsnew entry from v1.1.0 to v1.0.2 --- doc/source/whatsnew/v1.0.2.rst | 2 +- doc/source/whatsnew/v1.1.0.rst | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 10191542b6d41..245b688496562 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -27,7 +27,7 @@ Fixed regressions - Fixed regression in the repr of an object-dtype :class:`Index` with bools and missing values (:issue:`32146`) - Fixed regression in :meth:`read_csv` in which the ``encoding`` option was not recognized with certain file-like objects (:issue:`31819`) - Fixed regression in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when reindexing with (tz-aware) index and ``method=nearest`` (:issue:`26683`) - +- Fixed regression in :class:`ExcelFile` where the stream passed into the function was closed by the destructor. (:issue:`31467`) .. --------------------------------------------------------------------------- diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 26d5922b2dd23..c473500c205d8 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -305,7 +305,6 @@ I/O timestamps with ``version="2.0"`` (:issue:`31652`). - Bug in :meth:`read_csv` was raising `TypeError` when `sep=None` was used in combination with `comment` keyword (:issue:`31396`) - Bug in :class:`HDFStore` that caused it to set to ``int64`` the dtype of a ``datetime64`` column when reading a DataFrame in Python 3 from fixed format written in Python 2 (:issue:`31750`) -- Bug in :class:`ExcelFile` where the stream passed into the function was closed by the destructor. (:issue:`31467`) Plotting From 509f2039b67ddc7091d1cb08e29f8be13198e457 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Wed, 11 Mar 2020 15:42:12 +0100 Subject: [PATCH 8/8] Remove left-over change --- pandas/tests/io/excel/test_openpyxl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py index 10ed192062d9c..60c943d95e510 100644 --- a/pandas/tests/io/excel/test_openpyxl.py +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -114,7 +114,7 @@ def test_to_excel_with_openpyxl_engine(ext, tmpdir): df2 = DataFrame({"B": np.linspace(1, 20, 10)}) df = pd.concat([df1, df2], axis=1) styled = df.style.applymap( - lambda val: "color: %s" % "red" if val < 0 else "black" + lambda val: "color: %s" % ("red" if val < 0 else "black") ).highlight_max() filename = tmpdir / "styled.xlsx"