Skip to content

Commit bdd9feb

Browse files
twoertwein authored and JulianWgs committed
REGR: close corrupt files in ExcelFile (pandas-dev#41806)
1 parent f284a0f commit bdd9feb

File tree

4 files changed: +29 -9 lines changed

doc/source/whatsnew/v1.2.5.rst

+1
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ Fixed regressions
1818
- Fixed regression in :meth:`DataFrame.sum` and :meth:`DataFrame.prod` when ``min_count`` and ``numeric_only`` are both given (:issue:`41074`)
1919
- Regression in :func:`read_csv` when using ``memory_map=True`` with a non-UTF8 encoding (:issue:`40986`)
2020
- Regression in :meth:`DataFrame.replace` and :meth:`Series.replace` when the values to replace are a NumPy float array (:issue:`40371`)
21+
- Regression in :class:`ExcelFile` when a corrupt file is opened but not closed (:issue:`41778`)
2122

2223
.. ---------------------------------------------------------------------------
2324

pandas/io/excel/_base.py

+9-3
Original file line number | Diff line number | Diff line change
@@ -422,7 +422,11 @@ def __init__(self, filepath_or_buffer, storage_options: StorageOptions = None):
422422
elif hasattr(self.handles.handle, "read"):
423423
# N.B. xlrd.Book has a read attribute too
424424
self.handles.handle.seek(0)
425-
self.book = self.load_workbook(self.handles.handle)
425+
try:
426+
self.book = self.load_workbook(self.handles.handle)
427+
except Exception:
428+
self.close()
429+
raise
426430
elif isinstance(self.handles.handle, bytes):
427431
self.book = self.load_workbook(BytesIO(self.handles.handle))
428432
else:
@@ -440,8 +444,10 @@ def load_workbook(self, filepath_or_buffer):
440444
pass
441445

442446
def close(self):
443-
if hasattr(self.book, "close"):
444-
# pyxlsb opens a TemporaryFile
447+
if hasattr(self, "book") and hasattr(self.book, "close"):
448+
# pyxlsb: opens a TemporaryFile
449+
# openpyxl: https://stackoverflow.com/questions/31416842/
450+
# openpyxl-does-not-close-excel-workbook-in-read-only-mode
445451
self.book.close()
446452
self.handles.close()
447453

pandas/io/excel/_openpyxl.py

-6
Original file line number | Diff line number | Diff line change
@@ -528,12 +528,6 @@ def load_workbook(self, filepath_or_buffer: FilePathOrBuffer):
528528
filepath_or_buffer, read_only=True, data_only=True, keep_links=False
529529
)
530530

531-
def close(self):
532-
# https://stackoverflow.com/questions/31416842/
533-
# openpyxl-does-not-close-excel-workbook-in-read-only-mode
534-
self.book.close()
535-
super().close()
536-
537531
@property
538532
def sheet_names(self) -> list[str]:
539533
return self.book.sheetnames

pandas/tests/io/excel/test_readers.py

+19
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
)
55
from functools import partial
66
import os
7+
from pathlib import Path
78
from urllib.error import URLError
89
from zipfile import BadZipFile
910

@@ -1499,3 +1500,21 @@ def test_engine_invalid_option(self, read_ext):
14991500
with pytest.raises(ValueError, match="Value must be one of *"):
15001501
with pd.option_context(f"io.excel{read_ext}.reader", "abc"):
15011502
pass
1503+
1504+
def test_corrupt_files_closed(self, request, engine, read_ext):
1505+
# GH41778
1506+
errors = (BadZipFile,)
1507+
if engine is None:
1508+
pytest.skip()
1509+
elif engine == "xlrd":
1510+
import xlrd
1511+
1512+
errors = (BadZipFile, xlrd.biffh.XLRDError)
1513+
1514+
with tm.ensure_clean(f"corrupt{read_ext}") as file:
1515+
Path(file).write_text("corrupt")
1516+
with tm.assert_produces_warning(False):
1517+
try:
1518+
pd.ExcelFile(file, engine=engine)
1519+
except errors:
1520+
pass

0 commit comments

Comments
 (0)