Skip to content

Commit 60fd04a

Browse files
authored
Backport PR #41806 on branch 1.2.x (#41835)
1 parent 29e8da1 commit 60fd04a

File tree

4 files changed

+30
-7
lines changed

4 files changed

+30
-7
lines changed

doc/source/whatsnew/v1.2.5.rst

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ Fixed regressions
1818
- Fixed regression in :meth:`DataFrame.sum` and :meth:`DataFrame.prod` when ``min_count`` and ``numeric_only`` are both given (:issue:`41074`)
1919
- Regression in :func:`read_csv` when using ``memory_map=True`` with a non-UTF8 encoding (:issue:`40986`)
2020
- Regression in :meth:`DataFrame.replace` and :meth:`Series.replace` when the values to replace are given as a NumPy float array (:issue:`40371`)
21+
- Regression in :func:`ExcelFile` when a corrupt file is opened but not closed (:issue:`41778`)
2122

2223
.. ---------------------------------------------------------------------------
2324

pandas/io/excel/_base.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,11 @@ def __init__(self, filepath_or_buffer, storage_options: StorageOptions = None):
388388
elif hasattr(self.handles.handle, "read"):
389389
# N.B. xlrd.Book has a read attribute too
390390
self.handles.handle.seek(0)
391-
self.book = self.load_workbook(self.handles.handle)
391+
try:
392+
self.book = self.load_workbook(self.handles.handle)
393+
except Exception:
394+
self.close()
395+
raise
392396
elif isinstance(self.handles.handle, bytes):
393397
self.book = self.load_workbook(BytesIO(self.handles.handle))
394398
else:
@@ -406,6 +410,11 @@ def load_workbook(self, filepath_or_buffer):
406410
pass
407411

408412
def close(self):
413+
if hasattr(self, "book") and hasattr(self.book, "close"):
414+
# pyxlsb: opens a TemporaryFile
415+
# openpyxl: https://stackoverflow.com/questions/31416842/
416+
# openpyxl-does-not-close-excel-workbook-in-read-only-mode
417+
self.book.close()
409418
self.handles.close()
410419

411420
@property

pandas/io/excel/_openpyxl.py

-6
Original file line numberDiff line numberDiff line change
@@ -487,12 +487,6 @@ def load_workbook(self, filepath_or_buffer: FilePathOrBuffer):
487487
filepath_or_buffer, read_only=True, data_only=True, keep_links=False
488488
)
489489

490-
def close(self):
491-
# https://stackoverflow.com/questions/31416842/
492-
# openpyxl-does-not-close-excel-workbook-in-read-only-mode
493-
self.book.close()
494-
super().close()
495-
496490
@property
497491
def sheet_names(self) -> List[str]:
498492
return self.book.sheetnames

pandas/tests/io/excel/test_readers.py

+19
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from datetime import datetime, time
22
from functools import partial
33
import os
4+
from pathlib import Path
45
from urllib.error import URLError
56
from zipfile import BadZipFile
67

@@ -1273,3 +1274,21 @@ def test_read_datetime_multiindex(self, engine, read_ext):
12731274
expected = DataFrame([], columns=expected_column_index)
12741275

12751276
tm.assert_frame_equal(expected, actual)
1277+
1278+
def test_corrupt_files_closed(self, request, engine, read_ext):
1279+
# GH41778
1280+
errors = (BadZipFile, ValueError)
1281+
if engine is None:
1282+
pytest.skip()
1283+
elif engine == "xlrd":
1284+
import xlrd
1285+
1286+
errors = (BadZipFile, ValueError, xlrd.biffh.XLRDError)
1287+
1288+
with tm.ensure_clean(f"corrupt{read_ext}") as file:
1289+
Path(file).write_text("corrupt")
1290+
with tm.assert_produces_warning(False):
1291+
try:
1292+
pd.ExcelFile(file, engine=engine)
1293+
except errors:
1294+
pass

0 commit comments

Comments
 (0)