Skip to content

Commit 95ee0b6

Browse files
authored
REG: read_excel with engine specified raises on non-path/non-buffer (#39586)
1 parent 6db0b55 commit 95ee0b6

File tree

5 files changed

+62
-19
lines changed

5 files changed

+62
-19
lines changed

doc/source/whatsnew/v1.2.2.rst

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ Fixed regressions
2323
- Fixed regression in :meth:`~DataFrame.to_csv` opening ``codecs.StreamWriter`` in binary mode instead of in text mode and ignoring user-provided ``mode`` (:issue:`39247`)
2424
- Fixed regression in :meth:`DataFrame.transform` failing in case of an empty DataFrame or Series (:issue:`39636`)
2525
- Fixed regression in :meth:`core.window.rolling.Rolling.count` where the ``min_periods`` argument would be set to ``0`` after the operation (:issue:`39554`)
26+
- Fixed regression in :func:`read_excel` that incorrectly raised when the argument ``io`` was neither a path nor a buffer and the ``engine`` argument was specified (:issue:`39528`)
2627
-
2728

2829
.. ---------------------------------------------------------------------------

pandas/io/excel/_base.py

+21-10
Original file line numberDiff line numberDiff line change
@@ -1069,26 +1069,37 @@ def __init__(
10691069

10701070
xlrd_version = LooseVersion(get_version(xlrd))
10711071

1072-
if xlrd_version is not None and isinstance(path_or_buffer, xlrd.Book):
1073-
ext = "xls"
1074-
else:
1075-
ext = inspect_excel_format(
1076-
content_or_path=path_or_buffer, storage_options=storage_options
1077-
)
1078-
1072+
ext = None
10791073
if engine is None:
1074+
# Only determine ext if it is needed
1075+
if xlrd_version is not None and isinstance(path_or_buffer, xlrd.Book):
1076+
ext = "xls"
1077+
else:
1078+
ext = inspect_excel_format(
1079+
content_or_path=path_or_buffer, storage_options=storage_options
1080+
)
1081+
10801082
# ext will always be valid, otherwise inspect_excel_format would raise
10811083
engine = config.get_option(f"io.excel.{ext}.reader", silent=True)
10821084
if engine == "auto":
10831085
engine = get_default_engine(ext, mode="reader")
10841086

1085-
if engine == "xlrd" and ext != "xls" and xlrd_version is not None:
1086-
if xlrd_version >= "2":
1087+
if engine == "xlrd" and xlrd_version is not None:
1088+
if ext is None:
1089+
# Need to determine ext in order to raise/warn
1090+
if isinstance(path_or_buffer, xlrd.Book):
1091+
ext = "xls"
1092+
else:
1093+
ext = inspect_excel_format(
1094+
path_or_buffer, storage_options=storage_options
1095+
)
1096+
1097+
if ext != "xls" and xlrd_version >= "2":
10871098
raise ValueError(
10881099
f"Your version of xlrd is {xlrd_version}. In xlrd >= 2.0, "
10891100
f"only the xls format is supported. Install openpyxl instead."
10901101
)
1091-
else:
1102+
elif ext != "xls":
10921103
caller = inspect.stack()[1]
10931104
if (
10941105
caller.filename.endswith(

pandas/io/excel/_openpyxl.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -533,15 +533,19 @@ def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]:
533533

534534
version = LooseVersion(get_version(openpyxl))
535535

536-
if version >= "3.0.0":
536+
# There is no good way of determining if a sheet is read-only
537+
# https://foss.heptapod.net/openpyxl/openpyxl/-/issues/1605
538+
is_readonly = hasattr(sheet, "reset_dimensions")
539+
540+
if version >= "3.0.0" and is_readonly:
537541
sheet.reset_dimensions()
538542

539543
data: List[List[Scalar]] = []
540544
for row_number, row in enumerate(sheet.rows):
541545
converted_row = [self._convert_cell(cell, convert_float) for cell in row]
542546
data.append(converted_row)
543547

544-
if version >= "3.0.0" and len(data) > 0:
548+
if version >= "3.0.0" and is_readonly and len(data) > 0:
545549
# With dimension reset, openpyxl no longer pads rows
546550
max_width = max(len(data_row) for data_row in data)
547551
if min(len(data_row) for data_row in data) < max_width:

pandas/tests/io/excel/test_openpyxl.py

+26-6
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,17 @@ def test_to_excel_with_openpyxl_engine(ext):
122122
styled.to_excel(filename, engine="openpyxl")
123123

124124

125+
@pytest.mark.parametrize("read_only", [True, False])
126+
def test_read_workbook(datapath, ext, read_only):
127+
# GH 39528
128+
filename = datapath("io", "data", "excel", "test1" + ext)
129+
wb = openpyxl.load_workbook(filename, read_only=read_only)
130+
result = pd.read_excel(wb, engine="openpyxl")
131+
wb.close()
132+
expected = pd.read_excel(filename)
133+
tm.assert_frame_equal(result, expected)
134+
135+
125136
@pytest.mark.parametrize(
126137
"header, expected_data",
127138
[
@@ -139,13 +150,22 @@ def test_to_excel_with_openpyxl_engine(ext):
139150
@pytest.mark.parametrize(
140151
"filename", ["dimension_missing", "dimension_small", "dimension_large"]
141152
)
142-
@pytest.mark.xfail(
143-
LooseVersion(get_version(openpyxl)) < "3.0.0",
144-
reason="openpyxl read-only sheet is incorrect when dimension data is wrong",
145-
)
146-
def test_read_with_bad_dimension(datapath, ext, header, expected_data, filename):
153+
# When read_only is None, pass the file path to read_excel instead of an openpyxl workbook
154+
@pytest.mark.parametrize("read_only", [True, False, None])
155+
def test_read_with_bad_dimension(
156+
datapath, ext, header, expected_data, filename, read_only, request
157+
):
147158
# GH 38956, 39001 - no/incorrect dimension information
159+
version = LooseVersion(get_version(openpyxl))
160+
if (read_only or read_only is None) and version < "3.0.0":
161+
msg = "openpyxl read-only sheet is incorrect when dimension data is wrong"
162+
request.node.add_marker(pytest.mark.xfail(reason=msg))
148163
path = datapath("io", "data", "excel", f"{filename}{ext}")
149-
result = pd.read_excel(path, header=header)
164+
if read_only is None:
165+
result = pd.read_excel(path, header=header)
166+
else:
167+
wb = openpyxl.load_workbook(path, read_only=read_only)
168+
result = pd.read_excel(wb, engine="openpyxl", header=header)
169+
wb.close()
150170
expected = DataFrame(expected_data)
151171
tm.assert_frame_equal(result, expected)

pandas/tests/io/excel/test_readers.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from functools import partial
33
import os
44
from urllib.error import URLError
5+
from zipfile import BadZipFile
56

67
import numpy as np
78
import pytest
@@ -685,7 +686,13 @@ def test_missing_file_raises(self, read_ext):
685686

686687
def test_corrupt_bytes_raises(self, read_ext, engine):
687688
bad_stream = b"foo"
688-
with pytest.raises(ValueError, match="File is not a recognized excel file"):
689+
if engine is None or engine == "xlrd":
690+
error = ValueError
691+
msg = "File is not a recognized excel file"
692+
else:
693+
error = BadZipFile
694+
msg = "File is not a zip file"
695+
with pytest.raises(error, match=msg):
689696
pd.read_excel(bad_stream)
690697

691698
@tm.network

0 commit comments

Comments
 (0)