Skip to content

CLN: Remove xlrd < 2.0 code #49376

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Nov 2, 2022
Merged
34 changes: 8 additions & 26 deletions pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1597,9 +1597,9 @@ def __init__(

xlrd_version = Version(get_version(xlrd))

ext = None
if engine is None:
# Only determine ext if it is needed
ext: str | None
if xlrd_version is not None and isinstance(path_or_buffer, xlrd.Book):
ext = "xls"
else:
Expand All @@ -1616,31 +1616,13 @@ def __init__(
if engine == "auto":
engine = get_default_engine(ext, mode="reader")

if engine == "xlrd" and xlrd_version is not None:
if ext is None:
# Need to determine ext in order to raise/warn
if isinstance(path_or_buffer, xlrd.Book):
ext = "xls"
else:
ext = inspect_excel_format(
path_or_buffer, storage_options=storage_options
)

# Pass through if ext is None, otherwise check if ext valid for xlrd
if ext and ext != "xls" and xlrd_version >= Version("2"):
raise ValueError(
f"Your version of xlrd is {xlrd_version}. In xlrd >= 2.0, "
f"only the xls format is supported. Install openpyxl instead."
)
elif ext and ext != "xls":
stacklevel = find_stack_level()
warnings.warn(
f"Your version of xlrd is {xlrd_version}. In xlrd >= 2.0, "
f"only the xls format is supported. Install "
f"openpyxl instead.",
FutureWarning,
stacklevel=stacklevel,
)
if engine == "xlrd":
warnings.warn(
"The xlrd engine is no longer maintained and "
"will likely be unusable in the future",
category=UserWarning,
stacklevel=find_stack_level(),
)

assert engine is not None
self.engine = engine
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/excel/_xlrd.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def __init__(
Object to be parsed.
{storage_options}
"""
err_msg = "Install xlrd >= 1.0.0 for Excel support"
err_msg = "Install xlrd >= 2.0.1 for Excel support"
import_optional_dependency("xlrd", extra=err_msg)
super().__init__(filepath_or_buffer, storage_options=storage_options)

Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/io/excel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
pytest.mark.filterwarnings(
"ignore:This method will be removed in future versions:DeprecationWarning"
),
# GH 38571
# GH#49376
pytest.mark.filterwarnings(
"ignore:.*In xlrd >= 2.0, only the xls format is supported:FutureWarning"
"ignore:The xlrd engine is no longer maintained.*:UserWarning"
),
]
6 changes: 5 additions & 1 deletion pandas/tests/io/excel/test_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1317,6 +1317,8 @@ def test_read_excel_squeeze(self, read_ext):
match="The squeeze argument has been deprecated "
"and will be removed in a future version. "
'Append .squeeze\\("columns"\\) to the call to squeeze.\n\n',
# xlrd always raises a warning
raise_on_extra_warnings=(read_ext == "xls"),
):
actual = pd.read_excel(
f, sheet_name="two_columns", index_col=0, squeeze=True
Expand Down Expand Up @@ -1685,7 +1687,9 @@ def test_corrupt_files_closed(self, engine, read_ext):

with tm.ensure_clean(f"corrupt{read_ext}") as file:
Path(file).write_text("corrupt")
with tm.assert_produces_warning(False):
warn = UserWarning if engine == "xlrd" else None
msg = "The xlrd engine is no longer maintained"
with tm.assert_produces_warning(warn, match=msg):
try:
pd.ExcelFile(file, engine=engine)
except errors:
Expand Down
42 changes: 11 additions & 31 deletions pandas/tests/io/excel/test_xlrd.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

import pytest

from pandas.compat._optional import import_optional_dependency

import pandas as pd
import pandas._testing as tm

Expand All @@ -30,44 +28,26 @@ def test_read_xlrd_book(read_ext_xlrd, datapath):
sheet_name = "Sheet1"
pth = datapath("io", "data", "excel", "test1.xls")
with xlrd.open_workbook(pth) as book:
with ExcelFile(book, engine=engine) as xl:
result = pd.read_excel(xl, sheet_name=sheet_name, index_col=0)
msg = "The xlrd engine is no longer maintained"
with tm.assert_produces_warning(UserWarning, match=msg):
with ExcelFile(book, engine=engine) as xl:
result = pd.read_excel(xl, sheet_name=sheet_name, index_col=0)

expected = pd.read_excel(
book, sheet_name=sheet_name, engine=engine, index_col=0
)
tm.assert_frame_equal(result, expected)


def test_excel_file_warning_with_xlsx_file(datapath):
# GH 29375
path = datapath("io", "data", "excel", "test1.xlsx")
has_openpyxl = import_optional_dependency("openpyxl", errors="ignore") is not None
if not has_openpyxl:
with tm.assert_produces_warning(
FutureWarning,
raise_on_extra_warnings=False,
match="The xlrd engine is no longer maintained",
):
ExcelFile(path, engine=None)
else:
with tm.assert_produces_warning(None):
pd.read_excel(path, "Sheet1", engine=None)


def test_read_excel_warning_with_xlsx_file(datapath):
def test_read_xlsx_fails(datapath):
# GH 29375
from xlrd.biffh import XLRDError

path = datapath("io", "data", "excel", "test1.xlsx")
has_openpyxl = import_optional_dependency("openpyxl", errors="ignore") is not None
if not has_openpyxl:
with pytest.raises(
ValueError,
match="Your version of xlrd is ",
):
pd.read_excel(path, "Sheet1", engine=None)
else:
with tm.assert_produces_warning(None):
pd.read_excel(path, "Sheet1", engine=None)
msg = "The xlrd engine is no longer maintained"
with tm.assert_produces_warning(UserWarning, match=msg):
with pytest.raises(XLRDError, match="Excel xlsx file; not supported"):
pd.read_excel(path, engine="xlrd")


@pytest.mark.parametrize(
Expand Down