Skip to content

Commit b3a3932

Browse files
DEPR: Deprecate using xlrd engine for read_excel (#35029)
1 parent 1829a61 commit b3a3932

File tree

5 files changed

+157
-8
lines changed

5 files changed

+157
-8
lines changed

doc/source/whatsnew/v1.2.0.rst

+9
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,15 @@ including other versions of pandas.
88

99
{{ header }}
1010

11+
.. warning::
12+
13+
Previously, the default argument ``engine=None`` to ``pd.read_excel``
14+
would result in using the `xlrd <https://xlrd.readthedocs.io/en/latest/>`_ engine in
15+
many cases. The engine ``xlrd`` is no longer maintained, and is not supported with
16+
Python >= 3.9. If `openpyxl <https://pypi.org/project/openpyxl/>`_ is installed,
17+
many of these cases will now default to using the ``openpyxl`` engine. See the
18+
:func:`read_excel` documentation for more details.
19+
1120
.. ---------------------------------------------------------------------------
1221
1322
Enhancements

pandas/io/excel/_base.py

+91-4
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,17 @@
11
import abc
22
import datetime
3+
import inspect
34
from io import BufferedIOBase, BytesIO, RawIOBase
45
import os
56
from textwrap import fill
67
from typing import Any, Dict, Mapping, Union, cast
8+
import warnings
79

810
from pandas._config import config
911

1012
from pandas._libs.parsers import STR_NA_VALUES
1113
from pandas._typing import Buffer, FilePathOrBuffer, StorageOptions
14+
from pandas.compat._optional import import_optional_dependency
1215
from pandas.errors import EmptyDataError
1316
from pandas.util._decorators import Appender, deprecate_nonkeyword_arguments
1417

@@ -99,12 +102,32 @@
99102
of dtype conversion.
100103
engine : str, default None
101104
If io is not a buffer or path, this must be set to identify io.
102-
Supported engines: "xlrd", "openpyxl", "odf", "pyxlsb", default "xlrd".
105+
Supported engines: "xlrd", "openpyxl", "odf", "pyxlsb".
103106
Engine compatibility :
107+
104108
- "xlrd" supports most old/new Excel file formats.
105109
- "openpyxl" supports newer Excel file formats.
106110
- "odf" supports OpenDocument file formats (.odf, .ods, .odt).
107111
- "pyxlsb" supports Binary Excel files.
112+
113+
.. versionchanged:: 1.2.0
114+
The engine `xlrd <https://xlrd.readthedocs.io/en/latest/>`_
115+
is no longer maintained, and is not supported with
116+
Python >= 3.9. When ``engine=None``, the following logic will be
117+
used to determine the engine.
118+
119+
- If ``path_or_buffer`` is an OpenDocument format (.odf, .ods, .odt),
120+
then `odf <https://pypi.org/project/odfpy/>`_ will be used.
121+
- Otherwise if ``path_or_buffer`` is a bytes stream, is a file with the
122+
extension ``.xls``, or is an ``xlrd`` Book instance, then ``xlrd`` will
123+
be used.
124+
- Otherwise if `openpyxl <https://pypi.org/project/openpyxl/>`_ is installed,
125+
then ``openpyxl`` will be used.
126+
- Otherwise ``xlrd`` will be used and a ``FutureWarning`` will be issued.
127+
128+
Specifying ``engine="xlrd"`` will continue to be allowed for the
129+
indefinite future.
130+
108131
converters : dict, default None
109132
Dict of functions for converting values in certain columns. Keys can
110133
either be integers or column labels, values are functions that take one
@@ -880,13 +903,32 @@ class ExcelFile:
880903
.xls, .xlsx, .xlsb, .xlsm, .odf, .ods, or .odt file.
881904
engine : str, default None
882905
If io is not a buffer or path, this must be set to identify io.
883-
Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb``,
884-
default ``xlrd``.
906+
Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb``.
885907
Engine compatibility :
908+
886909
- ``xlrd`` supports most old/new Excel file formats.
887910
- ``openpyxl`` supports newer Excel file formats.
888911
- ``odf`` supports OpenDocument file formats (.odf, .ods, .odt).
889912
- ``pyxlsb`` supports Binary Excel files.
913+
914+
.. versionchanged:: 1.2.0
915+
916+
The engine `xlrd <https://xlrd.readthedocs.io/en/latest/>`_
917+
is no longer maintained, and is not supported with
918+
Python >= 3.9. When ``engine=None``, the following logic will be
919+
used to determine the engine.
920+
921+
- If ``path_or_buffer`` is an OpenDocument format (.odf, .ods, .odt),
922+
then `odf <https://pypi.org/project/odfpy/>`_ will be used.
923+
- Otherwise if ``path_or_buffer`` is a bytes stream, is a file with the
924+
extension ``.xls``, or is an ``xlrd`` Book instance, then ``xlrd``
925+
will be used.
926+
- Otherwise if `openpyxl <https://pypi.org/project/openpyxl/>`_ is installed,
927+
then ``openpyxl`` will be used.
928+
- Otherwise ``xlrd`` will be used and a ``FutureWarning`` will be issued.
929+
930+
Specifying ``engine="xlrd"`` will continue to be allowed for the
931+
indefinite future.
890932
"""
891933

892934
from pandas.io.excel._odfreader import ODFReader
@@ -905,14 +947,59 @@ def __init__(
905947
self, path_or_buffer, engine=None, storage_options: StorageOptions = None
906948
):
907949
if engine is None:
908-
engine = "xlrd"
950+
# Determine ext and use odf for ods stream/file
909951
if isinstance(path_or_buffer, (BufferedIOBase, RawIOBase)):
952+
ext = None
910953
if _is_ods_stream(path_or_buffer):
911954
engine = "odf"
912955
else:
913956
ext = os.path.splitext(str(path_or_buffer))[-1]
914957
if ext == ".ods":
915958
engine = "odf"
959+
960+
if (
961+
import_optional_dependency(
962+
"xlrd", raise_on_missing=False, on_version="ignore"
963+
)
964+
is not None
965+
):
966+
from xlrd import Book
967+
968+
if isinstance(path_or_buffer, Book):
969+
engine = "xlrd"
970+
971+
# GH 35029 - Prefer openpyxl except for xls files
972+
if engine is None:
973+
if ext is None or isinstance(path_or_buffer, bytes) or ext == ".xls":
974+
engine = "xlrd"
975+
elif (
976+
import_optional_dependency(
977+
"openpyxl", raise_on_missing=False, on_version="ignore"
978+
)
979+
is not None
980+
):
981+
engine = "openpyxl"
982+
else:
983+
caller = inspect.stack()[1]
984+
if (
985+
caller.filename.endswith("pandas/io/excel/_base.py")
986+
and caller.function == "read_excel"
987+
):
988+
stacklevel = 4
989+
else:
990+
stacklevel = 2
991+
warnings.warn(
992+
"The xlrd engine is no longer maintained and is not "
993+
"supported when using pandas with python >= 3.9. However, "
994+
"the engine xlrd will continue to be allowed for the "
995+
"indefinite future. Beginning with pandas 1.2.0, the "
996+
"openpyxl engine will be used if it is installed and the "
997+
"engine argument is not specified. Either install openpyxl "
998+
"or specify engine='xlrd' to silence this warning.",
999+
FutureWarning,
1000+
stacklevel=stacklevel,
1001+
)
1002+
engine = "xlrd"
9161003
if engine not in self._engines:
9171004
raise ValueError(f"Unknown engine: {engine}")
9181005

pandas/tests/io/excel/test_readers.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,10 @@ def test_date_conversion_overflow(self, read_ext):
577577
if pd.read_excel.keywords["engine"] == "openpyxl":
578578
pytest.xfail("Maybe not supported by openpyxl")
579579

580+
if pd.read_excel.keywords["engine"] is None:
581+
# GH 35029
582+
pytest.xfail("Defaults to openpyxl, maybe not supported")
583+
580584
result = pd.read_excel("testdateoverflow" + read_ext)
581585
tm.assert_frame_equal(result, expected)
582586

@@ -1159,7 +1163,7 @@ def test_excel_high_surrogate(self, engine):
11591163
expected = DataFrame(["\udc88"], columns=["Column1"])
11601164

11611165
# should not produce a segmentation violation
1162-
actual = pd.read_excel("high_surrogate.xlsx")
1166+
actual = pd.read_excel("high_surrogate.xlsx", engine="xlrd")
11631167
tm.assert_frame_equal(expected, actual)
11641168

11651169
@pytest.mark.parametrize("filename", ["df_empty.xlsx", "df_equals.xlsx"])

pandas/tests/io/excel/test_writers.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -351,12 +351,15 @@ def test_excel_sheet_by_name_raise(self, path, engine):
351351
msg = "sheet 0 not found"
352352
with pytest.raises(ValueError, match=msg):
353353
pd.read_excel(xl, "0")
354-
else:
354+
elif engine == "xlwt":
355355
import xlrd
356356

357357
msg = "No sheet named <'0'>"
358358
with pytest.raises(xlrd.XLRDError, match=msg):
359359
pd.read_excel(xl, sheet_name="0")
360+
else:
361+
with pytest.raises(KeyError, match="Worksheet 0 does not exist."):
362+
pd.read_excel(xl, sheet_name="0")
360363

361364
def test_excel_writer_context_manager(self, frame, path):
362365
with ExcelWriter(path) as writer:
@@ -1192,7 +1195,9 @@ def test_datetimes(self, path):
11921195

11931196
write_frame = DataFrame({"A": datetimes})
11941197
write_frame.to_excel(path, "Sheet1")
1195-
read_frame = pd.read_excel(path, sheet_name="Sheet1", header=0)
1198+
# GH 35029 - Default changed to openpyxl, but test is for odf/xlrd
1199+
engine = "odf" if path.endswith("ods") else "xlrd"
1200+
read_frame = pd.read_excel(path, sheet_name="Sheet1", header=0, engine=engine)
11961201

11971202
tm.assert_series_equal(write_frame["A"], read_frame["A"])
11981203

pandas/tests/io/excel/test_xlrd.py

+45-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import pytest
22

3+
from pandas.compat._optional import import_optional_dependency
4+
35
import pandas as pd
46
import pandas._testing as tm
57

@@ -38,6 +40,48 @@ def test_read_xlrd_book(read_ext, frame):
3840
# TODO: test for openpyxl as well
3941
def test_excel_table_sheet_by_index(datapath, read_ext):
4042
path = datapath("io", "data", "excel", f"test1{read_ext}")
41-
with ExcelFile(path) as excel:
43+
with ExcelFile(path, engine="xlrd") as excel:
4244
with pytest.raises(xlrd.XLRDError):
4345
pd.read_excel(excel, sheet_name="asdf")
46+
47+
48+
def test_excel_file_warning_with_xlsx_file(datapath):
49+
# GH 29375
50+
path = datapath("io", "data", "excel", "test1.xlsx")
51+
has_openpyxl = (
52+
import_optional_dependency(
53+
"openpyxl", raise_on_missing=False, on_version="ignore"
54+
)
55+
is not None
56+
)
57+
if not has_openpyxl:
58+
with tm.assert_produces_warning(
59+
FutureWarning,
60+
raise_on_extra_warnings=False,
61+
match="The xlrd engine is no longer maintained",
62+
):
63+
ExcelFile(path, engine=None)
64+
else:
65+
with tm.assert_produces_warning(None):
66+
pd.read_excel(path, "Sheet1", engine=None)
67+
68+
69+
def test_read_excel_warning_with_xlsx_file(tmpdir, datapath):
70+
# GH 29375
71+
path = datapath("io", "data", "excel", "test1.xlsx")
72+
has_openpyxl = (
73+
import_optional_dependency(
74+
"openpyxl", raise_on_missing=False, on_version="ignore"
75+
)
76+
is not None
77+
)
78+
if not has_openpyxl:
79+
with tm.assert_produces_warning(
80+
FutureWarning,
81+
raise_on_extra_warnings=False,
82+
match="The xlrd engine is no longer maintained",
83+
):
84+
pd.read_excel(path, "Sheet1", engine=None)
85+
else:
86+
with tm.assert_produces_warning(None):
87+
pd.read_excel(path, "Sheet1", engine=None)

0 commit comments

Comments
 (0)