1
1
import abc
2
2
import datetime
3
+ import inspect
3
4
from io import BufferedIOBase , BytesIO , RawIOBase
4
5
import os
5
6
from textwrap import fill
6
7
from typing import Any , Dict , Mapping , Union , cast
8
+ import warnings
7
9
8
10
from pandas ._config import config
9
11
10
12
from pandas ._libs .parsers import STR_NA_VALUES
11
13
from pandas ._typing import Buffer , FilePathOrBuffer , StorageOptions
14
+ from pandas .compat ._optional import import_optional_dependency
12
15
from pandas .errors import EmptyDataError
13
16
from pandas .util ._decorators import Appender , deprecate_nonkeyword_arguments
14
17
99
102
of dtype conversion.
100
103
engine : str, default None
101
104
If io is not a buffer or path, this must be set to identify io.
102
- Supported engines: "xlrd", "openpyxl", "odf", "pyxlsb", default "xlrd" .
105
+ Supported engines: "xlrd", "openpyxl", "odf", "pyxlsb".
103
106
Engine compatibility :
107
+
104
108
- "xlrd" supports most old/new Excel file formats.
105
109
- "openpyxl" supports newer Excel file formats.
106
110
- "odf" supports OpenDocument file formats (.odf, .ods, .odt).
107
111
- "pyxlsb" supports Binary Excel files.
112
+
113
+ .. versionchanged:: 1.2.0
114
+ The engine `xlrd <https://xlrd.readthedocs.io/en/latest/>`_
115
+ is no longer maintained, and is not supported with
116
+ python >= 3.9. When ``engine=None``, the following logic will be
117
+ used to determine the engine.
118
+
119
+ - If ``path_or_buffer`` is an OpenDocument format (.odf, .ods, .odt),
120
+ then `odf <https://pypi.org/project/odfpy/>`_ will be used.
121
+ - Otherwise if ``path_or_buffer`` is a bytes stream or an ``xlrd`` Book
121
+ instance, or the file has the extension ``.xls``, then ``xlrd`` will
122
+ be used.
124
+ - Otherwise if `openpyxl <https://pypi.org/project/openpyxl/>`_ is installed,
125
+ then ``openpyxl`` will be used.
126
+ - Otherwise ``xlrd`` will be used and a ``FutureWarning`` will be issued.
127
+
128
+ Specifying ``engine="xlrd"`` will continue to be allowed for the
129
+ indefinite future.
130
+
108
131
converters : dict, default None
109
132
Dict of functions for converting values in certain columns. Keys can
110
133
either be integers or column labels, values are functions that take one
@@ -880,13 +903,32 @@ class ExcelFile:
880
903
.xls, .xlsx, .xlsb, .xlsm, .odf, .ods, or .odt file.
881
904
engine : str, default None
882
905
If io is not a buffer or path, this must be set to identify io.
883
- Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb``,
884
- default ``xlrd``.
906
+ Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb``.
885
907
Engine compatibility :
908
+
886
909
- ``xlrd`` supports most old/new Excel file formats.
887
910
- ``openpyxl`` supports newer Excel file formats.
888
911
- ``odf`` supports OpenDocument file formats (.odf, .ods, .odt).
889
912
- ``pyxlsb`` supports Binary Excel files.
913
+
914
+ .. versionchanged:: 1.2.0
915
+
916
+ The engine `xlrd <https://xlrd.readthedocs.io/en/latest/>`_
917
+ is no longer maintained, and is not supported with
918
+ python >= 3.9. When ``engine=None``, the following logic will be
919
+ used to determine the engine.
920
+
921
+ - If ``path_or_buffer`` is an OpenDocument format (.odf, .ods, .odt),
922
+ then `odf <https://pypi.org/project/odfpy/>`_ will be used.
923
+ - Otherwise if ``path_or_buffer`` is a bytes stream or an ``xlrd`` Book
923
+ instance, or the file has the extension ``.xls``, then ``xlrd``
924
+ will be used.
926
+ - Otherwise if `openpyxl <https://pypi.org/project/openpyxl/>`_ is installed,
927
+ then ``openpyxl`` will be used.
928
+ - Otherwise ``xlrd`` will be used and a ``FutureWarning`` will be issued.
929
+
930
+ Specifying ``engine="xlrd"`` will continue to be allowed for the
931
+ indefinite future.
890
932
"""
891
933
892
934
from pandas .io .excel ._odfreader import ODFReader
@@ -905,14 +947,59 @@ def __init__(
905
947
self , path_or_buffer , engine = None , storage_options : StorageOptions = None
906
948
):
907
949
if engine is None :
908
- engine = "xlrd"
950
+ # Determine ext and use odf for ods stream/file
909
951
if isinstance (path_or_buffer , (BufferedIOBase , RawIOBase )):
952
+ ext = None
910
953
if _is_ods_stream (path_or_buffer ):
911
954
engine = "odf"
912
955
else :
913
956
ext = os .path .splitext (str (path_or_buffer ))[- 1 ]
914
957
if ext == ".ods" :
915
958
engine = "odf"
959
+
960
+ if (
961
+ import_optional_dependency (
962
+ "xlrd" , raise_on_missing = False , on_version = "ignore"
963
+ )
964
+ is not None
965
+ ):
966
+ from xlrd import Book
967
+
968
+ if isinstance (path_or_buffer , Book ):
969
+ engine = "xlrd"
970
+
971
+ # GH 35029 - Prefer openpyxl except for xls files
972
+ if engine is None :
973
+ if ext is None or isinstance (path_or_buffer , bytes ) or ext == ".xls" :
974
+ engine = "xlrd"
975
+ elif (
976
+ import_optional_dependency (
977
+ "openpyxl" , raise_on_missing = False , on_version = "ignore"
978
+ )
979
+ is not None
980
+ ):
981
+ engine = "openpyxl"
982
+ else :
983
+ caller = inspect .stack ()[1 ]
984
+ if (
985
+ caller .filename .endswith ("pandas/io/excel/_base.py" )
986
+ and caller .function == "read_excel"
987
+ ):
988
+ stacklevel = 4
989
+ else :
990
+ stacklevel = 2
991
+ warnings .warn (
992
+ "The xlrd engine is no longer maintained and is not "
993
+ "supported when using pandas with python >= 3.9. However, "
994
+ "the engine xlrd will continue to be allowed for the "
995
+ "indefinite future. Beginning with pandas 1.2.0, the "
996
+ "openpyxl engine will be used if it is installed and the "
997
+ "engine argument is not specified. Either install openpyxl "
998
+ "or specify engine='xlrd' to silence this warning." ,
999
+ FutureWarning ,
1000
+ stacklevel = stacklevel ,
1001
+ )
1002
+ engine = "xlrd"
916
1003
if engine not in self ._engines :
917
1004
raise ValueError (f"Unknown engine: { engine } " )
918
1005
0 commit comments