1
1
import abc
2
2
import datetime
3
+ import inspect
3
4
from io import BufferedIOBase , BytesIO , RawIOBase
4
5
import os
5
6
from textwrap import fill
6
7
from typing import Union
8
+ import warnings
7
9
8
10
from pandas ._config import config
9
11
10
12
from pandas ._libs .parsers import STR_NA_VALUES
13
+ from pandas .compat ._optional import import_optional_dependency
11
14
from pandas .errors import EmptyDataError
12
15
from pandas .util ._decorators import Appender , deprecate_nonkeyword_arguments
13
16
104
107
of dtype conversion.
105
108
engine : str, default None
106
109
If io is not a buffer or path, this must be set to identify io.
107
- Supported engines: "xlrd", "openpyxl", "odf", "pyxlsb", default "xlrd" .
110
+ Supported engines: "xlrd", "openpyxl", "odf", "pyxlsb".
108
111
Engine compatibility :
112
+
109
113
- "xlrd" supports most old/new Excel file formats.
110
114
- "openpyxl" supports newer Excel file formats.
111
115
- "odf" supports OpenDocument file formats (.odf, .ods, .odt).
112
116
- "pyxlsb" supports Binary Excel files.
117
+
118
+ .. versionchanged:: 1.1.5 in Debian, 1.2.0 upstream
119
+ The engine `xlrd <https://xlrd.readthedocs.io/en/latest/>`_
120
+ is no longer maintained, and is not supported with
121
+ python >= 3.9. When ``engine=None``, the following logic will be
122
+ used to determine the engine.
123
+
124
+ - If ``path_or_buffer`` is an OpenDocument format (.odf, .ods, .odt),
125
+ then `odf <https://pypi.org/project/odfpy/>`_ will be used.
126
+ - Otherwise if ``path_or_buffer`` is a bytes stream or an ``xlrd`` Book
127
+ instance, or the file has the extension ``.xls``, then ``xlrd`` will
128
+ be used.
129
+ - Otherwise if `openpyxl <https://pypi.org/project/openpyxl/>`_ is installed,
130
+ then ``openpyxl`` will be used.
131
+ - Otherwise ``xlrd`` will be used and a ``FutureWarning`` will be issued.
132
+
133
+ Specifying ``engine="xlrd"`` will continue to be allowed for the
134
+ indefinite future, but may require uninstalling (python3-)defusedxml.
135
+
113
136
converters : dict, default None
114
137
Dict of functions for converting values in certain columns. Keys can
115
138
either be integers or column labels, values are functions that take one
@@ -823,13 +846,32 @@ class ExcelFile:
823
846
.xls, .xlsx, .xlsb, .xlsm, .odf, .ods, or .odt file.
824
847
engine : str, default None
825
848
If io is not a buffer or path, this must be set to identify io.
826
- Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb``,
827
- default ``xlrd``.
849
+ Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb``.
828
850
Engine compatibility :
851
+
829
852
- ``xlrd`` supports most old/new Excel file formats.
830
853
- ``openpyxl`` supports newer Excel file formats.
831
854
- ``odf`` supports OpenDocument file formats (.odf, .ods, .odt).
832
855
- ``pyxlsb`` supports Binary Excel files.
856
+
857
+ .. versionchanged:: 1.1.5 in Debian, 1.2.0 upstream
858
+
859
+ The engine `xlrd <https://xlrd.readthedocs.io/en/latest/>`_
860
+ is no longer maintained, and is not supported with
861
+ python >= 3.9. When ``engine=None``, the following logic will be
862
+ used to determine the engine.
863
+
864
+ - If ``path_or_buffer`` is an OpenDocument format (.odf, .ods, .odt),
865
+ then `odf <https://pypi.org/project/odfpy/>`_ will be used.
866
+ - Otherwise if ``path_or_buffer`` is a bytes stream or an ``xlrd`` Book
867
+ instance, or the file has the extension ``.xls``, then ``xlrd``
868
+ will be used.
869
+ - Otherwise if `openpyxl <https://pypi.org/project/openpyxl/>`_ is installed,
870
+ then ``openpyxl`` will be used.
871
+ - Otherwise ``xlrd`` will be used and a ``FutureWarning`` will be issued.
872
+
873
+ Specifying ``engine="xlrd"`` will continue to be allowed for the
874
+ indefinite future, but may require uninstalling (python3-)defusedxml.
833
875
"""
834
876
835
877
from pandas .io .excel ._odfreader import _ODFReader
@@ -846,14 +888,59 @@ class ExcelFile:
846
888
847
889
def __init__ (self , path_or_buffer , engine = None ):
848
890
if engine is None :
849
- engine = "xlrd"
891
+ # Determine ext and use odf for ods stream/file
850
892
if isinstance (path_or_buffer , (BufferedIOBase , RawIOBase )):
893
+ ext = None
851
894
if _is_ods_stream (path_or_buffer ):
852
895
engine = "odf"
853
896
else :
854
897
ext = os .path .splitext (str (path_or_buffer ))[- 1 ]
855
898
if ext == ".ods" :
856
899
engine = "odf"
900
+
901
+ if (
902
+ import_optional_dependency (
903
+ "xlrd" , raise_on_missing = False , on_version = "ignore"
904
+ )
905
+ is not None
906
+ ):
907
+ from xlrd import Book
908
+
909
+ if isinstance (path_or_buffer , Book ):
910
+ engine = "xlrd"
911
+
912
+ # GH 35029 - Prefer openpyxl except for xls files
913
+ if engine is None :
914
+ if ext is None or isinstance (path_or_buffer , bytes ) or ext == ".xls" :
915
+ engine = "xlrd"
916
+ elif (
917
+ import_optional_dependency (
918
+ "openpyxl" , raise_on_missing = False , on_version = "ignore"
919
+ )
920
+ is not None
921
+ ):
922
+ engine = "openpyxl"
923
+ else :
924
+ caller = inspect .stack ()[1 ]
925
+ if (
926
+ caller .filename .endswith ("pandas/io/excel/_base.py" )
927
+ and caller .function == "read_excel"
928
+ ):
929
+ stacklevel = 4
930
+ else :
931
+ stacklevel = 2
932
+ warnings .warn (
933
+ "The xlrd engine is no longer maintained and is not "
934
+ "supported when using pandas with python >= 3.9. However, "
935
+ "the engine xlrd will continue to be allowed for the "
936
+ "indefinite future. The "
937
+ "openpyxl engine will be used if it is installed and the "
938
+ "engine argument is not specified. Either install openpyxl "
939
+ "or specify engine='xlrd' to silence this warning." ,
940
+ FutureWarning ,
941
+ stacklevel = stacklevel ,
942
+ )
943
+ engine = "xlrd"
857
944
if engine not in self ._engines :
858
945
raise ValueError (f"Unknown engine: { engine } " )
859
946
0 commit comments