pandas-dev · jreback · Dec 1, 2020 · Nov 2, 2019 · Aug 23, 2020 · Aug 26, 2020
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
@@ -144,6 +144,8 @@ Deprecations
 ~~~~~~~~~~~~
 - Deprecated parameter ``inplace`` in :meth:`MultiIndex.set_codes` and :meth:`MultiIndex.set_levels` (:issue:`35626`)
 - Deprecated parameter ``dtype`` in :~meth:`Index.copy` on method all index classes. Use the :meth:`Index.astype` method instead for changing dtype(:issue:`35853`)
+- Deprecated the "xlrd" engine in :func:`read_excel` and :class:`ExcelFile` for "xlsx" and "xlsm" files. The recommended engine for these files is "openpyxl", because "xlrd" is no longer maintained (:issue:`28547`)
+-
 -
 
 .. ---------------------------------------------------------------------------

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
@@ -4,6 +4,7 @@
 import os
 from textwrap import fill
 from typing import Any, Mapping, Union
+import warnings
 
 from pandas._config import config
 
@@ -837,7 +838,7 @@ class ExcelFile:
     engine : str, default None
         If io is not a buffer or path, this must be set to identify io.
         Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb``,
-        default ``xlrd``.
+        default ``odf`` for .ods files and ``xlrd`` otherwise.
         Engine compatibility :
         - ``xlrd`` supports most old/new Excel file formats.
         - ``openpyxl`` supports newer Excel file formats.
@@ -860,15 +861,26 @@ class ExcelFile:
     def __init__(
         self, path_or_buffer, engine=None, storage_options: StorageOptions = None
     ):
+        ext = None
+        if not isinstance(path_or_buffer, (BufferedIOBase, RawIOBase)):
+            ext = os.path.splitext(str(path_or_buffer))[-1][1:]
+
         if engine is None:
             engine = "xlrd"
             if isinstance(path_or_buffer, (BufferedIOBase, RawIOBase)):
                 if _is_ods_stream(path_or_buffer):
                     engine = "odf"
             else:
-                ext = os.path.splitext(str(path_or_buffer))[-1]
-                if ext == ".ods":
+                if ext == "ods":
                     engine = "odf"
+
+        elif engine == "xlrd" and ext in ("xlsx", "xlsm"):
+            warnings.warn(
+                'The Excel reader engine "xlrd" is deprecated, use "openpyxl" instead. '
+                'Specify engine="openpyxl" to suppress this warning.',
+                FutureWarning,
+                stacklevel=2,
+            )
         if engine not in self._engines:
             raise ValueError(f"Unknown engine: {engine}")
 

diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
@@ -1,3 +1,4 @@
+from datetime import datetime
 from typing import List
 
 import numpy as np
@@ -517,7 +518,11 @@ def _convert_cell(self, cell, convert_float: bool) -> Scalar:
 
         # TODO: replace with openpyxl constants
         if cell.is_date:
-            return cell.value
+            try:
+                # round to the nearest second; Excel stores timestamps inexactly
+                return datetime.fromtimestamp(round(cell.value.timestamp()))
+            except (AttributeError, OSError):
+                return cell.value
         elif cell.data_type == "e":
             return np.nan
         elif cell.data_type == "b":

diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
@@ -22,6 +22,9 @@
         marks=[
             td.skip_if_no("xlrd"),
             pytest.mark.filterwarnings("ignore:.*(tree\\.iter|html argument)"),
+            pytest.mark.filterwarnings(
+                'ignore:The Excel reader engine "xlrd" is deprecated,'
+            ),
         ],
     ),
     pytest.param(
@@ -942,7 +945,10 @@ def test_read_excel_squeeze(self, read_ext):
         expected = pd.Series([1, 2, 3], name="a")
         tm.assert_series_equal(actual, expected)
 
-    def test_deprecated_kwargs(self, read_ext):
+    def test_deprecated_kwargs(self, engine, read_ext):
+        if engine == "xlrd":
+            pytest.skip("Use of xlrd engine produces a FutureWarning as well")
+
         with tm.assert_produces_warning(FutureWarning, raise_on_extra_warnings=False):
             pd.read_excel("test1" + read_ext, "Sheet1", 0)
 

diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py
@@ -1199,6 +1199,9 @@ def test_datetimes(self, path):
 
         tm.assert_series_equal(write_frame["A"], read_frame["A"])
 
+    @pytest.mark.filterwarnings(
+        'ignore:The Excel reader engine "xlrd" is deprecated:FutureWarning'
+    )
     def test_bytes_io(self, engine):
         # see gh-7074
         bio = BytesIO()

diff --git a/pandas/tests/io/excel/test_xlrd.py b/pandas/tests/io/excel/test_xlrd.py
@@ -17,6 +17,9 @@ def skip_ods_and_xlsb_files(read_ext):
         pytest.skip("Not valid for xlrd")
 
 
+@pytest.mark.filterwarnings(
+    'ignore:The Excel reader engine "xlrd" is deprecated:FutureWarning'
+)
 def test_read_xlrd_book(read_ext, frame):
     df = frame
 
@@ -36,8 +39,31 @@ def test_read_xlrd_book(read_ext, frame):
 
 
 # TODO: test for openpyxl as well
+@pytest.mark.filterwarnings(
+    'ignore:The Excel reader engine "xlrd" is deprecated:FutureWarning'
+)
 def test_excel_table_sheet_by_index(datapath, read_ext):
     path = datapath("io", "data", "excel", f"test1{read_ext}")
-    with pd.ExcelFile(path) as excel:
+    with pd.ExcelFile(path, engine="xlrd") as excel:
         with pytest.raises(xlrd.XLRDError):
             pd.read_excel(excel, sheet_name="asdf")
+
+
+def test_excel_file_warning_with_xlsx_file(datapath):
+    # GH 29375: ExcelFile(engine="xlrd") on an .xlsx path emits a FutureWarning
+    path = datapath("io", "data", "excel", "test1.xlsx")
+    with tm.assert_produces_warning(
+        FutureWarning, check_stacklevel=True, raise_on_extra_warnings=False
+    ) as w:
+        pd.ExcelFile(path, engine="xlrd")
+        assert '"xlrd" is deprecated, use "openpyxl" instead.' in str(w[0].message)
+
+
+def test_read_excel_warning_with_xlsx_file(datapath):
+    # GH 29375: read_excel(engine="xlrd") on an .xlsx path emits a FutureWarning
+    path = datapath("io", "data", "excel", "test1.xlsx")
+    with tm.assert_produces_warning(
+        FutureWarning, check_stacklevel=False, raise_on_extra_warnings=False
+    ) as w:
+        pd.read_excel(path, "Sheet1", engine="xlrd")
+        assert '"xlrd" is deprecated, use "openpyxl" instead.' in str(w[0].message)