Revert all changes related to switching to openpyxl as the default

roberthdevries · roberthdevries · commit 101aa9766daa · 2020-08-26T11:01:53.000+02:00
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
@@ -144,7 +144,7 @@ Deprecations
 ~~~~~~~~~~~~
 - Deprecated parameter ``inplace`` in :meth:`MultiIndex.set_codes` and :meth:`MultiIndex.set_levels` (:issue:`35626`)
 - Deprecated parameter ``dtype`` in :~meth:`Index.copy` on method all index classes. Use the :meth:`Index.astype` method instead for changing dtype(:issue:`35853`)
-- :func:`read_excel` default engine "xlrd" is replaced by "openpyxl" because "xlrd" is deprecated (:issue:`28547`).
+- :func:`read_excel` "xlrd" engine is deprecated for all file types that can be handled by "openpyxl" because "xlrd" is no longer maintained (:issue:`28547`).
 -
 -
 
diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
@@ -826,7 +826,8 @@ def _is_ods_stream(stream: Union[BufferedIOBase, RawIOBase]) -> bool:
 class ExcelFile:
     """
     Class for parsing tabular excel sheets into DataFrame objects.
-    Uses xlrd, openpyxl or odf. See read_excel for more documentation
+
+    Uses the xlrd engine by default. See read_excel for more documentation.
 
     Parameters
     ----------
@@ -837,7 +838,7 @@ class ExcelFile:
     engine : str, default None
         If io is not a buffer or path, this must be set to identify io.
         Supported engines: ``xlrd``, ``openpyxl``, ``odf``, ``pyxlsb``,
-        default ``openpyxl``, ``xlrd`` for .xls files, ``odf`` for .ods files.
+        default ``xlrd`` for .xls* files, ``odf`` for .ods files.
         Engine compatibility :
         - ``xlrd`` supports most old/new Excel file formats.
         - ``openpyxl`` supports newer Excel file formats.
@@ -860,19 +861,20 @@ class ExcelFile:
     def __init__(
         self, path_or_buffer, engine=None, storage_options: StorageOptions = None
     ):
+        ext = None
+        if not isinstance(path_or_buffer, (BufferedIOBase, RawIOBase)):
+            ext = os.path.splitext(str(path_or_buffer))[-1][1:]
+
         if engine is None:
-            engine = "openpyxl"
+            engine = "xlrd"
             if isinstance(path_or_buffer, (BufferedIOBase, RawIOBase)):
                 if _is_ods_stream(path_or_buffer):
                     engine = "odf"
             else:
-                ext = os.path.splitext(str(path_or_buffer))[-1]
-                if ext == ".ods":
+                if ext == "ods":
                     engine = "odf"
-                elif ext == ".xls":
-                    engine = "xlrd"
 
-        elif engine == "xlrd":
+        elif engine == "xlrd" and ext in ("xlsx", "xlsm"):
             warnings.warn(
                 'The Excel reader engine "xlrd" is deprecated, use "openpyxl" instead. '
                 'Specify engine="openpyxl" to suppress this warning.',
diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
@@ -37,8 +37,8 @@
     pytest.param(
         None,
         marks=[
-            td.skip_if_no("openpyxl"),
-            pytest.mark.filterwarnings("ignore:.*html argument"),
+            td.skip_if_no("xlrd"),
+            pytest.mark.filterwarnings("ignore:.*(tree\\.iter|html argument)"),
         ],
     ),
     pytest.param("pyxlsb", marks=td.skip_if_no("pyxlsb")),
@@ -54,8 +54,6 @@ def _is_valid_engine_ext_pair(engine, read_ext: str) -> bool:
     engine = engine.values[0]
     if engine == "openpyxl" and read_ext == ".xls":
         return False
-    if engine is None and read_ext == ".xls":
-        return False
     if engine == "odf" and read_ext != ".ods":
         return False
     if read_ext == ".ods" and engine != "odf":
@@ -564,7 +562,7 @@ def test_date_conversion_overflow(self, read_ext):
             columns=["DateColWithBigInt", "StringCol"],
         )
 
-        if pd.read_excel.keywords["engine"] in ["openpyxl", None]:
+        if pd.read_excel.keywords["engine"] == "openpyxl":
             pytest.xfail("Maybe not supported by openpyxl")
 
         result = pd.read_excel("testdateoverflow" + read_ext)
@@ -969,19 +967,6 @@ def test_no_header_with_list_index_col(self, read_ext):
         )
         tm.assert_frame_equal(expected, result)
 
-    def test_excel_high_surrogate(self, engine, read_ext):
-        # GH 23809
-        if read_ext != ".xlsx":
-            pytest.skip("Test is only applicable to .xlsx file")
-        if engine in ["openpyxl", None]:
-            pytest.skip("Test does not work for openpyxl")
-
-        expected = pd.DataFrame(["\udc88"], columns=["Column1"])
-
-        # should not produce a segmentation violation
-        actual = pd.read_excel("high_surrogate.xlsx")
-        tm.assert_frame_equal(expected, actual)
-
 
 class TestExcelFileRead:
     @pytest.fixture(autouse=True)
@@ -1137,6 +1122,14 @@ def test_excel_read_binary(self, engine, read_ext):
         actual = pd.read_excel(data, engine=engine)
         tm.assert_frame_equal(expected, actual)
 
+    def test_excel_high_surrogate(self, engine):
+        # GH 23809
+        expected = pd.DataFrame(["\udc88"], columns=["Column1"])
+
+        # should not produce a segmentation violation
+        actual = pd.read_excel("high_surrogate.xlsx")
+        tm.assert_frame_equal(expected, actual)
+
     @pytest.mark.parametrize("filename", ["df_empty.xlsx", "df_equals.xlsx"])
     def test_header_with_index_col(self, engine, filename):
         # GH 33476
diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py
@@ -351,16 +351,12 @@ def test_excel_sheet_by_name_raise(self, path, engine):
             msg = "sheet 0 not found"
             with pytest.raises(ValueError, match=msg):
                 pd.read_excel(xl, "0")
-        elif engine == "xlwt":
+        else:
             import xlrd
 
             msg = "No sheet named <'0'>"
             with pytest.raises(xlrd.XLRDError, match=msg):
                 pd.read_excel(xl, sheet_name="0")
-        else:  # openpyxl
-            msg = "Worksheet 0 does not exist."
-            with pytest.raises(KeyError, match=msg):
-                pd.read_excel(xl, sheet_name="0")
 
     def test_excel_writer_context_manager(self, frame, path):
         with ExcelWriter(path) as writer:
@@ -1216,15 +1212,8 @@ def test_bytes_io(self, engine):
         df.to_excel(writer)
         writer.save()
 
-        if engine == "xlwt":
-            read_engine = "xlrd"
-        elif engine == "xlsxwriter":
-            read_engine = "openpyxl"
-        else:
-            read_engine = engine
-
         bio.seek(0)
-        reread_df = pd.read_excel(bio, index_col=0, engine=read_engine)
+        reread_df = pd.read_excel(bio, index_col=0)
         tm.assert_frame_equal(df, reread_df)
 
     def test_write_lists_dict(self, path):

Original file line number	Diff line number	Diff line change
`@@ -144,7 +144,7 @@ Deprecations`
`144`	`144`	`~~~~~~~~~~~~`
`145`	`145`	- Deprecated parameter ``inplace`` in :meth:`MultiIndex.set_codes` and :meth:`MultiIndex.set_levels` (:issue:`35626`)
`146`	`146`	- Deprecated parameter ``dtype`` in :~meth:`Index.copy` on method all index classes. Use the :meth:`Index.astype` method instead for changing dtype(:issue:`35853`)
`147`		-- :func:`read_excel` default engine "xlrd" is replaced by "openpyxl" because "xlrd" is deprecated (:issue:`28547`).
	`147`	+- :func:`read_excel` "xlrd" engine is deprecated for all file types that can be handled by "openpyxl" because "xlrd" is no longer maintained (:issue:`28547`).
`148`	`148`	`-`
`149`	`149`	`-`
`150`	`150`