From 9e3e6e6582c9b8ad002039708cb7ce4f850ce34d Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Fri, 28 Oct 2022 15:49:50 -0400 Subject: [PATCH 1/8] CLN: Remove xlrd < 2.0 code --- pandas/io/excel/_base.py | 27 --------------------------- pandas/io/excel/_xlrd.py | 2 +- pandas/tests/io/excel/__init__.py | 4 ---- pandas/tests/io/excel/test_xlrd.py | 16 ---------------- 4 files changed, 1 insertion(+), 48 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index bc3abfb94f31c..3e4ce611cd105 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1597,7 +1597,6 @@ def __init__( xlrd_version = Version(get_version(xlrd)) - ext = None if engine is None: # Only determine ext if it is needed if xlrd_version is not None and isinstance(path_or_buffer, xlrd.Book): @@ -1616,32 +1615,6 @@ def __init__( if engine == "auto": engine = get_default_engine(ext, mode="reader") - if engine == "xlrd" and xlrd_version is not None: - if ext is None: - # Need ext to determine ext in order to raise/warn - if isinstance(path_or_buffer, xlrd.Book): - ext = "xls" - else: - ext = inspect_excel_format( - path_or_buffer, storage_options=storage_options - ) - - # Pass through if ext is None, otherwise check if ext valid for xlrd - if ext and ext != "xls" and xlrd_version >= Version("2"): - raise ValueError( - f"Your version of xlrd is {xlrd_version}. In xlrd >= 2.0, " - f"only the xls format is supported. Install openpyxl instead." - ) - elif ext and ext != "xls": - stacklevel = find_stack_level() - warnings.warn( - f"Your version of xlrd is {xlrd_version}. In xlrd >= 2.0, " - f"only the xls format is supported. Install " - f"openpyxl instead.", - FutureWarning, - stacklevel=stacklevel, - ) - assert engine is not None self.engine = engine self.storage_options = storage_options diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py index 171705dee6e59..f2bee75b088bd 100644 --- a/pandas/io/excel/_xlrd.py +++ b/pandas/io/excel/_xlrd.py @@ -30,7 +30,7 @@ def __init__( Object to be parsed. {storage_options} """ - err_msg = "Install xlrd >= 1.0.0 for Excel support" + err_msg = "Install xlrd >= 2.0.1 for Excel support" import_optional_dependency("xlrd", extra=err_msg) super().__init__(filepath_or_buffer, storage_options=storage_options) diff --git a/pandas/tests/io/excel/__init__.py b/pandas/tests/io/excel/__init__.py index 9136153101e23..419761cbe1d6d 100644 --- a/pandas/tests/io/excel/__init__.py +++ b/pandas/tests/io/excel/__init__.py @@ -9,8 +9,4 @@ pytest.mark.filterwarnings( "ignore:This method will be removed in future versions:DeprecationWarning" ), - # GH 38571 - pytest.mark.filterwarnings( - "ignore:.*In xlrd >= 2.0, only the xls format is supported:FutureWarning" - ), ] diff --git a/pandas/tests/io/excel/test_xlrd.py b/pandas/tests/io/excel/test_xlrd.py index 30dddbd7de50b..17d62d335fce1 100644 --- a/pandas/tests/io/excel/test_xlrd.py +++ b/pandas/tests/io/excel/test_xlrd.py @@ -39,22 +39,6 @@ def test_read_xlrd_book(read_ext_xlrd, datapath): tm.assert_frame_equal(result, expected) -def test_excel_file_warning_with_xlsx_file(datapath): - # GH 29375 - path = datapath("io", "data", "excel", "test1.xlsx") - has_openpyxl = import_optional_dependency("openpyxl", errors="ignore") is not None - if not has_openpyxl: - with tm.assert_produces_warning( - FutureWarning, - raise_on_extra_warnings=False, - match="The xlrd engine is no longer maintained", - ): - ExcelFile(path, engine=None) - else: - with tm.assert_produces_warning(None): - pd.read_excel(path, "Sheet1", engine=None) - - def test_read_excel_warning_with_xlsx_file(datapath): # GH 29375 path = datapath("io", "data", "excel", "test1.xlsx") From 760eb3f2d3bbf72e883d715017451a99edbf2828 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Fri, 28 Oct 2022 15:57:06 -0400 Subject: [PATCH 2/8] Add test --- pandas/tests/io/excel/test_xlrd.py | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/pandas/tests/io/excel/test_xlrd.py b/pandas/tests/io/excel/test_xlrd.py index 17d62d335fce1..1f8fb4b801356 100644 --- a/pandas/tests/io/excel/test_xlrd.py +++ b/pandas/tests/io/excel/test_xlrd.py @@ -2,8 +2,6 @@ import pytest -from pandas.compat._optional import import_optional_dependency - import pandas as pd import pandas._testing as tm @@ -39,19 +37,13 @@ def test_read_xlrd_book(read_ext_xlrd, datapath): tm.assert_frame_equal(result, expected) -def test_read_excel_warning_with_xlsx_file(datapath): +def test_read_xlsx_fails(datapath): # GH 29375 + from xlrd.biffh import XLRDError + path = datapath("io", "data", "excel", "test1.xlsx") - has_openpyxl = import_optional_dependency("openpyxl", errors="ignore") is not None - if not has_openpyxl: - with pytest.raises( - ValueError, - match="Your version of xlrd is ", - ): - pd.read_excel(path, "Sheet1", engine=None) - else: - with tm.assert_produces_warning(None): - pd.read_excel(path, "Sheet1", engine=None) + with pytest.raises(XLRDError, match="Excel xlsx file; not supported"): + pd.read_excel(path, engine="xlrd") @pytest.mark.parametrize( From ee8579f7b69246b29a80683dd6774ec58986fca3 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Fri, 28 Oct 2022 16:12:59 -0400 Subject: [PATCH 3/8] Add warning --- pandas/io/excel/_base.py | 8 ++++++++ pandas/tests/io/excel/__init__.py | 4 ++++ pandas/tests/io/excel/test_readers.py | 6 +++++- pandas/tests/io/excel/test_xlrd.py | 12 ++++++++---- 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 3e4ce611cd105..9f091aa3bc97e 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1615,6 +1615,14 @@ def __init__( if engine == "auto": engine = get_default_engine(ext, mode="reader") + if engine == "xlrd": + warnings.warn( + "The xlrd engine is no longer maintained and " + "will likely be unusable in the future", + category=UserWarning, + stacklevel=find_stack_level(), + ) + assert engine is not None self.engine = engine self.storage_options = storage_options diff --git a/pandas/tests/io/excel/__init__.py b/pandas/tests/io/excel/__init__.py index 419761cbe1d6d..23ca0682ee3c5 100644 --- a/pandas/tests/io/excel/__init__.py +++ b/pandas/tests/io/excel/__init__.py @@ -9,4 +9,8 @@ pytest.mark.filterwarnings( "ignore:This method will be removed in future versions:DeprecationWarning" ), + # GH#49376 + pytest.mark.filterwarnings( + "ignore:The xlrd engine is no longer maintained.*:UserWarning" + ), ] diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 8ad15ac05e26a..a49f343c524ad 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -1317,6 +1317,8 @@ def test_read_excel_squeeze(self, read_ext): match="The squeeze argument has been deprecated " "and will be removed in a future version. " 'Append .squeeze\\("columns"\\) to the call to squeeze.\n\n', + # xlrd always raises a warning + raise_on_extra_warnings=(read_ext == "xls"), ): actual = pd.read_excel( f, sheet_name="two_columns", index_col=0, squeeze=True @@ -1685,7 +1687,9 @@ def test_corrupt_files_closed(self, engine, read_ext): with tm.ensure_clean(f"corrupt{read_ext}") as file: Path(file).write_text("corrupt") - with tm.assert_produces_warning(False): + warn = UserWarning if engine == "xlrd" else None + msg = "The xlrd engine is no longer maintained" + with tm.assert_produces_warning(warn, match=msg): try: pd.ExcelFile(file, engine=engine) except errors: diff --git a/pandas/tests/io/excel/test_xlrd.py b/pandas/tests/io/excel/test_xlrd.py index 1f8fb4b801356..f869ea211e6c5 100644 --- a/pandas/tests/io/excel/test_xlrd.py +++ b/pandas/tests/io/excel/test_xlrd.py @@ -28,8 +28,10 @@ def test_read_xlrd_book(read_ext_xlrd, datapath): sheet_name = "Sheet1" pth = datapath("io", "data", "excel", "test1.xls") with xlrd.open_workbook(pth) as book: - with ExcelFile(book, engine=engine) as xl: - result = pd.read_excel(xl, sheet_name=sheet_name, index_col=0) + msg = "The xlrd engine is no longer maintained" + with tm.assert_produces_warning(UserWarning, match=msg): + with ExcelFile(book, engine=engine) as xl: + result = pd.read_excel(xl, sheet_name=sheet_name, index_col=0) expected = pd.read_excel( book, sheet_name=sheet_name, engine=engine, index_col=0 @@ -42,8 +44,10 @@ def test_read_xlsx_fails(datapath): from xlrd.biffh import XLRDError path = datapath("io", "data", "excel", "test1.xlsx") - with pytest.raises(XLRDError, match="Excel xlsx file; not supported"): - pd.read_excel(path, engine="xlrd") + msg = "The xlrd engine is no longer maintained" + with tm.assert_produces_warning(UserWarning, match=msg): + with pytest.raises(XLRDError, match="Excel xlsx file; not supported"): + pd.read_excel(path, engine="xlrd") @pytest.mark.parametrize( From 4478868ef2e8a2cb05c30d3f76c2c549afdd32de Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 29 Oct 2022 07:21:08 -0400 Subject: [PATCH 4/8] Fix type-hint --- pandas/io/excel/_base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 9f091aa3bc97e..0cc279cbdaa98 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1599,6 +1599,7 @@ def __init__( if engine is None: # Only determine ext if it is needed + ext: str | None if xlrd_version is not None and isinstance(path_or_buffer, xlrd.Book): ext = "xls" else: From d8d43cb2eeebc9e678ba7ed0aefc78adab1c27ac Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Mon, 31 Oct 2022 16:31:22 -0400 Subject: [PATCH 5/8] Update pandas/io/excel/_xlrd.py Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- pandas/io/excel/_xlrd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py index f2bee75b088bd..c556e4c68c6c0 100644 --- a/pandas/io/excel/_xlrd.py +++ b/pandas/io/excel/_xlrd.py @@ -30,7 +30,7 @@ def __init__( Object to be parsed. {storage_options} """ - err_msg = "Install xlrd >= 2.0.1 for Excel support" + err_msg = "Install xlrd >= 2.0.1 for xls Excel support" import_optional_dependency("xlrd", extra=err_msg) super().__init__(filepath_or_buffer, storage_options=storage_options) From bb4f1d9dd0ebb5094e54583cba516323697240b6 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Mon, 31 Oct 2022 16:33:11 -0400 Subject: [PATCH 6/8] Remove warning --- pandas/io/excel/_base.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 0cc279cbdaa98..a392549fa041c 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1616,14 +1616,6 @@ def __init__( if engine == "auto": engine = get_default_engine(ext, mode="reader") - if engine == "xlrd": - warnings.warn( - "The xlrd engine is no longer maintained and " - "will likely be unusable in the future", - category=UserWarning, - stacklevel=find_stack_level(), - ) - assert engine is not None self.engine = engine self.storage_options = storage_options From 1a5201eeb47cb15690816d754be23c04c09ff2a9 Mon Sep 17 00:00:00 2001 From: richard Date: Mon, 31 Oct 2022 23:32:10 -0400 Subject: [PATCH 7/8] Remove warnings in tests --- pandas/tests/io/excel/test_readers.py | 6 +----- pandas/tests/io/excel/test_xlrd.py | 12 ++++-------- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index a49f343c524ad..8ad15ac05e26a 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -1317,8 +1317,6 @@ def test_read_excel_squeeze(self, read_ext): match="The squeeze argument has been deprecated " "and will be removed in a future version. " 'Append .squeeze\\("columns"\\) to the call to squeeze.\n\n', - # xlrd always raises a warning - raise_on_extra_warnings=(read_ext == "xls"), ): actual = pd.read_excel( f, sheet_name="two_columns", index_col=0, squeeze=True @@ -1687,9 +1685,7 @@ def test_corrupt_files_closed(self, engine, read_ext): with tm.ensure_clean(f"corrupt{read_ext}") as file: Path(file).write_text("corrupt") - warn = UserWarning if engine == "xlrd" else None - msg = "The xlrd engine is no longer maintained" - with tm.assert_produces_warning(warn, match=msg): + with tm.assert_produces_warning(False): try: pd.ExcelFile(file, engine=engine) except errors: diff --git a/pandas/tests/io/excel/test_xlrd.py b/pandas/tests/io/excel/test_xlrd.py index f869ea211e6c5..1f8fb4b801356 100644 --- a/pandas/tests/io/excel/test_xlrd.py +++ b/pandas/tests/io/excel/test_xlrd.py @@ -28,10 +28,8 @@ def test_read_xlrd_book(read_ext_xlrd, datapath): sheet_name = "Sheet1" pth = datapath("io", "data", "excel", "test1.xls") with xlrd.open_workbook(pth) as book: - msg = "The xlrd engine is no longer maintained" - with tm.assert_produces_warning(UserWarning, match=msg): - with ExcelFile(book, engine=engine) as xl: - result = pd.read_excel(xl, sheet_name=sheet_name, index_col=0) + with ExcelFile(book, engine=engine) as xl: + result = pd.read_excel(xl, sheet_name=sheet_name, index_col=0) expected = pd.read_excel( book, sheet_name=sheet_name, engine=engine, index_col=0 @@ -44,10 +42,8 @@ def test_read_xlsx_fails(datapath): from xlrd.biffh import XLRDError path = datapath("io", "data", "excel", "test1.xlsx") - msg = "The xlrd engine is no longer maintained" - with tm.assert_produces_warning(UserWarning, match=msg): - with pytest.raises(XLRDError, match="Excel xlsx file; not supported"): - pd.read_excel(path, engine="xlrd") + with pytest.raises(XLRDError, match="Excel xlsx file; not supported"): + pd.read_excel(path, engine="xlrd") @pytest.mark.parametrize( From d24dba3665e85f902f4161db614b5df5f8df71c3 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Tue, 1 Nov 2022 16:02:29 -0400 Subject: [PATCH 8/8] Remove filter --- pandas/tests/io/excel/__init__.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/tests/io/excel/__init__.py b/pandas/tests/io/excel/__init__.py index 23ca0682ee3c5..419761cbe1d6d 100644 --- a/pandas/tests/io/excel/__init__.py +++ b/pandas/tests/io/excel/__init__.py @@ -9,8 +9,4 @@ pytest.mark.filterwarnings( "ignore:This method will be removed in future versions:DeprecationWarning" ), - # GH#49376 - pytest.mark.filterwarnings( - "ignore:The xlrd engine is no longer maintained.*:UserWarning" - ), ]