diff --git a/doc/source/whatsnew/v1.2.2.rst b/doc/source/whatsnew/v1.2.2.rst index c5825d0881515..2759b243c2a5a 100644 --- a/doc/source/whatsnew/v1.2.2.rst +++ b/doc/source/whatsnew/v1.2.2.rst @@ -27,7 +27,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- +- :func:`pandas.read_excel` error message when a specified ``sheet_name`` does not exist is now uniform across engines (:issue:`39250`) - .. --------------------------------------------------------------------------- diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 5be8dbf152309..a324dfe4dab1e 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -425,6 +425,17 @@ def get_sheet_by_index(self, index): def get_sheet_data(self, sheet, convert_float): pass + def raise_if_bad_sheet_by_index(self, index: int) -> None: + n_sheets = len(self.sheet_names) + if index >= n_sheets: + raise ValueError( + f"Worksheet index {index} is invalid, {n_sheets} worksheets found" + ) + + def raise_if_bad_sheet_by_name(self, name: str) -> None: + if name not in self.sheet_names: + raise ValueError(f"Worksheet named '{name}' not found") + def parse( self, sheet_name=0, diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index c5c3927216850..8987d5bb42057 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -57,12 +57,14 @@ def sheet_names(self) -> List[str]: def get_sheet_by_index(self, index: int): from odf.table import Table + self.raise_if_bad_sheet_by_index(index) tables = self.book.getElementsByType(Table) return tables[index] def get_sheet_by_name(self, name: str): from odf.table import Table + self.raise_if_bad_sheet_by_name(name) tables = self.book.getElementsByType(Table) for table in tables: diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index 7de958df206d5..583baf3b239d8 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -492,9 +492,11 @@ def sheet_names(self) -> List[str]: return 
self.book.sheetnames def get_sheet_by_name(self, name: str): + self.raise_if_bad_sheet_by_name(name) return self.book[name] def get_sheet_by_index(self, index: int): + self.raise_if_bad_sheet_by_index(index) return self.book.worksheets[index] def _convert_cell(self, cell, convert_float: bool) -> Scalar: diff --git a/pandas/io/excel/_pyxlsb.py b/pandas/io/excel/_pyxlsb.py index de4f7bba1a179..f77a6bd5b1ad5 100644 --- a/pandas/io/excel/_pyxlsb.py +++ b/pandas/io/excel/_pyxlsb.py @@ -47,9 +47,11 @@ def sheet_names(self) -> List[str]: return self.book.sheets def get_sheet_by_name(self, name: str): + self.raise_if_bad_sheet_by_name(name) return self.book.get_sheet(name) def get_sheet_by_index(self, index: int): + self.raise_if_bad_sheet_by_index(index) # pyxlsb sheets are indexed from 1 onwards # There's a fix for this in the source, but the pypi package doesn't have it return self.book.get_sheet(index + 1) diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py index c655db4bc772b..5eb88a694218a 100644 --- a/pandas/io/excel/_xlrd.py +++ b/pandas/io/excel/_xlrd.py @@ -44,9 +44,11 @@ def sheet_names(self): return self.book.sheet_names() def get_sheet_by_name(self, name): + self.raise_if_bad_sheet_by_name(name) return self.book.sheet_by_name(name) def get_sheet_by_index(self, index): + self.raise_if_bad_sheet_by_index(index) return self.book.sheet_by_index(index) def get_sheet_data(self, sheet, convert_float): diff --git a/pandas/tests/io/excel/test_odf.py b/pandas/tests/io/excel/test_odf.py index d6c6399f082c6..c99d9ae62bf54 100644 --- a/pandas/tests/io/excel/test_odf.py +++ b/pandas/tests/io/excel/test_odf.py @@ -42,5 +42,5 @@ def test_nonexistent_sheetname_raises(read_ext): # GH-27676 # Specifying a non-existent sheet_name parameter should throw an error # with the sheet name. 
- with pytest.raises(ValueError, match="sheet xyz not found"): + with pytest.raises(ValueError, match="Worksheet named 'xyz' not found"): pd.read_excel("blank.ods", sheet_name="xyz") diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 8b1a96f694e71..9b3d359dc01a5 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -622,6 +622,16 @@ def test_bad_engine_raises(self, read_ext): with pytest.raises(ValueError, match="Unknown engine: foo"): pd.read_excel("", engine=bad_engine) + @pytest.mark.parametrize( + "sheet_name", + [3, [0, 3], [3, 0], "Sheet4", ["Sheet1", "Sheet4"], ["Sheet4", "Sheet1"]], + ) + def test_bad_sheetname_raises(self, read_ext, sheet_name): + # GH 39250 + msg = "Worksheet index 3 is invalid|Worksheet named 'Sheet4' not found" + with pytest.raises(ValueError, match=msg): + pd.read_excel("blank" + read_ext, sheet_name=sheet_name) + def test_missing_file_raises(self, read_ext): bad_file = f"foo{read_ext}" # CI tests with zh_CN.utf8, translates to "No such file or directory" @@ -1159,6 +1169,17 @@ def test_sheet_name(self, read_ext, df_ref): tm.assert_frame_equal(df1_parse, df_ref, check_names=False) tm.assert_frame_equal(df2_parse, df_ref, check_names=False) + @pytest.mark.parametrize( + "sheet_name", + [3, [0, 3], [3, 0], "Sheet4", ["Sheet1", "Sheet4"], ["Sheet4", "Sheet1"]], + ) + def test_bad_sheetname_raises(self, read_ext, sheet_name): + # GH 39250 + msg = "Worksheet index 3 is invalid|Worksheet named 'Sheet4' not found" + with pytest.raises(ValueError, match=msg): + with pd.ExcelFile("blank" + read_ext) as excel: + excel.parse(sheet_name=sheet_name) + def test_excel_read_buffer(self, engine, read_ext): pth = "test1" + read_ext expected = pd.read_excel(pth, sheet_name="Sheet1", index_col=0, engine=engine) diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 6a2ac2f6003d7..af0de05965398 100644 --- 
a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -347,19 +347,9 @@ def test_excel_sheet_by_name_raise(self, path, engine): tm.assert_frame_equal(gt, df) - if engine == "odf": - msg = "sheet 0 not found" - with pytest.raises(ValueError, match=msg): - pd.read_excel(xl, "0") - elif engine == "xlwt": - import xlrd - - msg = "No sheet named <'0'>" - with pytest.raises(xlrd.XLRDError, match=msg): - pd.read_excel(xl, sheet_name="0") - else: - with pytest.raises(KeyError, match="Worksheet 0 does not exist."): - pd.read_excel(xl, sheet_name="0") + msg = "Worksheet named '0' not found" + with pytest.raises(ValueError, match=msg): + pd.read_excel(xl, "0") def test_excel_writer_context_manager(self, frame, path): with ExcelWriter(path) as writer: diff --git a/pandas/tests/io/excel/test_xlrd.py b/pandas/tests/io/excel/test_xlrd.py index 2a1114a9570f0..1b4458d0437a1 100644 --- a/pandas/tests/io/excel/test_xlrd.py +++ b/pandas/tests/io/excel/test_xlrd.py @@ -43,9 +43,10 @@ def test_read_xlrd_book(read_ext, frame): # TODO: test for openpyxl as well def test_excel_table_sheet_by_index(datapath, read_ext): path = datapath("io", "data", "excel", f"test1{read_ext}") + msg = "Worksheet named 'invalid_sheet_name' not found" with ExcelFile(path, engine="xlrd") as excel: - with pytest.raises(xlrd.XLRDError): - pd.read_excel(excel, sheet_name="asdf") + with pytest.raises(ValueError, match=msg): + pd.read_excel(excel, sheet_name="invalid_sheet_name") def test_excel_file_warning_with_xlsx_file(datapath):