Skip to content

Backport PR #39482: ERR: Unify error message for bad excel sheetnames #39536

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.2.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ Fixed regressions
Bug fixes
~~~~~~~~~

-
- :func:`pandas.read_excel` error message when a specified ``sheetname`` does not exist is now uniform across engines (:issue:`39250`)
-

.. ---------------------------------------------------------------------------
Expand Down
11 changes: 11 additions & 0 deletions pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,17 @@ def get_sheet_by_index(self, index):
def get_sheet_data(self, sheet, convert_float):
pass

def raise_if_bad_sheet_by_index(self, index: int) -> None:
    """
    Raise a uniform error when ``index`` does not address a known sheet.

    Parameters
    ----------
    index : int
        Position of the requested worksheet. Negative positions pass the
        check as long as they are not below ``-len(self.sheet_names)``.

    Raises
    ------
    ValueError
        If ``index`` lies outside ``[-n_sheets, n_sheets)``, where
        ``n_sheets`` is ``len(self.sheet_names)``.
    """
    n_sheets = len(self.sheet_names)
    # Bound the negative side as well, so an out-of-range negative index
    # is reported with this message instead of slipping past the check.
    if not -n_sheets <= index < n_sheets:
        raise ValueError(
            f"Worksheet index {index} is invalid, {n_sheets} worksheets found"
        )

def raise_if_bad_sheet_by_name(self, name: str) -> None:
    """
    Raise a uniform error when ``name`` is not one of ``self.sheet_names``.

    Parameters
    ----------
    name : str
        Name of the requested worksheet.

    Raises
    ------
    ValueError
        If ``name`` is not contained in ``self.sheet_names``.
    """
    if name in self.sheet_names:
        return
    raise ValueError(f"Worksheet named '{name}' not found")

def parse(
self,
sheet_name=0,
Expand Down
2 changes: 2 additions & 0 deletions pandas/io/excel/_odfreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,14 @@ def sheet_names(self) -> List[str]:
def get_sheet_by_index(self, index: int):
    """
    Return the ``Table`` element at position ``index`` in the document.

    The index is validated with ``raise_if_bad_sheet_by_index`` before the
    document's tables are collected.
    """
    from odf.table import Table

    self.raise_if_bad_sheet_by_index(index)
    return self.book.getElementsByType(Table)[index]

def get_sheet_by_name(self, name: str):
from odf.table import Table

self.raise_if_bad_sheet_by_name(name)
tables = self.book.getElementsByType(Table)

for table in tables:
Expand Down
2 changes: 2 additions & 0 deletions pandas/io/excel/_openpyxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,9 +492,11 @@ def sheet_names(self) -> List[str]:
return self.book.sheetnames

def get_sheet_by_name(self, name: str):
    """Validate ``name`` first, then look the worksheet up in the book by key."""
    self.raise_if_bad_sheet_by_name(name)
    workbook = self.book
    return workbook[name]

def get_sheet_by_index(self, index: int):
    """Validate ``index`` first, then pick that entry of ``book.worksheets``."""
    self.raise_if_bad_sheet_by_index(index)
    worksheets = self.book.worksheets
    return worksheets[index]

def _convert_cell(self, cell, convert_float: bool) -> Scalar:
Expand Down
2 changes: 2 additions & 0 deletions pandas/io/excel/_pyxlsb.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,11 @@ def sheet_names(self) -> List[str]:
return self.book.sheets

def get_sheet_by_name(self, name: str):
    """Validate ``name`` first, then fetch the sheet via ``book.get_sheet``."""
    self.raise_if_bad_sheet_by_name(name)
    fetch = self.book.get_sheet
    return fetch(name)

def get_sheet_by_index(self, index: int):
    """Validate the zero-based ``index``, then fetch the matching sheet."""
    self.raise_if_bad_sheet_by_index(index)
    # pyxlsb sheets are indexed from 1 onwards
    # There's a fix for this in the source, but the pypi package doesn't have it
    one_based = index + 1
    return self.book.get_sheet(one_based)
Expand Down
2 changes: 2 additions & 0 deletions pandas/io/excel/_xlrd.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,11 @@ def sheet_names(self):
return self.book.sheet_names()

def get_sheet_by_name(self, name):
    """Validate ``name`` first, then fetch the sheet via ``book.sheet_by_name``."""
    self.raise_if_bad_sheet_by_name(name)
    lookup = self.book.sheet_by_name
    return lookup(name)

def get_sheet_by_index(self, index):
    """Validate ``index`` first, then fetch the sheet via ``book.sheet_by_index``."""
    self.raise_if_bad_sheet_by_index(index)
    lookup = self.book.sheet_by_index
    return lookup(index)

def get_sheet_data(self, sheet, convert_float):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/excel/test_odf.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,5 @@ def test_nonexistent_sheetname_raises(read_ext):
# GH-27676
# Specifying a non-existent sheet_name parameter should throw an error
# with the sheet name.
with pytest.raises(ValueError, match="sheet xyz not found"):
with pytest.raises(ValueError, match="Worksheet named 'xyz' not found"):
pd.read_excel("blank.ods", sheet_name="xyz")
21 changes: 21 additions & 0 deletions pandas/tests/io/excel/test_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -622,6 +622,16 @@ def test_bad_engine_raises(self, read_ext):
with pytest.raises(ValueError, match="Unknown engine: foo"):
pd.read_excel("", engine=bad_engine)

@pytest.mark.parametrize(
    "sheet_name",
    [3, [0, 3], [3, 0], "Sheet4", ["Sheet1", "Sheet4"], ["Sheet4", "Sheet1"]],
)
def test_bad_sheetname_raises(self, read_ext, sheet_name):
    """``read_excel`` raises the unified ValueError for a missing sheet."""
    # GH 39250
    # Either alternative may match: integer entries hit the index message,
    # string entries hit the name message.
    pattern = "Worksheet index 3 is invalid|Worksheet named 'Sheet4' not found"
    path = "blank" + read_ext
    with pytest.raises(ValueError, match=pattern):
        pd.read_excel(path, sheet_name=sheet_name)

def test_missing_file_raises(self, read_ext):
bad_file = f"foo{read_ext}"
# CI tests with zh_CN.utf8, translates to "No such file or directory"
Expand Down Expand Up @@ -1159,6 +1169,17 @@ def test_sheet_name(self, read_ext, df_ref):
tm.assert_frame_equal(df1_parse, df_ref, check_names=False)
tm.assert_frame_equal(df2_parse, df_ref, check_names=False)

@pytest.mark.parametrize(
    "sheet_name",
    [3, [0, 3], [3, 0], "Sheet4", ["Sheet1", "Sheet4"], ["Sheet4", "Sheet1"]],
)
def test_bad_sheetname_raises(self, read_ext, sheet_name):
    """``ExcelFile.parse`` raises the unified ValueError for a missing sheet."""
    # GH 39250
    # Either alternative may match: integer entries hit the index message,
    # string entries hit the name message.
    pattern = "Worksheet index 3 is invalid|Worksheet named 'Sheet4' not found"
    path = "blank" + read_ext
    with pytest.raises(ValueError, match=pattern):
        with pd.ExcelFile(path) as workbook:
            workbook.parse(sheet_name=sheet_name)

def test_excel_read_buffer(self, engine, read_ext):
pth = "test1" + read_ext
expected = pd.read_excel(pth, sheet_name="Sheet1", index_col=0, engine=engine)
Expand Down
16 changes: 3 additions & 13 deletions pandas/tests/io/excel/test_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,19 +347,9 @@ def test_excel_sheet_by_name_raise(self, path, engine):

tm.assert_frame_equal(gt, df)

if engine == "odf":
msg = "sheet 0 not found"
with pytest.raises(ValueError, match=msg):
pd.read_excel(xl, "0")
elif engine == "xlwt":
import xlrd

msg = "No sheet named <'0'>"
with pytest.raises(xlrd.XLRDError, match=msg):
pd.read_excel(xl, sheet_name="0")
else:
with pytest.raises(KeyError, match="Worksheet 0 does not exist."):
pd.read_excel(xl, sheet_name="0")
msg = "Worksheet named '0' not found"
with pytest.raises(ValueError, match=msg):
pd.read_excel(xl, "0")

def test_excel_writer_context_manager(self, frame, path):
with ExcelWriter(path) as writer:
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/io/excel/test_xlrd.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,10 @@ def test_read_xlrd_book(read_ext, frame):
# TODO: test for openpyxl as well
def test_excel_table_sheet_by_index(datapath, read_ext):
path = datapath("io", "data", "excel", f"test1{read_ext}")
msg = "Worksheet named 'invalid_sheet_name' not found"
with ExcelFile(path, engine="xlrd") as excel:
with pytest.raises(xlrd.XLRDError):
pd.read_excel(excel, sheet_name="asdf")
with pytest.raises(ValueError, match=msg):
pd.read_excel(excel, sheet_name="invalid_sheet_name")


def test_excel_file_warning_with_xlsx_file(datapath):
Expand Down