pandas-dev · rhshadrach · Jul 2, 2021 · May 27, 2021 · May 28, 2021 · Jun 1, 2021
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -1064,6 +1064,7 @@ I/O
 - Bug in the conversion from PyArrow to pandas (e.g. for reading Parquet) with nullable dtypes and a PyArrow array whose data buffer size is not a multiple of the dtype size (:issue:`40896`)
 - Bug in :func:`read_excel` would raise an error when pandas could not determine the file type, even when user specified the ``engine`` argument (:issue:`41225`)
 - Bug in :func:`read_clipboard` copying from an excel file shifts values into the wrong column if there are null values in first column (:issue:`41108`)
+- Bug in :func:`read_excel` attempting to read chart sheets from ``.xlsx`` files (:issue:`41448`)
 
 Period
 ^^^^^^

diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
@@ -82,8 +82,9 @@
     or ``StringIO``.
 sheet_name : str, int, list, or None, default 0
     Strings are used for sheet names. Integers are used in zero-indexed
-    sheet positions. Lists of strings/integers are used to request
-    multiple sheets. Specify None to get all sheets.
+    sheet positions (chart sheets do not count as a sheet position).
+    Lists of strings/integers are used to request multiple sheets.
+    Specify None to get all worksheets.
 
     Available cases:
 
@@ -92,7 +93,7 @@
     * ``"Sheet1"``: Load sheet with name "Sheet1"
     * ``[0, 1, "Sheet5"]``: Load first, second and sheet named "Sheet5"
       as a dict of `DataFrame`
-    * None: All sheets.
+    * None: All worksheets.
 
 header : int, list of int, default 0
     Row (0-indexed) to use for the column labels of the parsed

diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
@@ -530,7 +530,7 @@ def load_workbook(self, filepath_or_buffer: FilePathOrBuffer):
 
     @property
     def sheet_names(self) -> list[str]:
-        return self.book.sheetnames
+        return [sheet.title for sheet in self.book.worksheets]  # excludes chartsheets
 
     def get_sheet_by_name(self, name: str):
         self.raise_if_bad_sheet_by_name(name)

diff --git a/pandas/tests/io/data/excel/chartsheet.xls b/pandas/tests/io/data/excel/chartsheet.xls
diff --git a/pandas/tests/io/data/excel/chartsheet.xlsb b/pandas/tests/io/data/excel/chartsheet.xlsb
diff --git a/pandas/tests/io/data/excel/chartsheet.xlsm b/pandas/tests/io/data/excel/chartsheet.xlsm
diff --git a/pandas/tests/io/data/excel/chartsheet.xlsx b/pandas/tests/io/data/excel/chartsheet.xlsx
diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
@@ -1250,6 +1250,34 @@ def test_trailing_blanks(self, read_ext):
         result = pd.read_excel(file_name)
         assert result.shape == (3, 3)
 
+    def test_ignore_chartsheets_by_str(self, request, read_ext):
+        # GH 41448: requesting a chartsheet by name must raise, not be read
+        if pd.read_excel.keywords["engine"] == "odf":
+            pytest.skip("chartsheets do not exist in the ODF format")
+        if pd.read_excel.keywords["engine"] == "pyxlsb":
+            request.node.add_marker(
+                pytest.mark.xfail(
+                    reason="pyxlsb can't distinguish chartsheets from worksheets"
+                )
+            )
+        with pytest.raises(ValueError, match="Worksheet named 'Chart1' not found"):
+            pd.read_excel("chartsheet" + read_ext, sheet_name="Chart1")
+
+    def test_ignore_chartsheets_by_int(self, request, read_ext):
+        # GH 41448: chartsheets do not count toward integer sheet positions
+        if pd.read_excel.keywords["engine"] == "odf":
+            pytest.skip("chartsheets do not exist in the ODF format")
+        if pd.read_excel.keywords["engine"] == "pyxlsb":
+            request.node.add_marker(
+                pytest.mark.xfail(
+                    reason="pyxlsb can't distinguish chartsheets from worksheets"
+                )
+            )
+        with pytest.raises(
+            ValueError, match="Worksheet index 1 is invalid, 1 worksheets found"
+        ):
+            pd.read_excel("chartsheet" + read_ext, sheet_name=1)
+
 
 class TestExcelFileRead:
     @pytest.fixture(autouse=True)
@@ -1501,6 +1529,19 @@ def test_engine_invalid_option(self, read_ext):
             with pd.option_context(f"io.excel{read_ext}.reader", "abc"):
                 pass
 
+    def test_ignore_chartsheets(self, request, engine, read_ext):
+        # GH 41448: sheet_names must list only worksheets, not chartsheets
+        if engine == "odf":
+            pytest.skip("chartsheets do not exist in the ODF format")
+        if engine == "pyxlsb":
+            request.node.add_marker(
+                pytest.mark.xfail(
+                    reason="pyxlsb can't distinguish chartsheets from worksheets"
+                )
+            )
+        with pd.ExcelFile("chartsheet" + read_ext) as excel:
+            assert excel.sheet_names == ["Sheet1"]
+
     def test_corrupt_files_closed(self, request, engine, read_ext):
         # GH41778
         errors = (BadZipFile,)