pandas-dev · jreback · Jun 4, 2020 · May 4, 2020 · May 4, 2020 · May 4, 2020
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -10,7 +10,7 @@
 import re
 import sys
 from textwrap import fill
-from typing import Any, Dict, Iterable, List, Set
+from typing import Any, Dict, Iterable, List, Optional, Sequence, Set
 import warnings
 
 import numpy as np
@@ -20,7 +20,7 @@
 import pandas._libs.parsers as parsers
 from pandas._libs.parsers import STR_NA_VALUES
 from pandas._libs.tslibs import parsing
-from pandas._typing import FilePathOrBuffer
+from pandas._typing import FilePathOrBuffer, Union
 from pandas.errors import (
     AbstractMethodError,
     EmptyDataError,
@@ -1168,7 +1168,9 @@ def _is_index_col(col):
     return col is not None and col is not False
 
 
-def _is_potential_multi_index(columns):
+def _is_potential_multi_index(
+    columns, index_col: Optional[Union[bool, Sequence[int]]] = None
+):
     """
     Check whether or not the `columns` parameter
     could be converted into a MultiIndex.
@@ -1177,15 +1179,20 @@ def _is_potential_multi_index(columns):
     ----------
     columns : array-like
         Object which may or may not be convertible into a MultiIndex
+    index_col : None, bool or list, optional
+        Column or columns to use as the (possibly hierarchical) index
 
     Returns
     -------
     boolean : Whether or not columns could become a MultiIndex
     """
+    if index_col is None or isinstance(index_col, bool):
+        index_col = []
+
     return (
         len(columns)
         and not isinstance(columns, MultiIndex)
-        and all(isinstance(c, tuple) for c in columns)
+        and all(isinstance(c, tuple) for c in columns if c not in list(index_col))
     )
 
 
@@ -1570,7 +1577,7 @@ def _maybe_dedup_names(self, names):
         if self.mangle_dupe_cols:
             names = list(names)  # so we can index
             counts = defaultdict(int)
-            is_potential_mi = _is_potential_multi_index(names)
+            is_potential_mi = _is_potential_multi_index(names, self.index_col)
 
             for i, col in enumerate(names):
                 cur_count = counts[col]

diff --git a/pandas/tests/io/data/excel/df_empty.xlsx b/pandas/tests/io/data/excel/df_empty.xlsx
diff --git a/pandas/tests/io/data/excel/df_equals.xlsx b/pandas/tests/io/data/excel/df_equals.xlsx
diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
@@ -1084,3 +1084,16 @@ def test_excel_high_surrogate(self, engine):
         # should not produce a segmentation violation
         actual = pd.read_excel("high_surrogate.xlsx")
         tm.assert_frame_equal(expected, actual)
+
+    @pytest.mark.parametrize("filename", ["df_empty.xlsx", "df_equals.xlsx"])
+    def test_header_with_index_col(self, engine, filename):
+        # GH 33476
+        idx = pd.Index(["Z"], name="I2")
+        cols = pd.MultiIndex.from_tuples(
+            [("A", "B"), ("A", "B.1")], names=["I11", "I12"]
+        )
+        expected = pd.DataFrame([[1, 3]], index=idx, columns=cols, dtype="int64")
+        result = pd.read_excel(
+            filename, sheet_name="Sheet1", index_col=0, header=[0, 1]
+        )
+        tm.assert_frame_equal(expected, result)
diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py
@@ -184,3 +184,26 @@ def test_no_multi_index_level_names_empty(all_parsers):
         expected.to_csv(path)
         result = parser.read_csv(path, index_col=[0, 1, 2])
     tm.assert_frame_equal(result, expected)
+
+
+def test_header_with_index_col(all_parsers):
+    # GH 33476
+    parser = all_parsers
+    data = """
+I11,A,A
+I12,B,B
+I2,1,3
+"""
+    midx = MultiIndex.from_tuples([("A", "B"), ("A", "B.1")], names=["I11", "I12"])
+    idx = Index(["I2"])
+    expected = DataFrame([[1, 3]], index=idx, columns=midx)
+
+    result = parser.read_csv(StringIO(data), index_col=0, header=[0, 1])
+    tm.assert_frame_equal(result, expected)
+
+    col_idx = Index(["A", "A.1"])
+    idx = Index(["I12", "I2"], name="I11")
+    expected = DataFrame([["B", "B"], ["1", "3"]], index=idx, columns=col_idx)
+
+    result = parser.read_csv(StringIO(data), index_col="I11", header=0)
+    tm.assert_frame_equal(result, expected)