From 0109fcb2f88b1cae9e5f9707aa64ebcf18502a59 Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Fri, 12 Nov 2021 15:11:40 +0100
Subject: [PATCH 1/2] BUG: read_csv raising if parse_dates is used with
 MultiIndex columns

---
 doc/source/whatsnew/v1.4.0.rst             |  1 +
 pandas/io/parsers/base_parser.py           | 10 +++++--
 pandas/tests/io/parser/test_parse_dates.py | 33 ++++++++++++++++++++++
 3 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index d1e209adb1b8f..d3df785c23544 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -574,6 +574,7 @@ I/O
 - Bug in :func:`json_normalize` where multi-character ``sep`` parameter is incorrectly prefixed to every key (:issue:`43831`)
 - Bug in :func:`read_csv` with :code:`float_precision="round_trip"` which did not skip initial/trailing whitespace (:issue:`43713`)
 - Bug in dumping/loading a :class:`DataFrame` with ``yaml.dump(frame)`` (:issue:`42748`)
+- Bug in :func:`read_csv` raising ``ValueError`` when ``parse_dates`` was used with :class:`MultiIndex` columns (:issue:`8991`)
 -
 
 Period
diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
index 339585810bec1..b67995593d947 100644
--- a/pandas/io/parsers/base_parser.py
+++ b/pandas/io/parsers/base_parser.py
@@ -259,7 +259,8 @@ def _validate_parse_dates_presence(self, columns: list[str]) -> None:
             # ParseDates = Union[DateGroups, List[DateGroups],
             #     Dict[ColReference, DateGroups]]
             cols_needed = itertools.chain.from_iterable(
-                col if is_list_like(col) else [col] for col in self.parse_dates
+                col if is_list_like(col) and not isinstance(col, tuple) else [col]
+                for col in self.parse_dates
             )
         else:
             cols_needed = []
@@ -1091,7 +1092,7 @@ def _isindex(colspec):
     if isinstance(parse_spec, list):
         # list of column lists
         for colspec in parse_spec:
-            if is_scalar(colspec):
+            if is_scalar(colspec) or isinstance(colspec, tuple):
                 if isinstance(colspec, int) and colspec not in data_dict:
                     colspec = orig_names[colspec]
                 if _isindex(colspec):
@@ -1146,7 +1147,10 @@ def _try_convert_dates(parser: Callable, colspec, data_dict, columns):
         else:
             colnames.append(c)
 
-    new_name = "_".join([str(x) for x in colnames])
+    if all(isinstance(x, tuple) for x in colnames):
+        new_name = tuple(map("_".join, zip(*colnames)))
+    else:
+        new_name = "_".join([str(x) for x in colnames])
     to_parse = [np.asarray(data_dict[c]) for c in colnames if c in data_dict]
 
     new_col = parser(*to_parse)
diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py
index c8bea9592e82a..470440290016d 100644
--- a/pandas/tests/io/parser/test_parse_dates.py
+++ b/pandas/tests/io/parser/test_parse_dates.py
@@ -1732,6 +1732,39 @@ def test_date_parser_and_names(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
+@skip_pyarrow
+def test_date_parser_multiindex_columns(all_parsers):
+    parser = all_parsers
+    data = """a,b
+1,2
+2019-12-31,6"""
+    result = parser.read_csv(StringIO(data), parse_dates=[("a", "1")], header=[0, 1])
+    expected = DataFrame({("a", "1"): Timestamp("2019-12-31"), ("b", "2"): [6]})
+    tm.assert_frame_equal(result, expected)
+
+
+@skip_pyarrow
+@pytest.mark.parametrize(
+    "parse_spec, col_name",
+    [
+        ([[("a", "1"), ("b", "2")]], ("a_b", "1_2")),
+        ({("foo", "1"): [("a", "1"), ("b", "2")]}, ("foo", "1")),
+    ],
+)
+def test_date_parser_multiindex_columns_combine_cols(all_parsers, parse_spec, col_name):
+    parser = all_parsers
+    data = """a,b,c
+1,2,3
+2019-12,-31,6"""
+    result = parser.read_csv(
+        StringIO(data),
+        parse_dates=parse_spec,
+        header=[0, 1],
+    )
+    expected = DataFrame({col_name: Timestamp("2019-12-31"), ("c", "3"): [6]})
+    tm.assert_frame_equal(result, expected)
+
+
 @skip_pyarrow
 def test_date_parser_usecols_thousands(all_parsers):
     # GH#39365

From ac973fbe5bbe66ac6d57ad0ac34c99db2882d211 Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Fri, 12 Nov 2021 16:53:58 +0100
Subject: [PATCH 2/2] Add type hint to silence mypy

---
 pandas/io/parsers/base_parser.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
index b67995593d947..ba39b6a933a81 100644
--- a/pandas/io/parsers/base_parser.py
+++ b/pandas/io/parsers/base_parser.py
@@ -1147,6 +1147,7 @@ def _try_convert_dates(parser: Callable, colspec, data_dict, columns):
         else:
             colnames.append(c)
 
+    new_name: tuple | str
     if all(isinstance(x, tuple) for x in colnames):
         new_name = tuple(map("_".join, zip(*colnames)))
     else: