Skip to content

Commit d36acbb

Browse files
authored
BUG: read_csv raising if parse_dates is used with MultiIndex columns (#44408)
1 parent b7b2e9b commit d36acbb

File tree

3 files changed

+42
-3
lines changed

3 files changed

+42
-3
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -634,6 +634,7 @@ I/O
634634
- Bug in :func:`json_normalize` where multi-character ``sep`` parameter is incorrectly prefixed to every key (:issue:`43831`)
635635
- Bug in :func:`read_csv` with :code:`float_precision="round_trip"` which did not skip initial/trailing whitespace (:issue:`43713`)
636636
- Bug in dumping/loading a :class:`DataFrame` with ``yaml.dump(frame)`` (:issue:`42748`)
637+
- Bug in :func:`read_csv` raising ``ValueError`` when ``parse_dates`` was used with ``MultiIndex`` columns (:issue:`8991`)
637638
-
638639

639640
Period

pandas/io/parsers/base_parser.py

+8-3
Original file line number | Diff line number | Diff line change
@@ -260,7 +260,8 @@ def _validate_parse_dates_presence(self, columns: list[str]) -> None:
260260
# ParseDates = Union[DateGroups, List[DateGroups],
261261
# Dict[ColReference, DateGroups]]
262262
cols_needed = itertools.chain.from_iterable(
263-
col if is_list_like(col) else [col] for col in self.parse_dates
263+
col if is_list_like(col) and not isinstance(col, tuple) else [col]
264+
for col in self.parse_dates
264265
)
265266
else:
266267
cols_needed = []
@@ -1092,7 +1093,7 @@ def _isindex(colspec):
10921093
if isinstance(parse_spec, list):
10931094
# list of column lists
10941095
for colspec in parse_spec:
1095-
if is_scalar(colspec):
1096+
if is_scalar(colspec) or isinstance(colspec, tuple):
10961097
if isinstance(colspec, int) and colspec not in data_dict:
10971098
colspec = orig_names[colspec]
10981099
if _isindex(colspec):
@@ -1147,7 +1148,11 @@ def _try_convert_dates(parser: Callable, colspec, data_dict, columns):
11471148
else:
11481149
colnames.append(c)
11491150

1150-
new_name = "_".join([str(x) for x in colnames])
1151+
new_name: tuple | str
1152+
if all(isinstance(x, tuple) for x in colnames):
1153+
new_name = tuple(map("_".join, zip(*colnames)))
1154+
else:
1155+
new_name = "_".join([str(x) for x in colnames])
11511156
to_parse = [np.asarray(data_dict[c]) for c in colnames if c in data_dict]
11521157

11531158
new_col = parser(*to_parse)

pandas/tests/io/parser/test_parse_dates.py

+33
Original file line number | Diff line number | Diff line change
@@ -1732,6 +1732,39 @@ def test_date_parser_and_names(all_parsers):
17321732
tm.assert_frame_equal(result, expected)
17331733

17341734

1735+
@skip_pyarrow
1736+
def test_date_parser_multiindex_columns(all_parsers):
1737+
parser = all_parsers
1738+
data = """a,b
1739+
1,2
1740+
2019-12-31,6"""
1741+
result = parser.read_csv(StringIO(data), parse_dates=[("a", "1")], header=[0, 1])
1742+
expected = DataFrame({("a", "1"): Timestamp("2019-12-31"), ("b", "2"): [6]})
1743+
tm.assert_frame_equal(result, expected)
1744+
1745+
1746+
@skip_pyarrow
1747+
@pytest.mark.parametrize(
1748+
"parse_spec, col_name",
1749+
[
1750+
([[("a", "1"), ("b", "2")]], ("a_b", "1_2")),
1751+
({("foo", "1"): [("a", "1"), ("b", "2")]}, ("foo", "1")),
1752+
],
1753+
)
1754+
def test_date_parser_multiindex_columns_combine_cols(all_parsers, parse_spec, col_name):
1755+
parser = all_parsers
1756+
data = """a,b,c
1757+
1,2,3
1758+
2019-12,-31,6"""
1759+
result = parser.read_csv(
1760+
StringIO(data),
1761+
parse_dates=parse_spec,
1762+
header=[0, 1],
1763+
)
1764+
expected = DataFrame({col_name: Timestamp("2019-12-31"), ("c", "3"): [6]})
1765+
tm.assert_frame_equal(result, expected)
1766+
1767+
17351768
@skip_pyarrow
17361769
def test_date_parser_usecols_thousands(all_parsers):
17371770
# GH#39365

0 commit comments

Comments
 (0)