diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 26e548f519ecd..80e5484c5f324 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -217,6 +217,7 @@ MultiIndex I/O ^^^ +- Bug in :func:`read_csv` raising ``IndexError`` with multiple header rows and ``index_col`` specified when the file has no data rows (:issue:`38292`) - Bug in :func:`read_csv` not accepting ``usecols`` with different length than ``names`` for ``engine="python"`` (:issue:`16469`) - Bug in :func:`read_csv` raising ``TypeError`` when ``names`` and ``parse_dates`` is specified for ``engine="c"`` (:issue:`33699`) - diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 8177741b5252d..3244b1c0f65b2 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1465,7 +1465,7 @@ def _extract_multi_indexer_columns( # clean the index_names index_names = header.pop(-1) - index_names, names, index_col = _clean_index_names( + index_names, _, _ = _clean_index_names( index_names, self.index_col, self.unnamed_cols ) @@ -3464,6 +3464,11 @@ def _clean_index_names(columns, index_col, unnamed_cols): columns = list(columns) + # In case of no rows and multiindex columns we have to set index_names to + # list of Nones GH#38292 + if not columns: + return [None] * len(index_col), columns, index_col + cp_cols = list(columns) index_names = [] diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py index f3191d5195308..a409751e261d6 100644 --- a/pandas/tests/io/parser/test_index_col.py +++ b/pandas/tests/io/parser/test_index_col.py @@ -226,3 +226,56 @@ def test_index_col_large_csv(all_parsers): result = parser.read_csv(path, index_col=[0]) tm.assert_frame_equal(result, df.set_index("a")) + + +def test_index_col_multiindex_columns_no_data(all_parsers): + # GH#38292 + parser = all_parsers + result = parser.read_csv( + StringIO("a0,a1,a2\nb0,b1,b2\n"), header=[0, 1], index_col=0 + ) + expected = DataFrame( + [],
columns=MultiIndex.from_arrays( + [["a1", "a2"], ["b1", "b2"]], names=["a0", "b0"] + ), + ) + tm.assert_frame_equal(result, expected) + + +def test_index_col_header_no_data(all_parsers): + # GH#38292 + parser = all_parsers + result = parser.read_csv(StringIO("a0,a1,a2\n"), header=[0], index_col=0) + expected = DataFrame( + [], + columns=["a1", "a2"], + index=Index([], name="a0"), + ) + tm.assert_frame_equal(result, expected) + + +def test_multiindex_columns_no_data(all_parsers): + # GH#38292 + parser = all_parsers + result = parser.read_csv(StringIO("a0,a1,a2\nb0,b1,b2\n"), header=[0, 1]) + expected = DataFrame( + [], columns=MultiIndex.from_arrays([["a0", "a1", "a2"], ["b0", "b1", "b2"]]) + ) + tm.assert_frame_equal(result, expected) + + +def test_multiindex_columns_index_col_with_data(all_parsers): + # GH#38292 + parser = all_parsers + result = parser.read_csv( + StringIO("a0,a1,a2\nb0,b1,b2\ndata,data,data"), header=[0, 1], index_col=0 + ) + expected = DataFrame( + [["data", "data"]], + columns=MultiIndex.from_arrays( + [["a1", "a2"], ["b1", "b2"]], names=["a0", "b0"] + ), + index=Index(["data"]), + ) + tm.assert_frame_equal(result, expected)