diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 2b1182414ca2f..ee78756d1fe4a 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -754,6 +754,7 @@ I/O - Bug in :func:`read_csv` raising ``AttributeError`` when attempting to read a .csv file and infer index column dtype from an nullable integer type (:issue:`44079`) - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` with ``compression`` set to ``'zip'`` no longer create a zip file containing a file ending with ".zip". Instead, they try to infer the inner file name more smartly. (:issue:`39465`) - Bug in :func:`read_csv` when passing simultaneously a parser in ``date_parser`` and ``parse_dates=False``, the parsing was still called (:issue:`44366`) +- Bug in :func:`read_csv` not setting name of :class:`MultiIndex` columns correctly when ``index_col`` is not the first column (:issue:`38549`) - Bug in :func:`read_csv` silently ignoring errors when failling to create a memory-mapped file (:issue:`44766`) - Bug in :func:`read_csv` when passing a ``tempfile.SpooledTemporaryFile`` opened in binary mode (:issue:`44748`) - diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 5d03529654b0d..5584730be90e8 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -391,7 +391,9 @@ def extract(r): return tuple(r[i] for i in range(field_count) if i not in sic) columns = list(zip(*(extract(r) for r in header))) - names = ic + columns + names = columns.copy() + for single_ic in sorted(ic): + names.insert(single_ic, single_ic) # If we find unnamed columns all in a single # level, then our header was too long. @@ -406,7 +408,9 @@ def extract(r): # Clean the column names (if we have an index_col). if len(ic): col_names = [ - r[0] if ((r[0] is not None) and r[0] not in self.unnamed_cols) else None + r[ic[0]] + if ((r[ic[0]] is not None) and r[ic[0]] not in self.unnamed_cols) + else None for r in header ] else: diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py index 58b5eebbec344..f30aba3db917e 100644 --- a/pandas/tests/io/parser/test_index_col.py +++ b/pandas/tests/io/parser/test_index_col.py @@ -332,3 +332,23 @@ def test_specify_dtype_for_index_col(all_parsers, dtype, val): result = parser.read_csv(StringIO(data), index_col="a", dtype={"a": dtype}) expected = DataFrame({"b": [2]}, index=Index([val], name="a")) tm.assert_frame_equal(result, expected) + + +@skip_pyarrow +def test_multiindex_columns_not_leading_index_col(all_parsers): + # GH#38549 + parser = all_parsers + data = """a,b,c,d +e,f,g,h +x,y,1,2 +""" + result = parser.read_csv( + StringIO(data), + header=[0, 1], + index_col=1, + ) + cols = MultiIndex.from_tuples( + [("a", "e"), ("c", "g"), ("d", "h")], names=["b", "f"] + ) + expected = DataFrame([["x", 1, 2]], columns=cols, index=["y"]) + tm.assert_frame_equal(result, expected)