Skip to content

Commit 50adb83

Browse files
authored
Bug: names of multiindex columns not set correctly when index col is not first column (#44931)
1 parent cd5a124 commit 50adb83

File tree

3 files changed

+27
-2
lines changed

3 files changed

+27
-2
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -761,6 +761,7 @@ I/O
761761
- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` with ``compression`` set to ``'zip'`` no longer create a zip file containing a file ending with ".zip". Instead, they try to infer the inner file name more smartly. (:issue:`39465`)
762762
- Bug in :func:`read_csv` where reading a mixed column of booleans and missing values to a float type results in the missing values becoming 1.0 rather than NaN (:issue:`42808`, :issue:`34120`)
763763
- Bug in :func:`read_csv` where the parser passed in ``date_parser`` was still called even though ``parse_dates=False`` was passed at the same time (:issue:`44366`)
764+
- Bug in :func:`read_csv` not setting the names of :class:`MultiIndex` columns correctly when ``index_col`` is not the first column (:issue:`38549`)
764765
- Bug in :func:`read_csv` silently ignoring errors when failing to create a memory-mapped file (:issue:`44766`)
765766
- Bug in :func:`read_csv` when passing a ``tempfile.SpooledTemporaryFile`` opened in binary mode (:issue:`44748`)
766767
-

pandas/io/parsers/base_parser.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -391,7 +391,9 @@ def extract(r):
391391
return tuple(r[i] for i in range(field_count) if i not in sic)
392392

393393
columns = list(zip(*(extract(r) for r in header)))
394-
names = ic + columns
394+
names = columns.copy()
395+
for single_ic in sorted(ic):
396+
names.insert(single_ic, single_ic)
395397

396398
# If we find unnamed columns all in a single
397399
# level, then our header was too long.
@@ -406,7 +408,9 @@ def extract(r):
406408
# Clean the column names (if we have an index_col).
407409
if len(ic):
408410
col_names = [
409-
r[0] if ((r[0] is not None) and r[0] not in self.unnamed_cols) else None
411+
r[ic[0]]
412+
if ((r[ic[0]] is not None) and r[ic[0]] not in self.unnamed_cols)
413+
else None
410414
for r in header
411415
]
412416
else:

pandas/tests/io/parser/test_index_col.py

+20
Original file line numberDiff line numberDiff line change
@@ -332,3 +332,23 @@ def test_specify_dtype_for_index_col(all_parsers, dtype, val):
332332
result = parser.read_csv(StringIO(data), index_col="a", dtype={"a": dtype})
333333
expected = DataFrame({"b": [2]}, index=Index([val], name="a"))
334334
tm.assert_frame_equal(result, expected)
335+
336+
337+
@skip_pyarrow
338+
def test_multiindex_columns_not_leading_index_col(all_parsers):
339+
# GH#38549
340+
parser = all_parsers
341+
data = """a,b,c,d
342+
e,f,g,h
343+
x,y,1,2
344+
"""
345+
result = parser.read_csv(
346+
StringIO(data),
347+
header=[0, 1],
348+
index_col=1,
349+
)
350+
cols = MultiIndex.from_tuples(
351+
[("a", "e"), ("c", "g"), ("d", "h")], names=["b", "f"]
352+
)
353+
expected = DataFrame([["x", 1, 2]], columns=cols, index=["y"])
354+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)