Skip to content

Commit bbe6bec

Browse files
phofl and jreback
authored and committed
BUG: read_csv raising IndexError with multiple header cols, specified index_col and no data rows (pandas-dev#38325)
* BUG: read_csv raising IndexError with multiple header cols, specified index_col and no data rows * Address comments * Add tests and move whatsnew * Revert change Co-authored-by: Jeff Reback <[email protected]>
1 parent ae33e80 commit bbe6bec

File tree

3 files changed

+60
-1
lines changed

3 files changed

+60
-1
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,7 @@ MultiIndex
217217
I/O
218218
^^^
219219

220+
- Bug in :func:`read_csv` raising ``IndexError`` with multiple header columns and ``index_col`` specified when file has no data rows (:issue:`38292`)
220221
- Bug in :func:`read_csv` not accepting ``usecols`` with different length than ``names`` for ``engine="python"`` (:issue:`16469`)
221222
- Bug in :func:`read_csv` raising ``TypeError`` when ``names`` and ``parse_dates`` are specified for ``engine="c"`` (:issue:`33699`)
222223
-

pandas/io/parsers.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -1465,7 +1465,7 @@ def _extract_multi_indexer_columns(
14651465

14661466
# clean the index_names
14671467
index_names = header.pop(-1)
1468-
index_names, names, index_col = _clean_index_names(
1468+
index_names, _, _ = _clean_index_names(
14691469
index_names, self.index_col, self.unnamed_cols
14701470
)
14711471

@@ -3464,6 +3464,11 @@ def _clean_index_names(columns, index_col, unnamed_cols):
34643464

34653465
columns = list(columns)
34663466

3467+
# In case of no rows and multiindex columns we have to set index_names to
3468+
# list of Nones GH#38292
3469+
if not columns:
3470+
return [None] * len(index_col), columns, index_col
3471+
34673472
cp_cols = list(columns)
34683473
index_names = []
34693474

pandas/tests/io/parser/test_index_col.py

+53
Original file line numberDiff line numberDiff line change
@@ -226,3 +226,56 @@ def test_index_col_large_csv(all_parsers):
226226
result = parser.read_csv(path, index_col=[0])
227227

228228
tm.assert_frame_equal(result, df.set_index("a"))
229+
230+
231+
def test_index_col_multiindex_columns_no_data(all_parsers):
232+
# GH#38292
233+
parser = all_parsers
234+
result = parser.read_csv(
235+
StringIO("a0,a1,a2\nb0,b1,b2\n"), header=[0, 1], index_col=0
236+
)
237+
expected = DataFrame(
238+
[],
239+
columns=MultiIndex.from_arrays(
240+
[["a1", "a2"], ["b1", "b2"]], names=["a0", "b0"]
241+
),
242+
)
243+
tm.assert_frame_equal(result, expected)
244+
245+
246+
def test_index_col_header_no_data(all_parsers):
247+
# GH#38292
248+
parser = all_parsers
249+
result = parser.read_csv(StringIO("a0,a1,a2\n"), header=[0], index_col=0)
250+
expected = DataFrame(
251+
[],
252+
columns=["a1", "a2"],
253+
index=Index([], name="a0"),
254+
)
255+
tm.assert_frame_equal(result, expected)
256+
257+
258+
def test_multiindex_columns_no_data(all_parsers):
259+
# GH#38292
260+
parser = all_parsers
261+
result = parser.read_csv(StringIO("a0,a1,a2\nb0,b1,b2\n"), header=[0, 1])
262+
expected = DataFrame(
263+
[], columns=MultiIndex.from_arrays([["a0", "a1", "a2"], ["b0", "b1", "b2"]])
264+
)
265+
tm.assert_frame_equal(result, expected)
266+
267+
268+
def test_multiindex_columns_index_col_with_data(all_parsers):
269+
# GH#38292
270+
parser = all_parsers
271+
result = parser.read_csv(
272+
StringIO("a0,a1,a2\nb0,b1,b2\ndata,data,data"), header=[0, 1], index_col=0
273+
)
274+
expected = DataFrame(
275+
[["data", "data"]],
276+
columns=MultiIndex.from_arrays(
277+
[["a1", "a2"], ["b1", "b2"]], names=["a0", "b0"]
278+
),
279+
index=Index(["data"]),
280+
)
281+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)