Skip to content

Commit 1348c3b

Browse files
committed
BUG: Fix inconsistency for MultiIndex with empty values (pandas-dev#59560)
1 parent 8911238 commit 1348c3b

File tree

2 files changed

+40
-3
lines changed

2 files changed

+40
-3
lines changed

pandas/io/parsers/base_parser.py

+19 -3
Original file line number | Diff line number | Diff line change
@@ -239,6 +239,19 @@ def extract(r):
239 239
return tuple(r[i] for i in range(field_count) if i not in sic)
240 240

241 241
columns = list(zip(*(extract(r) for r in header)))
242+
# Replace None, empty strings, or column names starting with 'Unnamed: '
243+
# (used as placeholders in multi-index headers) with empty strings.
244+
columns = [
245+
tuple(
246+
""
247+
if level is None
248+
or str(level).strip() == ""
249+
or (isinstance(level, str) and level.startswith("Unnamed: "))
250+
else level
251+
for level in col
252+
)
253+
for col in columns
254+
]
242 255
names = columns.copy()
243 256
for single_ic in sorted(ic):
244 257
names.insert(single_ic, single_ic)
@@ -357,7 +370,7 @@ def _agg_index(self, index) -> Index:
357 370
)
358 371
else:
359 372
col_na_values, col_na_fvalues = set(), set()
360-
373+
col_na_values.discard("")
361 374
cast_type = None
362 375
index_converter = False
363 376
if self.index_names is not None:
@@ -694,8 +707,11 @@ def _clean_index_names(self, columns, index_col) -> tuple[list | None, list, lis
694 707

695 708
# Only clean index names that were placeholders.
696 709
for i, name in enumerate(index_names):
697-
if isinstance(name, str) and name in self.unnamed_cols:
698-
index_names[i] = None
710+
if isinstance(name, str):
711+
if name.strip() == "":
712+
index_names[i] = ""
713+
elif name in self.unnamed_cols:
714+
index_names[i] = None
699 715

700 716
return index_names, columns, index_col
701 717

pandas/tests/io/parser/test_index_col.py

+21
Original file line number | Diff line number | Diff line change
@@ -375,3 +375,24 @@ def test_multiindex_columns_not_leading_index_col(all_parsers):
375 375
)
376 376
expected = DataFrame([["x", 1, 2]], columns=cols, index=["y"])
377 377
tm.assert_frame_equal(result, expected)
378+
379+
380+
def test_multiindex_empty_values_handling(all_parsers):
381+
# GH#59560
382+
parser = all_parsers
383+
if parser.engine == "pyarrow":
384+
pytest.skip(
385+
"PyArrow engine does not support multiple header rows for MultiIndex cols."
386+
)
387+
388+
data = ", ,a,b,b\n" ", ,, ,b2\n" "i1,,0,1,2\n" "i2,,3,4,5\n"
389+
result = parser.read_csv(StringIO(data), header=[0, 1], index_col=[0, 1])
390+
expected_columns = MultiIndex.from_tuples(
391+
[("a", ""), ("b", ""), ("b", "b2")], names=[None, None]
392+
)
393+
expected = DataFrame(
394+
[[0, 1, 2], [3, 4, 5]],
395+
index=MultiIndex.from_tuples([("i1", ""), ("i2", "")]),
396+
columns=expected_columns,
397+
)
398+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)