Skip to content

Commit ce5a27e

Browse files
authored
BUG: read_excel forward-filling MI names (#38517)
Closes #34673
1 parent 2b4bcf2 commit ce5a27e

File tree

8 files changed

+46
-2
lines changed

8 files changed

+46
-2
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,7 @@ I/O
267267
- Bug in :func:`to_hdf` raising ``KeyError`` when called
268268
for subclasses of ``DataFrame`` or ``Series`` (:issue:`33748`).
269269
- Bug in :func:`json_normalize` resulting in the first element of a generator object not being included in the returned ``DataFrame`` (:issue:`35923`)
270+
- Bug in :func:`read_excel` forward filling :class:`MultiIndex` names when multiple header and index columns are specified (:issue:`34673`)
270271

271272

272273
Period

pandas/io/excel/_base.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,8 @@ def parse(
504504
header_name, _ = pop_header_name(data[row], index_col)
505505
header_names.append(header_name)
506506

507+
has_index_names = is_list_like(header) and len(header) > 1
508+
507509
if is_list_like(index_col):
508510
# Forward fill values for MultiIndex index.
509511
if header is None:
@@ -513,6 +515,12 @@ def parse(
513515
else:
514516
offset = 1 + max(header)
515517

518+
# GH34673: if MultiIndex names are present and not defined in the header,
519+
# offset needs to be incremented so that forward filling starts
520+
# from the first MI value instead of the name
521+
if has_index_names:
522+
offset += 1
523+
516524
# Check if we have an empty dataset
517525
# before trying to collect data.
518526
if offset < len(data):
@@ -525,8 +533,6 @@ def parse(
525533
else:
526534
last = data[row][col]
527535

528-
has_index_names = is_list_like(header) and len(header) > 1
529-
530536
# GH 12292 : error when read one empty column from excel file
531537
try:
532538
parser = TextParser(
735 Bytes
Binary file not shown.
-512 Bytes
Binary file not shown.
2.82 KB
Binary file not shown.
2.22 KB
Binary file not shown.
3.13 KB
Binary file not shown.

pandas/tests/io/excel/test_readers.py

+37
Original file line numberDiff line numberDiff line change
@@ -841,6 +841,43 @@ def test_read_excel_multiindex(self, read_ext):
841841
)
842842
tm.assert_frame_equal(actual, expected)
843843

844+
@pytest.mark.parametrize(
845+
"sheet_name,idx_lvl2",
846+
[
847+
("both_name_blank_after_mi_name", [np.nan, "b", "a", "b"]),
848+
("both_name_multiple_blanks", [np.nan] * 4),
849+
],
850+
)
851+
def test_read_excel_multiindex_blank_after_name(
852+
self, read_ext, sheet_name, idx_lvl2
853+
):
854+
# GH34673
855+
if pd.read_excel.keywords["engine"] == "pyxlsb":
856+
pytest.xfail("Sheets containing datetimes not supported by pyxlsb (GH4679")
857+
858+
mi_file = "testmultiindex" + read_ext
859+
mi = MultiIndex.from_product([["foo", "bar"], ["a", "b"]], names=["c1", "c2"])
860+
expected = DataFrame(
861+
[
862+
[1, 2.5, pd.Timestamp("2015-01-01"), True],
863+
[2, 3.5, pd.Timestamp("2015-01-02"), False],
864+
[3, 4.5, pd.Timestamp("2015-01-03"), False],
865+
[4, 5.5, pd.Timestamp("2015-01-04"), True],
866+
],
867+
columns=mi,
868+
index=MultiIndex.from_arrays(
869+
(["foo", "foo", "bar", "bar"], idx_lvl2),
870+
names=["ilvl1", "ilvl2"],
871+
),
872+
)
873+
result = pd.read_excel(
874+
mi_file,
875+
sheet_name=sheet_name,
876+
index_col=[0, 1],
877+
header=[0, 1],
878+
)
879+
tm.assert_frame_equal(result, expected)
880+
844881
def test_read_excel_multiindex_header_only(self, read_ext):
845882
# see gh-11733.
846883
#

0 commit comments

Comments
 (0)