Skip to content

Commit 4fe2f31

Browse files
authored
BUG: read_excel not forward filling index columns when no names are given (#47496)
1 parent 2856607 commit 4fe2f31

File tree

4 files changed

+36
-3
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -935,6 +935,7 @@ I/O
935935
- Bug in :func:`read_parquet` when ``engine="fastparquet"`` where the file was not closed on error (:issue:`46555`)
936936
- :meth:`to_html` now excludes the ``border`` attribute from ``<table>`` elements when ``border`` keyword is set to ``False``.
937937
- Bug in :func:`read_sas` with certain types of compressed SAS7BDAT files (:issue:`35545`)
938+
- Bug in :func:`read_excel` not forward filling :class:`MultiIndex` when no names were given (:issue:`47487`)
938939
- Bug in :func:`read_sas` returned ``None`` rather than an empty DataFrame for SAS7BDAT files with zero rows (:issue:`18198`)
939940
- Bug in :class:`StataWriter` where value labels were always written with default encoding (:issue:`46750`)
940941
- Bug in :class:`StataWriterUTF8` where some valid characters were removed from variable names (:issue:`47276`)

pandas/io/excel/_base.py

+21 -3
Original file line numberDiff line numberDiff line change
@@ -788,9 +788,27 @@ def parse(
788788

789789
# If there is a MultiIndex header and an index then there is also
790790
# a row containing just the index name(s)
791-
has_index_names = (
792-
is_list_header and not is_len_one_list_header and index_col is not None
793-
)
791+
has_index_names = False
792+
if is_list_header and not is_len_one_list_header and index_col is not None:
793+
794+
index_col_list: Sequence[int]
795+
if isinstance(index_col, int):
796+
index_col_list = [index_col]
797+
else:
798+
assert isinstance(index_col, Sequence)
799+
index_col_list = index_col
800+
801+
# We have to handle mi without names. If any of the entries in the data
802+
# columns are not empty, this is a regular row
803+
assert isinstance(header, Sequence)
804+
if len(header) < len(data):
805+
potential_index_names = data[len(header)]
806+
potential_data = [
807+
x
808+
for i, x in enumerate(potential_index_names)
809+
if not control_row[i] and i not in index_col_list
810+
]
811+
has_index_names = all(x == "" or x is None for x in potential_data)
794812

795813
if is_list_like(index_col):
796814
# Forward fill values for MultiIndex index.
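Binary file not shown (separate from the `_base.py` diff above; presumably the new Excel fixture `multiindex_no_index_names` in the `io/data/excel` test-data directory, which the test below reads).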

pandas/tests/io/excel/test_openpyxl.py

+14
Original file line numberDiff line numberDiff line change
@@ -396,3 +396,17 @@ def test_ints_spelled_with_decimals(datapath, ext):
396396
result = pd.read_excel(path)
397397
expected = DataFrame(range(2, 12), columns=[1])
398398
tm.assert_frame_equal(result, expected)
399+
400+
401+
def test_read_multiindex_header_no_index_names(datapath, ext):
402+
# GH#47487
403+
path = datapath("io", "data", "excel", f"multiindex_no_index_names{ext}")
404+
result = pd.read_excel(path, index_col=[0, 1, 2], header=[0, 1, 2])
405+
expected = DataFrame(
406+
[[np.nan, "x", "x", "x"], ["x", np.nan, np.nan, np.nan]],
407+
columns=pd.MultiIndex.from_tuples(
408+
[("X", "Y", "A1"), ("X", "Y", "A2"), ("XX", "YY", "B1"), ("XX", "YY", "B2")]
409+
),
410+
index=pd.MultiIndex.from_tuples([("A", "AA", "AAA"), ("A", "BB", "BBB")]),
411+
)
412+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)