Skip to content

Commit 994cab4

Browse files
authored
BUG: Don't merge Excel cells to a single row with merge_cells=False (#60293)
* BUG: Don't merge Excel cells to a single row with merge_cells=False
* Cleanup comment
* Improve test
1 parent 73da90c commit 994cab4

File tree

3 files changed

+57
-56
lines changed

3 files changed

+57
-56
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -702,6 +702,7 @@ I/O
702702
- Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`)
703703
- Bug in :meth:`read_stata` where extreme value integers were incorrectly interpreted as missing for format versions 111 and prior (:issue:`58130`)
704704
- Bug in :meth:`read_stata` where the missing code for double was not recognised for format versions 105 and prior (:issue:`58149`)
705+
- Bug in :meth:`to_excel` where :class:`MultiIndex` columns would be merged to a single row when ``merge_cells=False`` is passed (:issue:`60274`)
705706

706707
Period
707708
^^^^^^

pandas/io/formats/excel.py

+22-44
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,6 @@
4848
CSSWarning,
4949
)
5050
from pandas.io.formats.format import get_level_lengths
51-
from pandas.io.formats.printing import pprint_thing
5251

5352
if TYPE_CHECKING:
5453
from pandas._typing import (
@@ -620,61 +619,43 @@ def _format_header_mi(self) -> Iterable[ExcelCell]:
620619
return
621620

622621
columns = self.columns
623-
level_strs = columns._format_multi(
624-
sparsify=self.merge_cells in {True, "columns"}, include_names=False
625-
)
622+
merge_columns = self.merge_cells in {True, "columns"}
623+
level_strs = columns._format_multi(sparsify=merge_columns, include_names=False)
626624
level_lengths = get_level_lengths(level_strs)
627625
coloffset = 0
628626
lnum = 0
629627

630628
if self.index and isinstance(self.df.index, MultiIndex):
631629
coloffset = self.df.index.nlevels - 1
632630

633-
if self.merge_cells in {True, "columns"}:
634-
# Format multi-index as a merged cells.
635-
for lnum, name in enumerate(columns.names):
636-
yield ExcelCell(
637-
row=lnum,
638-
col=coloffset,
639-
val=name,
640-
style=None,
641-
)
631+
for lnum, name in enumerate(columns.names):
632+
yield ExcelCell(
633+
row=lnum,
634+
col=coloffset,
635+
val=name,
636+
style=None,
637+
)
642638

643-
for lnum, (spans, levels, level_codes) in enumerate(
644-
zip(level_lengths, columns.levels, columns.codes)
645-
):
646-
values = levels.take(level_codes)
647-
for i, span_val in spans.items():
648-
mergestart, mergeend = None, None
649-
if span_val > 1:
650-
mergestart, mergeend = lnum, coloffset + i + span_val
651-
yield CssExcelCell(
652-
row=lnum,
653-
col=coloffset + i + 1,
654-
val=values[i],
655-
style=None,
656-
css_styles=getattr(self.styler, "ctx_columns", None),
657-
css_row=lnum,
658-
css_col=i,
659-
css_converter=self.style_converter,
660-
mergestart=mergestart,
661-
mergeend=mergeend,
662-
)
663-
else:
664-
# Format in legacy format with dots to indicate levels.
665-
for i, values in enumerate(zip(*level_strs)):
666-
v = ".".join(map(pprint_thing, values))
639+
for lnum, (spans, levels, level_codes) in enumerate(
640+
zip(level_lengths, columns.levels, columns.codes)
641+
):
642+
values = levels.take(level_codes)
643+
for i, span_val in spans.items():
644+
mergestart, mergeend = None, None
645+
if merge_columns and span_val > 1:
646+
mergestart, mergeend = lnum, coloffset + i + span_val
667647
yield CssExcelCell(
668648
row=lnum,
669649
col=coloffset + i + 1,
670-
val=v,
650+
val=values[i],
671651
style=None,
672652
css_styles=getattr(self.styler, "ctx_columns", None),
673653
css_row=lnum,
674654
css_col=i,
675655
css_converter=self.style_converter,
656+
mergestart=mergestart,
657+
mergeend=mergeend,
676658
)
677-
678659
self.rowcounter = lnum
679660

680661
def _format_header_regular(self) -> Iterable[ExcelCell]:
@@ -798,11 +779,8 @@ def _format_hierarchical_rows(self) -> Iterable[ExcelCell]:
798779

799780
# MultiIndex columns require an extra row
800781
# with index names (blank if None) for
801-
# unambiguous round-trip, unless not merging,
802-
# in which case the names all go on one row Issue #11328
803-
if isinstance(self.columns, MultiIndex) and (
804-
self.merge_cells in {True, "columns"}
805-
):
782+
# unambiguous round-trip, Issue #11328
783+
if isinstance(self.columns, MultiIndex):
806784
self.rowcounter += 1
807785

808786
# if index labels are not empty go ahead and dump

pandas/tests/io/excel/test_writers.py

+34-12
Original file line number | Diff line number | Diff line change
@@ -870,27 +870,49 @@ def test_to_excel_multiindex_nan_label(self, merge_cells, tmp_excel):
870870
# Test for Issue 11328. If column indices are integers, make
871871
# sure they are handled correctly for either setting of
872872
# merge_cells
873-
def test_to_excel_multiindex_cols(self, merge_cells, frame, tmp_excel):
873+
def test_to_excel_multiindex_cols(self, merge_cells, tmp_excel):
874+
# GH#11328
875+
frame = DataFrame(
876+
{
877+
"A": [1, 2, 3],
878+
"B": [4, 5, 6],
879+
"C": [7, 8, 9],
880+
}
881+
)
874882
arrays = np.arange(len(frame.index) * 2, dtype=np.int64).reshape(2, -1)
875883
new_index = MultiIndex.from_arrays(arrays, names=["first", "second"])
876884
frame.index = new_index
877885

878-
new_cols_index = MultiIndex.from_tuples([(40, 1), (40, 2), (50, 1), (50, 2)])
886+
new_cols_index = MultiIndex.from_tuples([(40, 1), (40, 2), (50, 1)])
879887
frame.columns = new_cols_index
880-
header = [0, 1]
881-
if not merge_cells:
882-
header = 0
883-
884-
# round trip
885888
frame.to_excel(tmp_excel, sheet_name="test1", merge_cells=merge_cells)
889+
890+
# Check round trip
891+
with ExcelFile(tmp_excel) as reader:
892+
result = pd.read_excel(
893+
reader, sheet_name="test1", header=[0, 1], index_col=[0, 1]
894+
)
895+
tm.assert_frame_equal(result, frame)
896+
897+
# GH#60274
898+
# Check with header/index_col None to determine which cells were merged
886899
with ExcelFile(tmp_excel) as reader:
887-
df = pd.read_excel(
888-
reader, sheet_name="test1", header=header, index_col=[0, 1]
900+
result = pd.read_excel(
901+
reader, sheet_name="test1", header=None, index_col=None
889902
)
903+
expected = DataFrame(
904+
{
905+
0: [np.nan, np.nan, "first", 0, 1, 2],
906+
1: [np.nan, np.nan, "second", 3, 4, 5],
907+
2: [40.0, 1.0, np.nan, 1.0, 2.0, 3.0],
908+
3: [np.nan, 2.0, np.nan, 4.0, 5.0, 6.0],
909+
4: [50.0, 1.0, np.nan, 7.0, 8.0, 9.0],
910+
}
911+
)
890912
if not merge_cells:
891-
fm = frame.columns._format_multi(sparsify=False, include_names=False)
892-
frame.columns = [".".join(map(str, q)) for q in zip(*fm)]
893-
tm.assert_frame_equal(frame, df)
913+
# MultiIndex column value is repeated
914+
expected.loc[0, 3] = 40.0
915+
tm.assert_frame_equal(result, expected)
894916

895917
def test_to_excel_multiindex_dates(self, merge_cells, tmp_excel):
896918
# try multiindex with dates

0 commit comments

Comments
 (0)