Skip to content

BUG: Don't merge Excel cells to a single row with merge_cells=False #60293

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Nov 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -702,6 +702,7 @@ I/O
- Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`)
- Bug in :meth:`read_stata` where extreme value integers were incorrectly interpreted as missing for format versions 111 and prior (:issue:`58130`)
- Bug in :meth:`read_stata` where the missing code for double was not recognised for format versions 105 and prior (:issue:`58149`)
- Bug in :meth:`to_excel` where :class:`MultiIndex` columns would be merged into a single row when ``merge_cells=False`` is passed (:issue:`60274`)

Period
^^^^^^
Expand Down
66 changes: 22 additions & 44 deletions pandas/io/formats/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@
CSSWarning,
)
from pandas.io.formats.format import get_level_lengths
from pandas.io.formats.printing import pprint_thing

if TYPE_CHECKING:
from pandas._typing import (
Expand Down Expand Up @@ -620,61 +619,43 @@ def _format_header_mi(self) -> Iterable[ExcelCell]:
return

columns = self.columns
level_strs = columns._format_multi(
sparsify=self.merge_cells in {True, "columns"}, include_names=False
)
merge_columns = self.merge_cells in {True, "columns"}
level_strs = columns._format_multi(sparsify=merge_columns, include_names=False)
level_lengths = get_level_lengths(level_strs)
coloffset = 0
lnum = 0

if self.index and isinstance(self.df.index, MultiIndex):
coloffset = self.df.index.nlevels - 1

if self.merge_cells in {True, "columns"}:
# Format multi-index as a merged cells.
for lnum, name in enumerate(columns.names):
yield ExcelCell(
row=lnum,
col=coloffset,
val=name,
style=None,
)
for lnum, name in enumerate(columns.names):
yield ExcelCell(
row=lnum,
col=coloffset,
val=name,
style=None,
)

for lnum, (spans, levels, level_codes) in enumerate(
zip(level_lengths, columns.levels, columns.codes)
):
values = levels.take(level_codes)
for i, span_val in spans.items():
mergestart, mergeend = None, None
if span_val > 1:
mergestart, mergeend = lnum, coloffset + i + span_val
yield CssExcelCell(
row=lnum,
col=coloffset + i + 1,
val=values[i],
style=None,
css_styles=getattr(self.styler, "ctx_columns", None),
css_row=lnum,
css_col=i,
css_converter=self.style_converter,
mergestart=mergestart,
mergeend=mergeend,
)
else:
# Format in legacy format with dots to indicate levels.
for i, values in enumerate(zip(*level_strs)):
v = ".".join(map(pprint_thing, values))
for lnum, (spans, levels, level_codes) in enumerate(
zip(level_lengths, columns.levels, columns.codes)
):
values = levels.take(level_codes)
for i, span_val in spans.items():
mergestart, mergeend = None, None
if merge_columns and span_val > 1:
mergestart, mergeend = lnum, coloffset + i + span_val
yield CssExcelCell(
row=lnum,
col=coloffset + i + 1,
val=v,
val=values[i],
style=None,
css_styles=getattr(self.styler, "ctx_columns", None),
css_row=lnum,
css_col=i,
css_converter=self.style_converter,
mergestart=mergestart,
mergeend=mergeend,
)

self.rowcounter = lnum

def _format_header_regular(self) -> Iterable[ExcelCell]:
Expand Down Expand Up @@ -798,11 +779,8 @@ def _format_hierarchical_rows(self) -> Iterable[ExcelCell]:

# MultiIndex columns require an extra row
# with index names (blank if None) for
# unambiguous round-trip, unless not merging,
# in which case the names all go on one row Issue #11328
if isinstance(self.columns, MultiIndex) and (
self.merge_cells in {True, "columns"}
):
# unambiguous round-trip, Issue #11328
if isinstance(self.columns, MultiIndex):
self.rowcounter += 1

# if index labels are not empty go ahead and dump
Expand Down
46 changes: 34 additions & 12 deletions pandas/tests/io/excel/test_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -870,27 +870,49 @@ def test_to_excel_multiindex_nan_label(self, merge_cells, tmp_excel):
# Test for Issue 11328. If column indices are integers, make
# sure they are handled correctly for either setting of
# merge_cells
def test_to_excel_multiindex_cols(self, merge_cells, frame, tmp_excel):
def test_to_excel_multiindex_cols(self, merge_cells, tmp_excel):
# GH#11328
frame = DataFrame(
{
"A": [1, 2, 3],
"B": [4, 5, 6],
"C": [7, 8, 9],
}
)
arrays = np.arange(len(frame.index) * 2, dtype=np.int64).reshape(2, -1)
new_index = MultiIndex.from_arrays(arrays, names=["first", "second"])
frame.index = new_index

new_cols_index = MultiIndex.from_tuples([(40, 1), (40, 2), (50, 1), (50, 2)])
new_cols_index = MultiIndex.from_tuples([(40, 1), (40, 2), (50, 1)])
frame.columns = new_cols_index
header = [0, 1]
if not merge_cells:
header = 0

# round trip
frame.to_excel(tmp_excel, sheet_name="test1", merge_cells=merge_cells)

# Check round trip
with ExcelFile(tmp_excel) as reader:
result = pd.read_excel(
reader, sheet_name="test1", header=[0, 1], index_col=[0, 1]
)
tm.assert_frame_equal(result, frame)

# GH#60274
# Check with header/index_col None to determine which cells were merged
with ExcelFile(tmp_excel) as reader:
df = pd.read_excel(
reader, sheet_name="test1", header=header, index_col=[0, 1]
result = pd.read_excel(
reader, sheet_name="test1", header=None, index_col=None
)
expected = DataFrame(
{
0: [np.nan, np.nan, "first", 0, 1, 2],
1: [np.nan, np.nan, "second", 3, 4, 5],
2: [40.0, 1.0, np.nan, 1.0, 2.0, 3.0],
3: [np.nan, 2.0, np.nan, 4.0, 5.0, 6.0],
4: [50.0, 1.0, np.nan, 7.0, 8.0, 9.0],
}
)
if not merge_cells:
fm = frame.columns._format_multi(sparsify=False, include_names=False)
frame.columns = [".".join(map(str, q)) for q in zip(*fm)]
tm.assert_frame_equal(frame, df)
# MultiIndex column value is repeated
expected.loc[0, 3] = 40.0
tm.assert_frame_equal(result, expected)

def test_to_excel_multiindex_dates(self, merge_cells, tmp_excel):
# try multiindex with dates
Expand Down