Skip to content

ENH: Better handling of MultiIndex with Excel #5423

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 6, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ci/requirements-2.7.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ numexpr==2.1
tables==2.3.1
matplotlib==1.1.1
openpyxl==1.6.2
xlsxwriter==0.4.3
xlsxwriter==0.4.6
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume this is the current version. Not sure if you care about testing with a previous version (or even if it matters).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, there are a few recent changes in XlsxWriter that are worth picking up. They don't affect any of the functionality here though. All tests will pass with versions from 0.4.3 onwards.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that's fine.

xlrd==0.9.2
patsy==0.1.0
html5lib==1.0b2
Expand Down
2 changes: 1 addition & 1 deletion ci/requirements-2.7_LOCALE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ python-dateutil
pytz==2013b
xlwt==0.7.5
openpyxl==1.6.2
xlsxwriter==0.4.3
xlsxwriter==0.4.6
xlrd==0.9.2
numpy==1.6.1
cython==0.19.1
Expand Down
2 changes: 1 addition & 1 deletion ci/requirements-3.2.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
python-dateutil==2.1
pytz==2013b
openpyxl==1.6.2
xlsxwriter==0.4.3
xlsxwriter==0.4.6
xlrd==0.9.2
numpy==1.7.1
cython==0.19.1
Expand Down
2 changes: 1 addition & 1 deletion ci/requirements-3.3.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
python-dateutil==2.2
pytz==2013b
openpyxl==1.6.2
xlsxwriter==0.4.3
xlsxwriter==0.4.6
xlrd==0.9.2
html5lib==1.0b2
numpy==1.8.0
Expand Down
5 changes: 5 additions & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,11 @@ Improvements to existing features
by color as expected.
- ``read_excel()`` now tries to convert integral floats (like ``1.0``) to int
by default. (:issue:`5394`)
- Excel writers now accept a ``merge_cells`` option in ``to_excel()``, enabled
by default, which writes MultiIndex and Hierarchical Rows as merged cells.
Note: with this option enabled it is no longer possible to round-trip Excel
files that contain merged MultiIndex and Hierarchical Rows. Set
``merge_cells`` to ``False`` to restore the previous behaviour. (:issue:`5254`)

API Changes
~~~~~~~~~~~
Expand Down
144 changes: 98 additions & 46 deletions pandas/core/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1213,7 +1213,7 @@ def __init__(self, row, col, val,
"right": "thin",
"bottom": "thin",
"left": "thin"},
"alignment": {"horizontal": "center"}}
"alignment": {"horizontal": "center", "vertical": "top"}}


class ExcelFormatter(object):
Expand All @@ -1237,10 +1237,12 @@ class ExcelFormatter(object):
Column label for index column(s) if desired. If None is given, and
`header` and `index` are True, then the index names are used. A
sequence should be given if the DataFrame uses MultiIndex.
merge_cells : boolean, default False
Format MultiIndex and Hierarchical Rows as merged cells.
"""

def __init__(self, df, na_rep='', float_format=None, cols=None,
header=True, index=True, index_label=None):
header=True, index=True, index_label=None, merge_cells=False):
self.df = df
self.rowcounter = 0
self.na_rep = na_rep
Expand All @@ -1251,6 +1253,7 @@ def __init__(self, df, na_rep='', float_format=None, cols=None,
self.index = index
self.index_label = index_label
self.header = header
self.merge_cells = merge_cells

def _format_value(self, val):
if lib.checknull(val):
Expand All @@ -1264,29 +1267,44 @@ def _format_header_mi(self):
if not(has_aliases or self.header):
return

levels = self.columns.format(sparsify=True, adjoin=False,
names=False)
# level_lenghts = _get_level_lengths(levels)
coloffset = 1
if isinstance(self.df.index, MultiIndex):
coloffset = len(self.df.index[0])

# for lnum, (records, values) in enumerate(zip(level_lenghts,
# levels)):
# name = self.columns.names[lnum]
# yield ExcelCell(lnum, coloffset, name, header_style)
# for i in records:
# if records[i] > 1:
# yield ExcelCell(lnum,coloffset + i + 1, values[i],
# header_style, lnum, coloffset + i + records[i])
# else:
# yield ExcelCell(lnum, coloffset + i + 1, values[i], header_style)

# self.rowcounter = lnum
columns = self.columns
level_strs = columns.format(sparsify=True, adjoin=False, names=False)
level_lengths = _get_level_lengths(level_strs)
coloffset = 0
lnum = 0
for i, values in enumerate(zip(*levels)):
v = ".".join(map(com.pprint_thing, values))
yield ExcelCell(lnum, coloffset + i, v, header_style)

if isinstance(self.df.index, MultiIndex):
coloffset = len(self.df.index[0]) - 1

if self.merge_cells:
# Format multi-index as merged cells.
for lnum in range(len(level_lengths)):
name = columns.names[lnum]
yield ExcelCell(lnum, coloffset, name, header_style)

for lnum, (spans, levels, labels) in enumerate(zip(level_lengths,
columns.levels,
columns.labels)
):
values = levels.take(labels)
for i in spans:
if spans[i] > 1:
yield ExcelCell(lnum,
coloffset + i + 1,
values[i],
header_style,
lnum,
coloffset + i + spans[i])
else:
yield ExcelCell(lnum,
coloffset + i + 1,
values[i],
header_style)
else:
# Format in legacy format with dots to indicate levels.
for i, values in enumerate(zip(*level_strs)):
v = ".".join(map(com.pprint_thing, values))
yield ExcelCell(lnum, coloffset + i + 1, v, header_style)

self.rowcounter = lnum

Expand Down Expand Up @@ -1354,14 +1372,17 @@ def _format_regular_rows(self):
index_label = self.df.index.names[0]

if index_label and self.header is not False:
# add to same level as column names
# if isinstance(self.df.columns, MultiIndex):
# yield ExcelCell(self.rowcounter, 0,
# index_label, header_style)
# self.rowcounter += 1
# else:
yield ExcelCell(self.rowcounter - 1, 0,
index_label, header_style)
if self.merge_cells:
yield ExcelCell(self.rowcounter,
0,
index_label,
header_style)
self.rowcounter += 1
else:
yield ExcelCell(self.rowcounter - 1,
0,
index_label,
header_style)

# write index_values
index_values = self.df.index
Expand All @@ -1383,7 +1404,7 @@ def _format_hierarchical_rows(self):
self.rowcounter += 1

gcolidx = 0
# output index and index_label?

if self.index:
index_labels = self.df.index.names
# check for aliases
Expand All @@ -1394,29 +1415,60 @@ def _format_hierarchical_rows(self):
# if index labels are not empty go ahead and dump
if (any(x is not None for x in index_labels)
and self.header is not False):
# if isinstance(self.df.columns, MultiIndex):
# self.rowcounter += 1
# else:
self.rowcounter -= 1

if not self.merge_cells:
self.rowcounter -= 1

for cidx, name in enumerate(index_labels):
yield ExcelCell(self.rowcounter, cidx,
name, header_style)
yield ExcelCell(self.rowcounter,
cidx,
name,
header_style)
self.rowcounter += 1

for indexcolvals in zip(*self.df.index):
for idx, indexcolval in enumerate(indexcolvals):
yield ExcelCell(self.rowcounter + idx, gcolidx,
indexcolval, header_style)
gcolidx += 1
if self.merge_cells:
# Format hierarchical rows as merged cells.
level_strs = self.df.index.format(sparsify=True, adjoin=False,
names=False)
level_lengths = _get_level_lengths(level_strs)

for spans, levels, labels in zip(level_lengths,
self.df.index.levels,
self.df.index.labels):
values = levels.take(labels)
for i in spans:
if spans[i] > 1:
yield ExcelCell(self.rowcounter + i,
gcolidx,
values[i],
header_style,
self.rowcounter + i + spans[i] - 1,
gcolidx)
else:
yield ExcelCell(self.rowcounter + i,
gcolidx,
values[i],
header_style)
gcolidx += 1

else:
# Format hierarchical rows with non-merged values.
for indexcolvals in zip(*self.df.index):
for idx, indexcolval in enumerate(indexcolvals):
yield ExcelCell(self.rowcounter + idx,
gcolidx,
indexcolval,
header_style)
gcolidx += 1

for colidx in range(len(self.columns)):
series = self.df.iloc[:, colidx]
for i, val in enumerate(series):
yield ExcelCell(self.rowcounter + i, gcolidx + colidx, val)

def get_formatted_cells(self):
for cell in itertools.chain(self._format_header(), self._format_body()
):
for cell in itertools.chain(self._format_header(),
self._format_body()):
cell.val = self._format_value(cell.val)
yield cell

Expand Down
10 changes: 7 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1130,7 +1130,8 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,

def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
float_format=None, cols=None, header=True, index=True,
index_label=None, startrow=0, startcol=0, engine=None):
index_label=None, startrow=0, startcol=0, engine=None,
merge_cells=True):
"""
Write DataFrame to an Excel sheet

Expand Down Expand Up @@ -1161,13 +1162,15 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
write engine to use - you can also set this via the options
``io.excel.xlsx.writer``, ``io.excel.xls.writer``, and
``io.excel.xlsm.writer``.

merge_cells : boolean, default True
Write MultiIndex and Hierarchical Rows as merged cells.

Notes
-----
If passing an existing ExcelWriter object, then the sheet will be added
to the existing workbook. This can be used to save different
DataFrames to one workbook

>>> writer = ExcelWriter('output.xlsx')
>>> df1.to_excel(writer,'Sheet1')
>>> df2.to_excel(writer,'Sheet2')
Expand All @@ -1185,7 +1188,8 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
header=header,
float_format=float_format,
index=index,
index_label=index_label)
index_label=index_label,
merge_cells=merge_cells)
formatted_cells = formatter.get_formatted_cells()
excel_writer.write_cells(formatted_cells, sheet_name,
startrow=startrow, startcol=startcol)
Expand Down
Loading