Skip to content

Commit b139288

Browse files
committed
Merge pull request #5423 from jmcnamara/enh_excel_multi_index
ENH: Better handling of MultiIndex with Excel
2 parents e50942f + ae37d22 commit b139288

9 files changed

+256
-144
lines changed

ci/requirements-2.7.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ numexpr==2.1
88
tables==2.3.1
99
matplotlib==1.1.1
1010
openpyxl==1.6.2
11-
xlsxwriter==0.4.3
11+
xlsxwriter==0.4.6
1212
xlrd==0.9.2
1313
patsy==0.1.0
1414
html5lib==1.0b2

ci/requirements-2.7_LOCALE.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ python-dateutil
22
pytz==2013b
33
xlwt==0.7.5
44
openpyxl==1.6.2
5-
xlsxwriter==0.4.3
5+
xlsxwriter==0.4.6
66
xlrd==0.9.2
77
numpy==1.6.1
88
cython==0.19.1

ci/requirements-3.2.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
python-dateutil==2.1
22
pytz==2013b
33
openpyxl==1.6.2
4-
xlsxwriter==0.4.3
4+
xlsxwriter==0.4.6
55
xlrd==0.9.2
66
numpy==1.7.1
77
cython==0.19.1

ci/requirements-3.3.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
python-dateutil==2.2
22
pytz==2013b
33
openpyxl==1.6.2
4-
xlsxwriter==0.4.3
4+
xlsxwriter==0.4.6
55
xlrd==0.9.2
66
html5lib==1.0b2
77
numpy==1.8.0

doc/source/release.rst

+5
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,11 @@ Improvements to existing features
211211
by color as expected.
212212
- ``read_excel()`` now tries to convert integral floats (like ``1.0``) to int
213213
by default. (:issue:`5394`)
214+
- ``to_excel()`` now accepts a ``merge_cells`` option, enabled by default,
215+
that writes MultiIndex and Hierarchical Rows as merged cells. Note: with
216+
this option enabled, Excel files containing merged MultiIndex and
217+
Hierarchical Rows can no longer be round-tripped. Set ``merge_cells`` to
218+
``False`` to restore the previous behaviour. (:issue:`5254`)
214219

215220
API Changes
216221
~~~~~~~~~~~

pandas/core/format.py

+98-46
Original file line numberDiff line numberDiff line change
@@ -1213,7 +1213,7 @@ def __init__(self, row, col, val,
12131213
"right": "thin",
12141214
"bottom": "thin",
12151215
"left": "thin"},
1216-
"alignment": {"horizontal": "center"}}
1216+
"alignment": {"horizontal": "center", "vertical": "top"}}
12171217

12181218

12191219
class ExcelFormatter(object):
@@ -1237,10 +1237,12 @@ class ExcelFormatter(object):
12371237
Column label for index column(s) if desired. If None is given, and
12381238
`header` and `index` are True, then the index names are used. A
12391239
sequence should be given if the DataFrame uses MultiIndex.
1240+
merge_cells : boolean, default False
1241+
Format MultiIndex and Hierarchical Rows as merged cells.
12401242
"""
12411243

12421244
def __init__(self, df, na_rep='', float_format=None, cols=None,
1243-
header=True, index=True, index_label=None):
1245+
header=True, index=True, index_label=None, merge_cells=False):
12441246
self.df = df
12451247
self.rowcounter = 0
12461248
self.na_rep = na_rep
@@ -1251,6 +1253,7 @@ def __init__(self, df, na_rep='', float_format=None, cols=None,
12511253
self.index = index
12521254
self.index_label = index_label
12531255
self.header = header
1256+
self.merge_cells = merge_cells
12541257

12551258
def _format_value(self, val):
12561259
if lib.checknull(val):
@@ -1264,29 +1267,44 @@ def _format_header_mi(self):
12641267
if not(has_aliases or self.header):
12651268
return
12661269

1267-
levels = self.columns.format(sparsify=True, adjoin=False,
1268-
names=False)
1269-
# level_lenghts = _get_level_lengths(levels)
1270-
coloffset = 1
1271-
if isinstance(self.df.index, MultiIndex):
1272-
coloffset = len(self.df.index[0])
1273-
1274-
# for lnum, (records, values) in enumerate(zip(level_lenghts,
1275-
# levels)):
1276-
# name = self.columns.names[lnum]
1277-
# yield ExcelCell(lnum, coloffset, name, header_style)
1278-
# for i in records:
1279-
# if records[i] > 1:
1280-
# yield ExcelCell(lnum,coloffset + i + 1, values[i],
1281-
# header_style, lnum, coloffset + i + records[i])
1282-
# else:
1283-
# yield ExcelCell(lnum, coloffset + i + 1, values[i], header_style)
1284-
1285-
# self.rowcounter = lnum
1270+
columns = self.columns
1271+
level_strs = columns.format(sparsify=True, adjoin=False, names=False)
1272+
level_lengths = _get_level_lengths(level_strs)
1273+
coloffset = 0
12861274
lnum = 0
1287-
for i, values in enumerate(zip(*levels)):
1288-
v = ".".join(map(com.pprint_thing, values))
1289-
yield ExcelCell(lnum, coloffset + i, v, header_style)
1275+
1276+
if isinstance(self.df.index, MultiIndex):
1277+
coloffset = len(self.df.index[0]) - 1
1278+
1279+
if self.merge_cells:
1280+
# Format multi-index as merged cells.
1281+
for lnum in range(len(level_lengths)):
1282+
name = columns.names[lnum]
1283+
yield ExcelCell(lnum, coloffset, name, header_style)
1284+
1285+
for lnum, (spans, levels, labels) in enumerate(zip(level_lengths,
1286+
columns.levels,
1287+
columns.labels)
1288+
):
1289+
values = levels.take(labels)
1290+
for i in spans:
1291+
if spans[i] > 1:
1292+
yield ExcelCell(lnum,
1293+
coloffset + i + 1,
1294+
values[i],
1295+
header_style,
1296+
lnum,
1297+
coloffset + i + spans[i])
1298+
else:
1299+
yield ExcelCell(lnum,
1300+
coloffset + i + 1,
1301+
values[i],
1302+
header_style)
1303+
else:
1304+
# Format in legacy format with dots to indicate levels.
1305+
for i, values in enumerate(zip(*level_strs)):
1306+
v = ".".join(map(com.pprint_thing, values))
1307+
yield ExcelCell(lnum, coloffset + i + 1, v, header_style)
12901308

12911309
self.rowcounter = lnum
12921310

@@ -1354,14 +1372,17 @@ def _format_regular_rows(self):
13541372
index_label = self.df.index.names[0]
13551373

13561374
if index_label and self.header is not False:
1357-
# add to same level as column names
1358-
# if isinstance(self.df.columns, MultiIndex):
1359-
# yield ExcelCell(self.rowcounter, 0,
1360-
# index_label, header_style)
1361-
# self.rowcounter += 1
1362-
# else:
1363-
yield ExcelCell(self.rowcounter - 1, 0,
1364-
index_label, header_style)
1375+
if self.merge_cells:
1376+
yield ExcelCell(self.rowcounter,
1377+
0,
1378+
index_label,
1379+
header_style)
1380+
self.rowcounter += 1
1381+
else:
1382+
yield ExcelCell(self.rowcounter - 1,
1383+
0,
1384+
index_label,
1385+
header_style)
13651386

13661387
# write index_values
13671388
index_values = self.df.index
@@ -1383,7 +1404,7 @@ def _format_hierarchical_rows(self):
13831404
self.rowcounter += 1
13841405

13851406
gcolidx = 0
1386-
# output index and index_label?
1407+
13871408
if self.index:
13881409
index_labels = self.df.index.names
13891410
# check for aliases
@@ -1394,29 +1415,60 @@ def _format_hierarchical_rows(self):
13941415
# if index labels are not empty go ahead and dump
13951416
if (any(x is not None for x in index_labels)
13961417
and self.header is not False):
1397-
# if isinstance(self.df.columns, MultiIndex):
1398-
# self.rowcounter += 1
1399-
# else:
1400-
self.rowcounter -= 1
1418+
1419+
if not self.merge_cells:
1420+
self.rowcounter -= 1
1421+
14011422
for cidx, name in enumerate(index_labels):
1402-
yield ExcelCell(self.rowcounter, cidx,
1403-
name, header_style)
1423+
yield ExcelCell(self.rowcounter,
1424+
cidx,
1425+
name,
1426+
header_style)
14041427
self.rowcounter += 1
14051428

1406-
for indexcolvals in zip(*self.df.index):
1407-
for idx, indexcolval in enumerate(indexcolvals):
1408-
yield ExcelCell(self.rowcounter + idx, gcolidx,
1409-
indexcolval, header_style)
1410-
gcolidx += 1
1429+
if self.merge_cells:
1430+
# Format hierarchical rows as merged cells.
1431+
level_strs = self.df.index.format(sparsify=True, adjoin=False,
1432+
names=False)
1433+
level_lengths = _get_level_lengths(level_strs)
1434+
1435+
for spans, levels, labels in zip(level_lengths,
1436+
self.df.index.levels,
1437+
self.df.index.labels):
1438+
values = levels.take(labels)
1439+
for i in spans:
1440+
if spans[i] > 1:
1441+
yield ExcelCell(self.rowcounter + i,
1442+
gcolidx,
1443+
values[i],
1444+
header_style,
1445+
self.rowcounter + i + spans[i] - 1,
1446+
gcolidx)
1447+
else:
1448+
yield ExcelCell(self.rowcounter + i,
1449+
gcolidx,
1450+
values[i],
1451+
header_style)
1452+
gcolidx += 1
1453+
1454+
else:
1455+
# Format hierarchical rows with non-merged values.
1456+
for indexcolvals in zip(*self.df.index):
1457+
for idx, indexcolval in enumerate(indexcolvals):
1458+
yield ExcelCell(self.rowcounter + idx,
1459+
gcolidx,
1460+
indexcolval,
1461+
header_style)
1462+
gcolidx += 1
14111463

14121464
for colidx in range(len(self.columns)):
14131465
series = self.df.iloc[:, colidx]
14141466
for i, val in enumerate(series):
14151467
yield ExcelCell(self.rowcounter + i, gcolidx + colidx, val)
14161468

14171469
def get_formatted_cells(self):
1418-
for cell in itertools.chain(self._format_header(), self._format_body()
1419-
):
1470+
for cell in itertools.chain(self._format_header(),
1471+
self._format_body()):
14201472
cell.val = self._format_value(cell.val)
14211473
yield cell
14221474

pandas/core/frame.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -1130,7 +1130,8 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,
11301130

11311131
def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
11321132
float_format=None, cols=None, header=True, index=True,
1133-
index_label=None, startrow=0, startcol=0, engine=None):
1133+
index_label=None, startrow=0, startcol=0, engine=None,
1134+
merge_cells=True):
11341135
"""
11351136
Write DataFrame to an Excel sheet
11361137
@@ -1161,13 +1162,15 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
11611162
write engine to use - you can also set this via the options
11621163
``io.excel.xlsx.writer``, ``io.excel.xls.writer``, and
11631164
``io.excel.xlsm.writer``.
1164-
1165+
merge_cells : boolean, default True
1166+
Write MultiIndex and Hierarchical Rows as merged cells.
11651167
11661168
Notes
11671169
-----
11681170
If passing an existing ExcelWriter object, then the sheet will be added
11691171
to the existing workbook. This can be used to save different
11701172
DataFrames to one workbook:
1173+
11711174
>>> writer = ExcelWriter('output.xlsx')
11721175
>>> df1.to_excel(writer,'Sheet1')
11731176
>>> df2.to_excel(writer,'Sheet2')
@@ -1185,7 +1188,8 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
11851188
header=header,
11861189
float_format=float_format,
11871190
index=index,
1188-
index_label=index_label)
1191+
index_label=index_label,
1192+
merge_cells=merge_cells)
11891193
formatted_cells = formatter.get_formatted_cells()
11901194
excel_writer.write_cells(formatted_cells, sheet_name,
11911195
startrow=startrow, startcol=startcol)

0 commit comments

Comments
 (0)