Skip to content

Commit b63206b

Browse files
Merge pull request #10570 from flamingbear/10564-allow-multiindex-excel-writing
COMPAT: Allow multi-indexes to be written to excel
2 parents f30e423 + 0e6cd60 commit b63206b

File tree

4 files changed

+76
-25
lines changed

4 files changed

+76
-25
lines changed

doc/source/whatsnew/v0.17.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -521,6 +521,7 @@ Other API Changes
521521
- Enable serialization of lists and dicts to strings in ExcelWriter (:issue:`8188`)
522522
- Allow passing `kwargs` to the interpolation methods (:issue:`10378`).
523523
- Serialize metadata properties of subclasses of pandas objects (:issue:`10553`).
524+
- Allow ``DataFrame`` with ``MultiIndex`` columns to be written to Excel (:issue:`10564`). This was changed in 0.16.2 as the read-back method could not always guarantee perfect fidelity (:issue:`9794`).
524525
- ``Categorical.unique`` now returns new ``Categorical`` which ``categories`` and ``codes`` are unique, rather than returning ``np.array`` (:issue:`10508`)
525526

526527
- unordered category: values and categories are sorted by appearance order.

pandas/core/format.py

+17-1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
# pylint: disable=W0141
55

66
import sys
7+
import warnings
78

89
from pandas.core.base import PandasObject
910
from pandas.core.common import adjoin, notnull
@@ -1640,11 +1641,14 @@ class ExcelFormatter(object):
16401641
inf_rep : string, default `'inf'`
16411642
representation for np.inf values (which aren't representable in Excel)
16421643
A `'-'` sign will be added in front of -inf.
1644+
verbose : boolean, default True
1645+
If True, warn user that the resulting output file may not be
1646+
re-read or parsed directly by pandas.
16431647
"""
16441648

16451649
def __init__(self, df, na_rep='', float_format=None, cols=None,
16461650
header=True, index=True, index_label=None, merge_cells=False,
1647-
inf_rep='inf'):
1651+
inf_rep='inf', verbose=True):
16481652
self.df = df
16491653
self.rowcounter = 0
16501654
self.na_rep = na_rep
@@ -1657,6 +1661,7 @@ def __init__(self, df, na_rep='', float_format=None, cols=None,
16571661
self.header = header
16581662
self.merge_cells = merge_cells
16591663
self.inf_rep = inf_rep
1664+
self.verbose = verbose
16601665

16611666
def _format_value(self, val):
16621667
if lib.checknull(val):
@@ -1671,6 +1676,17 @@ def _format_value(self, val):
16711676
return val
16721677

16731678
def _format_header_mi(self):
1679+
1680+
if self.columns.nlevels > 1:
1681+
if not self.index:
1682+
raise NotImplementedError("Writing to Excel with MultiIndex"
1683+
" columns and no index ('index'=False) "
1684+
"is not yet implemented.")
1685+
elif self.index and self.verbose:
1686+
warnings.warn("Writing to Excel with MultiIndex columns is a"
1687+
" one way serializable operation. You will not"
1688+
" be able to re-read or parse the output file.")
1689+
16741690
has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index))
16751691
if not(has_aliases or self.header):
16761692
return

pandas/core/frame.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -1254,7 +1254,8 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
12541254
def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
12551255
float_format=None, columns=None, header=True, index=True,
12561256
index_label=None, startrow=0, startcol=0, engine=None,
1257-
merge_cells=True, encoding=None, inf_rep='inf'):
1257+
merge_cells=True, encoding=None, inf_rep='inf',
1258+
verbose=True):
12581259
"""
12591260
Write DataFrame to an Excel sheet
12601261
@@ -1295,6 +1296,9 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
12951296
inf_rep : string, default 'inf'
12961297
Representation for infinity (there is no native representation for
12971298
infinity in Excel)
1299+
verbose : boolean, default True
1300+
If True, warn user that the resulting output file may not be
1301+
re-read or parsed directly by pandas.
12981302
12991303
Notes
13001304
-----
@@ -1311,12 +1315,8 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
13111315
strings before writing.
13121316
"""
13131317
from pandas.io.excel import ExcelWriter
1314-
if self.columns.nlevels > 1:
1315-
raise NotImplementedError("Writing as Excel with a MultiIndex is "
1316-
"not yet implemented.")
1317-
13181318
need_save = False
1319-
if encoding == None:
1319+
if encoding is None:
13201320
encoding = 'ascii'
13211321

13221322
if isinstance(excel_writer, compat.string_types):
@@ -1331,7 +1331,7 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
13311331
index=index,
13321332
index_label=index_label,
13331333
merge_cells=merge_cells,
1334-
inf_rep=inf_rep)
1334+
inf_rep=inf_rep, verbose=verbose)
13351335
formatted_cells = formatter.get_formatted_cells()
13361336
excel_writer.write_cells(formatted_cells, sheet_name,
13371337
startrow=startrow, startcol=startcol)

pandas/io/tests/test_excel.py

+51-17
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,6 @@ def test_read_from_file_url(self):
316316

317317
tm.assert_frame_equal(url_table, local_table)
318318

319-
320319
def test_xlsx_table(self):
321320
_skip_if_no_xlrd()
322321
_skip_if_no_openpyxl()
@@ -1145,10 +1144,10 @@ def test_excel_010_hemstring(self):
11451144
# ensure limited functionality in 0.10
11461145
# override of #2370 until sorted out in 0.11
11471146

1148-
def roundtrip(df, header=True, parser_hdr=0):
1147+
def roundtrip(df, header=True, parser_hdr=0, index=True):
11491148

11501149
with ensure_clean(self.ext) as path:
1151-
df.to_excel(path, header=header, merge_cells=self.merge_cells)
1150+
df.to_excel(path, header=header, merge_cells=self.merge_cells, index=index)
11521151
xf = pd.ExcelFile(path)
11531152
res = xf.parse(xf.sheet_names[0], header=parser_hdr)
11541153
return res
@@ -1164,7 +1163,7 @@ def roundtrip(df, header=True, parser_hdr=0):
11641163
#is implemented for now fixing #9794
11651164
if j>1:
11661165
with tm.assertRaises(NotImplementedError):
1167-
res = roundtrip(df, use_headers)
1166+
res = roundtrip(df, use_headers, index=False)
11681167
else:
11691168
res = roundtrip(df, use_headers)
11701169

@@ -1187,6 +1186,33 @@ def roundtrip(df, header=True, parser_hdr=0):
11871186
self.assertEqual(res.shape, (1, 2))
11881187
self.assertTrue(res.ix[0, 0] is not np.nan)
11891188

1189+
def test_excel_010_hemstring_raises_NotImplementedError(self):
1190+
# This test was failing only for j>1 and header=False,
1191+
# So I reproduced a simple test.
1192+
_skip_if_no_xlrd()
1193+
1194+
if self.merge_cells:
1195+
raise nose.SkipTest('Skip tests for merged MI format.')
1196+
1197+
from pandas.util.testing import makeCustomDataframe as mkdf
1198+
# ensure limited functionality in 0.10
1199+
# override of #2370 until sorted out in 0.11
1200+
1201+
def roundtrip2(df, header=True, parser_hdr=0, index=True):
1202+
1203+
with ensure_clean(self.ext) as path:
1204+
df.to_excel(path, header=header, merge_cells=self.merge_cells, index=index)
1205+
xf = pd.ExcelFile(path)
1206+
res = xf.parse(xf.sheet_names[0], header=parser_hdr)
1207+
return res
1208+
1209+
nrows = 5; ncols = 3
1210+
j = 2; i = 1
1211+
df = mkdf(nrows, ncols, r_idx_nlevels=i, c_idx_nlevels=j)
1212+
with tm.assertRaises(NotImplementedError):
1213+
res = roundtrip2(df, header=False, index=False)
1214+
1215+
11901216
def test_duplicated_columns(self):
11911217
# Test for issue #5235.
11921218
_skip_if_no_xlrd()
@@ -1439,29 +1465,37 @@ class XlwtTests(ExcelWriterBase, tm.TestCase):
14391465
engine_name = 'xlwt'
14401466
check_skip = staticmethod(_skip_if_no_xlwt)
14411467

1442-
def test_excel_raise_not_implemented_error_on_multiindex_columns(self):
1468+
def test_excel_raise_error_on_multiindex_columns_and_no_index(self):
14431469
_skip_if_no_xlwt()
1444-
#MultiIndex as columns is not yet implemented 9794
1445-
cols = pd.MultiIndex.from_tuples([('site',''),
1446-
('2014','height'),
1447-
('2014','weight')])
1448-
df = pd.DataFrame(np.random.randn(10,3), columns=cols)
1470+
# MultiIndex as columns is not yet implemented 9794
1471+
cols = pd.MultiIndex.from_tuples([('site', ''),
1472+
('2014', 'height'),
1473+
('2014', 'weight')])
1474+
df = pd.DataFrame(np.random.randn(10, 3), columns=cols)
14491475
with tm.assertRaises(NotImplementedError):
14501476
with ensure_clean(self.ext) as path:
14511477
df.to_excel(path, index=False)
14521478

1479+
def test_excel_warns_verbosely_on_multiindex_columns_and_index_true(self):
1480+
_skip_if_no_xlwt()
1481+
cols = pd.MultiIndex.from_tuples([('site', ''),
1482+
('2014', 'height'),
1483+
('2014', 'weight')])
1484+
df = pd.DataFrame(np.random.randn(10, 3), columns=cols)
1485+
with tm.assert_produces_warning(UserWarning):
1486+
with ensure_clean(self.ext) as path:
1487+
df.to_excel(path, index=True)
1488+
14531489
def test_excel_multiindex_index(self):
14541490
_skip_if_no_xlwt()
1455-
#MultiIndex as index works so assert no error #9794
1456-
cols = pd.MultiIndex.from_tuples([('site',''),
1457-
('2014','height'),
1458-
('2014','weight')])
1459-
df = pd.DataFrame(np.random.randn(3,10), index=cols)
1491+
# MultiIndex as index works so assert no error #9794
1492+
cols = pd.MultiIndex.from_tuples([('site', ''),
1493+
('2014', 'height'),
1494+
('2014', 'weight')])
1495+
df = pd.DataFrame(np.random.randn(3, 10), index=cols)
14601496
with ensure_clean(self.ext) as path:
14611497
df.to_excel(path, index=False)
14621498

1463-
1464-
14651499
def test_to_excel_styleconverter(self):
14661500
_skip_if_no_xlwt()
14671501

0 commit comments

Comments
 (0)