From 0e6cd60c328d7386943fa8299d1aa873bc1fee20 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Sat, 15 Aug 2015 09:52:17 -0600 Subject: [PATCH] COMPAT:Allow multi-indexes to be written to excel. (Even though they cannot be read back in.) Closes #10564 --- doc/source/whatsnew/v0.17.0.txt | 1 + pandas/core/format.py | 18 ++++++++- pandas/core/frame.py | 14 +++---- pandas/io/tests/test_excel.py | 68 ++++++++++++++++++++++++--------- 4 files changed, 76 insertions(+), 25 deletions(-) diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index 7e69a8044a305..76efa6592877e 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -501,6 +501,7 @@ Other API Changes - Enable serialization of lists and dicts to strings in ExcelWriter (:issue:`8188`) - Allow passing `kwargs` to the interpolation methods (:issue:`10378`). - Serialize metadata properties of subclasses of pandas objects (:issue:`10553`). +- Allow ``DataFrame`` with ``MultiIndex`` columns to be written to Excel (:issue:`10564`). This was changed in 0.16.2 as the read-back method could not always guarantee perfect fidelity (:issue:`9794`). - ``Categorical.unique`` now returns new ``Categorical`` which ``categories`` and ``codes`` are unique, rather than returning ``np.array`` (:issue:`10508`) - unordered category: values and categories are sorted by appearance order. diff --git a/pandas/core/format.py b/pandas/core/format.py index 4ec4375349764..c04531c682413 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -4,6 +4,7 @@ # pylint: disable=W0141 import sys +import warnings from pandas.core.base import PandasObject from pandas.core.common import adjoin, notnull @@ -1640,11 +1641,14 @@ class ExcelFormatter(object): inf_rep : string, default `'inf'` representation for np.inf values (which aren't representable in Excel) A `'-'` sign will be added in front of -inf. 
+ verbose: boolean, default True + If True, warn user that the resulting output file may not be + re-read or parsed directly by pandas. """ def __init__(self, df, na_rep='', float_format=None, cols=None, header=True, index=True, index_label=None, merge_cells=False, - inf_rep='inf'): + inf_rep='inf', verbose=True): self.df = df self.rowcounter = 0 self.na_rep = na_rep @@ -1657,6 +1661,7 @@ def __init__(self, df, na_rep='', float_format=None, cols=None, self.header = header self.merge_cells = merge_cells self.inf_rep = inf_rep + self.verbose = verbose def _format_value(self, val): if lib.checknull(val): @@ -1671,6 +1676,17 @@ def _format_value(self, val): return val def _format_header_mi(self): + + if self.columns.nlevels > 1: + if not self.index: + raise NotImplementedError("Writing to Excel with MultiIndex" + " columns and no index ('index'=False) " + "is not yet implemented.") + elif self.index and self.verbose: + warnings.warn("Writing to Excel with MultiIndex columns is a" + " one way serializable operation. 
You will not" + " be able to re-read or parse the output file.") + has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index)) if not(has_aliases or self.header): return diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 062cbe579785c..d9443fe09c623 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1247,7 +1247,8 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', float_format=None, columns=None, header=True, index=True, index_label=None, startrow=0, startcol=0, engine=None, - merge_cells=True, encoding=None, inf_rep='inf'): + merge_cells=True, encoding=None, inf_rep='inf', + verbose=True): """ Write DataFrame to a excel sheet @@ -1288,6 +1289,9 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', inf_rep : string, default 'inf' Representation for infinity (there is no native representation for infinity in Excel) + verbose: boolean, default True + If True, warn user that the resulting output file may not be + re-read or parsed directly by pandas. Notes ----- @@ -1304,12 +1308,8 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', strings before writing. 
""" from pandas.io.excel import ExcelWriter - if self.columns.nlevels > 1: - raise NotImplementedError("Writing as Excel with a MultiIndex is " - "not yet implemented.") - need_save = False - if encoding == None: + if encoding is None: encoding = 'ascii' if isinstance(excel_writer, compat.string_types): @@ -1324,7 +1324,7 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', index=index, index_label=index_label, merge_cells=merge_cells, - inf_rep=inf_rep) + inf_rep=inf_rep, verbose=verbose) formatted_cells = formatter.get_formatted_cells() excel_writer.write_cells(formatted_cells, sheet_name, startrow=startrow, startcol=startcol) diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index 26f4d65978fa0..83db59f9d9029 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -316,7 +316,6 @@ def test_read_from_file_url(self): tm.assert_frame_equal(url_table, local_table) - def test_xlsx_table(self): _skip_if_no_xlrd() _skip_if_no_openpyxl() @@ -1145,10 +1144,10 @@ def test_excel_010_hemstring(self): # ensure limited functionality in 0.10 # override of #2370 until sorted out in 0.11 - def roundtrip(df, header=True, parser_hdr=0): + def roundtrip(df, header=True, parser_hdr=0, index=True): with ensure_clean(self.ext) as path: - df.to_excel(path, header=header, merge_cells=self.merge_cells) + df.to_excel(path, header=header, merge_cells=self.merge_cells, index=index) xf = pd.ExcelFile(path) res = xf.parse(xf.sheet_names[0], header=parser_hdr) return res @@ -1164,7 +1163,7 @@ def roundtrip(df, header=True, parser_hdr=0): #is implemented for now fixing #9794 if j>1: with tm.assertRaises(NotImplementedError): - res = roundtrip(df, use_headers) + res = roundtrip(df, use_headers, index=False) else: res = roundtrip(df, use_headers) @@ -1187,6 +1186,33 @@ def roundtrip(df, header=True, parser_hdr=0): self.assertEqual(res.shape, (1, 2)) self.assertTrue(res.ix[0, 0] is not np.nan) + def 
test_excel_010_hemstring_raises_NotImplementedError(self): + # This test was failing only for j>1 and header=False, + # So I reproduced a simple test. + _skip_if_no_xlrd() + + if self.merge_cells: + raise nose.SkipTest('Skip tests for merged MI format.') + + from pandas.util.testing import makeCustomDataframe as mkdf + # ensure limited functionality in 0.10 + # override of #2370 until sorted out in 0.11 + + def roundtrip2(df, header=True, parser_hdr=0, index=True): + + with ensure_clean(self.ext) as path: + df.to_excel(path, header=header, merge_cells=self.merge_cells, index=index) + xf = pd.ExcelFile(path) + res = xf.parse(xf.sheet_names[0], header=parser_hdr) + return res + + nrows = 5; ncols = 3 + j = 2; i = 1 + df = mkdf(nrows, ncols, r_idx_nlevels=i, c_idx_nlevels=j) + with tm.assertRaises(NotImplementedError): + res = roundtrip2(df, header=False, index=False) + + def test_duplicated_columns(self): # Test for issue #5235. _skip_if_no_xlrd() @@ -1439,29 +1465,37 @@ class XlwtTests(ExcelWriterBase, tm.TestCase): engine_name = 'xlwt' check_skip = staticmethod(_skip_if_no_xlwt) - def test_excel_raise_not_implemented_error_on_multiindex_columns(self): + def test_excel_raise_error_on_multiindex_columns_and_no_index(self): _skip_if_no_xlwt() - #MultiIndex as columns is not yet implemented 9794 - cols = pd.MultiIndex.from_tuples([('site',''), - ('2014','height'), - ('2014','weight')]) - df = pd.DataFrame(np.random.randn(10,3), columns=cols) + # MultiIndex as columns is not yet implemented 9794 + cols = pd.MultiIndex.from_tuples([('site', ''), + ('2014', 'height'), + ('2014', 'weight')]) + df = pd.DataFrame(np.random.randn(10, 3), columns=cols) with tm.assertRaises(NotImplementedError): with ensure_clean(self.ext) as path: df.to_excel(path, index=False) + def test_excel_warns_verbosely_on_multiindex_columns_and_index_true(self): + _skip_if_no_xlwt() + cols = pd.MultiIndex.from_tuples([('site', ''), + ('2014', 'height'), + ('2014', 'weight')]) + df = 
pd.DataFrame(np.random.randn(10, 3), columns=cols) + with tm.assert_produces_warning(UserWarning): + with ensure_clean(self.ext) as path: + df.to_excel(path, index=True) + def test_excel_multiindex_index(self): _skip_if_no_xlwt() - #MultiIndex as index works so assert no error #9794 - cols = pd.MultiIndex.from_tuples([('site',''), - ('2014','height'), - ('2014','weight')]) - df = pd.DataFrame(np.random.randn(3,10), index=cols) + # MultiIndex as index works so assert no error #9794 + cols = pd.MultiIndex.from_tuples([('site', ''), + ('2014', 'height'), + ('2014', 'weight')]) + df = pd.DataFrame(np.random.randn(3, 10), index=cols) with ensure_clean(self.ext) as path: df.to_excel(path, index=False) - - def test_to_excel_styleconverter(self): _skip_if_no_xlwt()