diff --git a/ci/requirements-2.7.txt b/ci/requirements-2.7.txt index 705aa9e3cc922..dc6e2d9f55977 100644 --- a/ci/requirements-2.7.txt +++ b/ci/requirements-2.7.txt @@ -8,7 +8,7 @@ numexpr==2.1 tables==2.3.1 matplotlib==1.1.1 openpyxl==1.6.2 -xlsxwriter==0.4.3 +xlsxwriter==0.4.6 xlrd==0.9.2 patsy==0.1.0 html5lib==1.0b2 diff --git a/ci/requirements-2.7_LOCALE.txt b/ci/requirements-2.7_LOCALE.txt index b18bff6797840..06574cdd6b299 100644 --- a/ci/requirements-2.7_LOCALE.txt +++ b/ci/requirements-2.7_LOCALE.txt @@ -2,7 +2,7 @@ python-dateutil pytz==2013b xlwt==0.7.5 openpyxl==1.6.2 -xlsxwriter==0.4.3 +xlsxwriter==0.4.6 xlrd==0.9.2 numpy==1.6.1 cython==0.19.1 diff --git a/ci/requirements-3.2.txt b/ci/requirements-3.2.txt index 0f3bdcbac38cb..136b5cf12cbc0 100644 --- a/ci/requirements-3.2.txt +++ b/ci/requirements-3.2.txt @@ -1,7 +1,7 @@ python-dateutil==2.1 pytz==2013b openpyxl==1.6.2 -xlsxwriter==0.4.3 +xlsxwriter==0.4.6 xlrd==0.9.2 numpy==1.7.1 cython==0.19.1 diff --git a/ci/requirements-3.3.txt b/ci/requirements-3.3.txt index 3ca888d1623e3..480fde477d88b 100644 --- a/ci/requirements-3.3.txt +++ b/ci/requirements-3.3.txt @@ -1,7 +1,7 @@ python-dateutil==2.2 pytz==2013b openpyxl==1.6.2 -xlsxwriter==0.4.3 +xlsxwriter==0.4.6 xlrd==0.9.2 html5lib==1.0b2 numpy==1.8.0 diff --git a/doc/source/release.rst b/doc/source/release.rst index 4b33c20424b33..77d78b2892b90 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -209,6 +209,11 @@ Improvements to existing features by color as expected. - ``read_excel()`` now tries to convert integral floats (like ``1.0``) to int by default. (:issue:`5394`) + - Excel writers now have a default option ``merge_cells`` in ``to_excel()`` + to merge cells in MultiIndex and Hierarchical Rows. Note: using this + option it is no longer possible to round trip Excel files with merged + MultiIndex and Hierarchical Rows. Set the ``merge_cells`` to ``False`` to + restore the previous behaviour. (:issue:`5254`) API Changes ~~~~~~~~~~~ diff --git a/pandas/core/format.py b/pandas/core/format.py index 75069297360d6..5062fd9be6357 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -1213,7 +1213,7 @@ def __init__(self, row, col, val, "right": "thin", "bottom": "thin", "left": "thin"}, - "alignment": {"horizontal": "center"}} + "alignment": {"horizontal": "center", "vertical": "top"}} class ExcelFormatter(object): @@ -1237,10 +1237,12 @@ class ExcelFormatter(object): Column label for index column(s) if desired. If None is given, and `header` and `index` are True, then the index names are used. A sequence should be given if the DataFrame uses MultiIndex. + merge_cells : boolean, default False + Format MultiIndex and Hierarchical Rows as merged cells. """ def __init__(self, df, na_rep='', float_format=None, cols=None, - header=True, index=True, index_label=None): + header=True, index=True, index_label=None, merge_cells=False): self.df = df self.rowcounter = 0 self.na_rep = na_rep @@ -1251,6 +1253,7 @@ def __init__(self, df, na_rep='', float_format=None, cols=None, self.index = index self.index_label = index_label self.header = header + self.merge_cells = merge_cells def _format_value(self, val): if lib.checknull(val): @@ -1264,29 +1267,44 @@ def _format_header_mi(self): if not(has_aliases or self.header): return - levels = self.columns.format(sparsify=True, adjoin=False, - names=False) - # level_lenghts = _get_level_lengths(levels) - coloffset = 1 - if isinstance(self.df.index, MultiIndex): - coloffset = len(self.df.index[0]) - - # for lnum, (records, values) in enumerate(zip(level_lenghts, - # levels)): - # name = self.columns.names[lnum] - # yield ExcelCell(lnum, coloffset, name, header_style) - # for i in records: - # if records[i] > 1: - # yield ExcelCell(lnum,coloffset + i + 1, values[i], - # header_style, lnum, coloffset + i + records[i]) - # else: - # yield ExcelCell(lnum, coloffset + i + 1, values[i], header_style) - - # self.rowcounter = lnum + columns = self.columns + level_strs = columns.format(sparsify=True, adjoin=False, names=False) + level_lengths = _get_level_lengths(level_strs) + coloffset = 0 lnum = 0 - for i, values in enumerate(zip(*levels)): - v = ".".join(map(com.pprint_thing, values)) - yield ExcelCell(lnum, coloffset + i, v, header_style) + + if isinstance(self.df.index, MultiIndex): + coloffset = len(self.df.index[0]) - 1 + + if self.merge_cells: + # Format multi-index as a merged cells. + for lnum in range(len(level_lengths)): + name = columns.names[lnum] + yield ExcelCell(lnum, coloffset, name, header_style) + + for lnum, (spans, levels, labels) in enumerate(zip(level_lengths, + columns.levels, + columns.labels) + ): + values = levels.take(labels) + for i in spans: + if spans[i] > 1: + yield ExcelCell(lnum, + coloffset + i + 1, + values[i], + header_style, + lnum, + coloffset + i + spans[i]) + else: + yield ExcelCell(lnum, + coloffset + i + 1, + values[i], + header_style) + else: + # Format in legacy format with dots to indicate levels. + for i, values in enumerate(zip(*level_strs)): + v = ".".join(map(com.pprint_thing, values)) + yield ExcelCell(lnum, coloffset + i + 1, v, header_style) self.rowcounter = lnum @@ -1354,14 +1372,17 @@ def _format_regular_rows(self): index_label = self.df.index.names[0] if index_label and self.header is not False: - # add to same level as column names - # if isinstance(self.df.columns, MultiIndex): - # yield ExcelCell(self.rowcounter, 0, - # index_label, header_style) - # self.rowcounter += 1 - # else: - yield ExcelCell(self.rowcounter - 1, 0, - index_label, header_style) + if self.merge_cells: + yield ExcelCell(self.rowcounter, + 0, + index_label, + header_style) + self.rowcounter += 1 + else: + yield ExcelCell(self.rowcounter - 1, + 0, + index_label, + header_style) # write index_values index_values = self.df.index @@ -1383,7 +1404,7 @@ def _format_hierarchical_rows(self): self.rowcounter += 1 gcolidx = 0 - # output index and index_label? + if self.index: index_labels = self.df.index.names # check for aliases @@ -1394,20 +1415,51 @@ def _format_hierarchical_rows(self): # if index labels are not empty go ahead and dump if (any(x is not None for x in index_labels) and self.header is not False): - # if isinstance(self.df.columns, MultiIndex): - # self.rowcounter += 1 - # else: - self.rowcounter -= 1 + + if not self.merge_cells: + self.rowcounter -= 1 + for cidx, name in enumerate(index_labels): - yield ExcelCell(self.rowcounter, cidx, - name, header_style) + yield ExcelCell(self.rowcounter, + cidx, + name, + header_style) self.rowcounter += 1 - for indexcolvals in zip(*self.df.index): - for idx, indexcolval in enumerate(indexcolvals): - yield ExcelCell(self.rowcounter + idx, gcolidx, - indexcolval, header_style) - gcolidx += 1 + if self.merge_cells: + # Format hierarchical rows as merged cells. + level_strs = self.df.index.format(sparsify=True, adjoin=False, + names=False) + level_lengths = _get_level_lengths(level_strs) + + for spans, levels, labels in zip(level_lengths, + self.df.index.levels, + self.df.index.labels): + values = levels.take(labels) + for i in spans: + if spans[i] > 1: + yield ExcelCell(self.rowcounter + i, + gcolidx, + values[i], + header_style, + self.rowcounter + i + spans[i] - 1, + gcolidx) + else: + yield ExcelCell(self.rowcounter + i, + gcolidx, + values[i], + header_style) + gcolidx += 1 + + else: + # Format hierarchical rows with non-merged values. + for indexcolvals in zip(*self.df.index): + for idx, indexcolval in enumerate(indexcolvals): + yield ExcelCell(self.rowcounter + idx, + gcolidx, + indexcolval, + header_style) + gcolidx += 1 for colidx in range(len(self.columns)): series = self.df.iloc[:, colidx] @@ -1415,8 +1467,8 @@ def _format_hierarchical_rows(self): yield ExcelCell(self.rowcounter + i, gcolidx + colidx, val) def get_formatted_cells(self): - for cell in itertools.chain(self._format_header(), self._format_body() - ): + for cell in itertools.chain(self._format_header(), + self._format_body()): cell.val = self._format_value(cell.val) yield cell diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0a5306de9bbb5..18fba179f0654 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1130,7 +1130,8 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None, def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', float_format=None, cols=None, header=True, index=True, - index_label=None, startrow=0, startcol=0, engine=None): + index_label=None, startrow=0, startcol=0, engine=None, + merge_cells=True): """ Write DataFrame to a excel sheet @@ -1161,13 +1162,15 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', write engine to use - you can also set this via the options ``io.excel.xlsx.writer``, ``io.excel.xls.writer``, and ``io.excel.xlsm.writer``. - + merge_cells : boolean, default True + Write MultiIndex and Hierarchical Rows as merged cells. Notes ----- If passing an existing ExcelWriter object, then the sheet will be added to the existing workbook. This can be used to save different DataFrames to one workbook + >>> writer = ExcelWriter('output.xlsx') >>> df1.to_excel(writer,'Sheet1') >>> df2.to_excel(writer,'Sheet2') @@ -1185,7 +1188,8 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', header=header, float_format=float_format, index=index, - index_label=index_label) + index_label=index_label, + merge_cells=merge_cells) formatted_cells = formatter.get_formatted_cells() excel_writer.write_cells(formatted_cells, sheet_name, startrow=startrow, startcol=startcol) diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 42c212caf41ca..b97c9da0b0d18 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -146,7 +146,7 @@ def __init__(self, io, **kwds): def parse(self, sheetname, header=0, skiprows=None, skip_footer=0, index_col=None, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, chunksize=None, - convert_float=True, **kwds): + convert_float=True, has_index_names=False, **kwds): """Read an Excel table into DataFrame Parameters @@ -169,25 +169,29 @@ def parse(self, sheetname, header=0, skiprows=None, skip_footer=0, parsed * If string then indicates comma separated list of column names and column ranges (e.g. "A:E" or "A,C,E:F") + parse_dates : boolean, default False + Parse date Excel values, + date_parser : function default None + Date parsing function na_values : list-like, default None List of additional strings to recognize as NA/NaN - keep_default_na : bool, default True - If na_values are specified and keep_default_na is False the default - NaN values are overridden, otherwise they're appended to - verbose : boolean, default False - Indicate number of NA values placed in non-numeric columns + thousands : str, default None + Thousands separator + chunksize : int, default None + Size of file chunk to read for lazy evaluation. convert_float : boolean, default True convert integral floats to int (i.e., 1.0 --> 1). If False, all numeric data will be read in as floats: Excel stores all numbers as floats internally. + has_index_names : boolean, default False + True if the cols defined in index_col have an index name and are + not in the header Returns ------- parsed : DataFrame DataFrame parsed from the Excel file """ - has_index_names = False # removed as new argument of API function - skipfooter = kwds.pop('skipfooter', None) if skipfooter is not None: skip_footer = skipfooter @@ -506,6 +510,7 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): colletter = get_column_letter(startcol + cell.col + 1) xcell = wks.cell("%s%s" % (colletter, startrow + cell.row + 1)) xcell.value = _conv_value(cell.val) + style = None if cell.style: style = self._convert_to_style(cell.style) for field in style.__fields__: @@ -517,8 +522,6 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): elif isinstance(cell.val, datetime.date): xcell.style.number_format.format_code = "YYYY-MM-DD" - # merging requires openpyxl latest (works on 1.6.1) - # todo add version check if cell.mergestart is not None and cell.mergeend is not None: cletterstart = get_column_letter(startcol + cell.col + 1) cletterend = get_column_letter(startcol + cell.mergeend + 1) @@ -528,6 +531,25 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): cletterend, startrow + cell.mergestart + 1)) + # Excel requires that the format of the first cell in a merged + # range is repeated in the rest of the merged range. + if style: + first_row = startrow + cell.row + 1 + last_row = startrow + cell.mergestart + 1 + first_col = startcol + cell.col + 1 + last_col = startcol + cell.mergeend + 1 + + for row in range(first_row, last_row + 1): + for col in range(first_col, last_col + 1): + if row == first_row and col == first_col: + # Ignore first cell. It is already handled. + continue + colletter = get_column_letter(col) + xcell = wks.cell("%s%s" % (colletter, row)) + for field in style.__fields__: + xcell.style.__setattr__(field, \ + style.__getattribute__(field)) + @classmethod def _convert_to_style(cls, style_dict): """ @@ -723,8 +745,8 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): if cell.mergestart is not None and cell.mergeend is not None: wks.merge_range(startrow + cell.row, - startrow + cell.mergestart, startcol + cell.col, + startrow + cell.mergestart, startcol + cell.mergeend, cell.val, style) else: @@ -752,6 +774,16 @@ def _convert_to_style(self, style_dict, num_format_str=None): if font.get('bold'): xl_format.set_bold() + # Map the alignment to XlsxWriter alignment properties. + alignment = style_dict.get('alignment') + if alignment: + if (alignment.get('horizontal') + and alignment['horizontal'] == 'center'): + xl_format.set_align('center') + if (alignment.get('vertical') + and alignment['vertical'] == 'top'): + xl_format.set_align('top') + # Map the cell borders to XlsxWriter border properties. if style_dict.get('borders'): xl_format.set_border() diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index 6eb3cbf1a3903..8bcf5e461ce7c 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -102,7 +102,8 @@ def test_parse_cols_int(self): df2 = df2.reindex(columns=['A', 'B', 'C']) df3 = xls.parse('Sheet2', skiprows=[1], index_col=0, parse_dates=True, parse_cols=3) - tm.assert_frame_equal(df, df2, check_names=False) # TODO add index to xls file) + # TODO add index to xls file) + tm.assert_frame_equal(df, df2, check_names=False) tm.assert_frame_equal(df3, df2, check_names=False) def test_parse_cols_list(self): @@ -121,7 +122,8 @@ def test_parse_cols_list(self): df3 = xls.parse('Sheet2', skiprows=[1], index_col=0, parse_dates=True, parse_cols=[0, 2, 3]) - tm.assert_frame_equal(df, df2, check_names=False) # TODO add index to xls file + # TODO add index to xls file) + tm.assert_frame_equal(df, df2, check_names=False) tm.assert_frame_equal(df3, df2, check_names=False) def test_parse_cols_str(self): @@ -141,7 +143,8 @@ def test_parse_cols_str(self): df2 = df2.reindex(columns=['A', 'B', 'C']) df3 = xls.parse('Sheet2', skiprows=[1], index_col=0, parse_dates=True, parse_cols='A:D') - tm.assert_frame_equal(df, df2, check_names=False) # TODO add index to xls, read xls ignores index name ? + # TODO add index to xls, read xls ignores index name ? + tm.assert_frame_equal(df, df2, check_names=False) tm.assert_frame_equal(df3, df2, check_names=False) del df, df2, df3 @@ -152,7 +155,8 @@ def test_parse_cols_str(self): df3 = xls.parse('Sheet2', skiprows=[1], index_col=0, parse_dates=True, parse_cols='A,C,D') - tm.assert_frame_equal(df, df2, check_names=False) # TODO add index to xls file + # TODO add index to xls file + tm.assert_frame_equal(df, df2, check_names=False) tm.assert_frame_equal(df3, df2, check_names=False) del df, df2, df3 @@ -284,7 +288,8 @@ def test_xlsx_table(self): df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True) df3 = xlsx.parse('Sheet2', skiprows=[1], index_col=0, parse_dates=True) - tm.assert_frame_equal(df, df2, check_names=False) # TODO add index to xlsx file + # TODO add index to xlsx file + tm.assert_frame_equal(df, df2, check_names=False) tm.assert_frame_equal(df3, df2, check_names=False) df4 = xlsx.parse('Sheet1', index_col=0, parse_dates=True, @@ -365,6 +370,10 @@ class ExcelWriterBase(SharedItems): # 2. Add a property ext, which is the file extension that your writer # writes to. (needs to start with '.' so it's a valid path) # 3. Add a property engine_name, which is the name of the writer class. + + # Test with MultiIndex and Hierarchical Rows as merged cells. + merge_cells = True + def setUp(self): self.check_skip() super(ExcelWriterBase, self).setUp() @@ -433,7 +442,8 @@ def test_roundtrip(self): tm.assert_frame_equal(self.frame, recons) self.frame.to_excel(path, 'test1', na_rep='88') - recons = read_excel(path, 'test1', index_col=0, na_values=[88, 88.0]) + recons = read_excel(path, 'test1', index_col=0, + na_values=[88, 88.0]) tm.assert_frame_equal(self.frame, recons) def test_mixed(self): @@ -571,39 +581,56 @@ def test_roundtrip_indexlabels(self): # test index_label frame = (DataFrame(np.random.randn(10, 2)) >= 0) - frame.to_excel(path, 'test1', index_label=['test']) + frame.to_excel(path, 'test1', + index_label=['test'], + merge_cells=self.merge_cells) reader = ExcelFile(path) - recons = reader.parse('test1', index_col=0).astype(np.int64) + recons = reader.parse('test1', + index_col=0, + has_index_names=self.merge_cells + ).astype(np.int64) frame.index.names = ['test'] self.assertEqual(frame.index.names, recons.index.names) frame = (DataFrame(np.random.randn(10, 2)) >= 0) - frame.to_excel( - path, 'test1', index_label=['test', 'dummy', 'dummy2']) + frame.to_excel(path, + 'test1', + index_label=['test', 'dummy', 'dummy2'], + merge_cells=self.merge_cells) reader = ExcelFile(path) - recons = reader.parse('test1', index_col=0).astype(np.int64) + recons = reader.parse('test1', + index_col=0, + has_index_names=self.merge_cells + ).astype(np.int64) frame.index.names = ['test'] self.assertEqual(frame.index.names, recons.index.names) frame = (DataFrame(np.random.randn(10, 2)) >= 0) - frame.to_excel(path, 'test1', index_label='test') + frame.to_excel(path, + 'test1', + index_label='test', + merge_cells=self.merge_cells) reader = ExcelFile(path) - recons = reader.parse('test1', index_col=0).astype(np.int64) + recons = reader.parse('test1', + index_col=0, + has_index_names=self.merge_cells + ).astype(np.int64) frame.index.names = ['test'] - self.assertEqual(frame.index.names, recons.index.names) + self.assertAlmostEqual(frame.index.names, recons.index.names) with ensure_clean(self.ext) as path: - self.frame.to_excel(path, 'test1', - cols=['A', 'B', 'C', 'D'], index=False) - # take 'A' and 'B' as indexes (they are in same row as cols 'C', - # 'D') + self.frame.to_excel(path, + 'test1', + cols=['A', 'B', 'C', 'D'], + index=False, merge_cells=self.merge_cells) + # take 'A' and 'B' as indexes (same row as cols 'C', 'D') df = self.frame.copy() df = df.set_index(['A', 'B']) reader = ExcelFile(path) recons = reader.parse('test1', index_col=[0, 1]) - tm.assert_frame_equal(df, recons) + tm.assert_frame_equal(df, recons, check_less_precise=True) def test_excel_roundtrip_indexname(self): _skip_if_no_xlrd() @@ -612,10 +639,12 @@ def test_excel_roundtrip_indexname(self): df.index.name = 'foo' with ensure_clean(self.ext) as path: - df.to_excel(path) + df.to_excel(path, merge_cells=self.merge_cells) xf = ExcelFile(path) - result = xf.parse(xf.sheet_names[0], index_col=0) + result = xf.parse(xf.sheet_names[0], + index_col=0, + has_index_names=self.merge_cells) tm.assert_frame_equal(result, df) self.assertEqual(result.index.name, 'foo') @@ -624,19 +653,18 @@ def test_excel_roundtrip_datetime(self): _skip_if_no_xlrd() # datetime.date, not sure what to test here exactly - path = '__tmp_excel_roundtrip_datetime__.' + self.ext tsf = self.tsframe.copy() with ensure_clean(self.ext) as path: tsf.index = [x.date() for x in self.tsframe.index] - tsf.to_excel(path, 'test1') + tsf.to_excel(path, 'test1', merge_cells=self.merge_cells) reader = ExcelFile(path) recons = reader.parse('test1') tm.assert_frame_equal(self.tsframe, recons) def test_to_excel_periodindex(self): _skip_if_no_xlrd() - path = '__tmp_to_excel_periodindex__.' + self.ext + frame = self.tsframe xp = frame.resample('M', kind='period') @@ -651,8 +679,7 @@ def test_to_excel_multiindex(self): _skip_if_no_xlrd() frame = self.frame - old_index = frame.index - arrays = np.arange(len(old_index) * 2).reshape(2, -1) + arrays = np.arange(len(frame.index) * 2).reshape(2, -1) new_index = MultiIndex.from_arrays(arrays, names=['first', 'second']) frame.index = new_index @@ -662,42 +689,36 @@ def test_to_excel_multiindex(self): frame.to_excel(path, 'test1', cols=['A', 'B']) # round trip - frame.to_excel(path, 'test1') + frame.to_excel(path, 'test1', merge_cells=self.merge_cells) reader = ExcelFile(path) - df = reader.parse('test1', index_col=[0, 1], parse_dates=False) + df = reader.parse('test1', index_col=[0, 1], + parse_dates=False, + has_index_names=self.merge_cells) tm.assert_frame_equal(frame, df) self.assertEqual(frame.index.names, df.index.names) - self.frame.index = old_index # needed if setUP becomes a classmethod def test_to_excel_multiindex_dates(self): _skip_if_no_xlrd() # try multiindex with dates - tsframe = self.tsframe - old_index = tsframe.index - new_index = [old_index, np.arange(len(old_index))] + tsframe = self.tsframe.copy() + new_index = [tsframe.index, np.arange(len(tsframe.index))] tsframe.index = MultiIndex.from_arrays(new_index) with ensure_clean(self.ext) as path: - tsframe.to_excel(path, 'test1', index_label=['time', 'foo']) + tsframe.index.names = ['time', 'foo'] + tsframe.to_excel(path, 'test1', merge_cells=self.merge_cells) reader = ExcelFile(path) - recons = reader.parse('test1', index_col=[0, 1]) - - tm.assert_frame_equal(tsframe, recons, check_names=False) - self.assertEquals(recons.index.names, ('time', 'foo')) + recons = reader.parse('test1', + index_col=[0, 1], + has_index_names=self.merge_cells) - # infer index - tsframe.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = reader.parse('test1') tm.assert_frame_equal(tsframe, recons) - - self.tsframe.index = old_index # needed if setUP becomes classmethod + self.assertEquals(recons.index.names, ('time', 'foo')) def test_to_excel_float_format(self): _skip_if_no_xlrd() - ext = self.ext - filename = '__tmp_to_excel_float_format__.' + ext + df = DataFrame([[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], index=['A', 'B'], columns=['X', 'Y', 'Z']) @@ -835,8 +856,13 @@ def test_to_excel_unicode_filename(self): # for maddr in mergedcells_addrs: # self.assertTrue(ws.cell(maddr).merged) # os.remove(filename) + def test_excel_010_hemstring(self): _skip_if_no_xlrd() + + if self.merge_cells: + raise nose.SkipTest('Skip tests for merged MI format.') + from pandas.util.testing import makeCustomDataframe as mkdf # ensure limited functionality in 0.10 # override of #2370 until sorted out in 0.11 @@ -844,7 +870,7 @@ def test_excel_010_hemstring(self): def roundtrip(df, header=True, parser_hdr=0): with ensure_clean(self.ext) as path: - df.to_excel(path, header=header) + df.to_excel(path, header=header, merge_cells=self.merge_cells) xf = pd.ExcelFile(path) res = xf.parse(xf.sheet_names[0], header=parser_hdr) return res @@ -917,7 +943,7 @@ def test_to_excel_styleconverter(self): "right": "thin", "bottom": "thin", "left": "thin"}, - "alignment": {"horizontal": "center"}} + "alignment": {"horizontal": "center", "vertical": "top"}} xlsx_style = _OpenpyxlWriter._convert_to_style(hstyle) self.assertTrue(xlsx_style.font.bold) @@ -931,6 +957,8 @@ def test_to_excel_styleconverter(self): xlsx_style.borders.left.border_style) self.assertEquals(openpyxl.style.Alignment.HORIZONTAL_CENTER, xlsx_style.alignment.horizontal) + self.assertEquals(openpyxl.style.Alignment.VERTICAL_TOP, + xlsx_style.alignment.vertical) class XlwtTests(ExcelWriterBase, unittest.TestCase): @@ -948,7 +976,8 @@ def test_to_excel_styleconverter(self): "right": "thin", "bottom": "thin", "left": "thin"}, - "alignment": {"horizontal": "center"}} + "alignment": {"horizontal": "center", "vertical": "top"}} + xls_style = _XlwtWriter._convert_to_style(hstyle) self.assertTrue(xls_style.font.bold) self.assertEquals(xlwt.Borders.THIN, xls_style.borders.top) @@ -956,6 +985,7 @@ def test_to_excel_styleconverter(self): self.assertEquals(xlwt.Borders.THIN, xls_style.borders.bottom) self.assertEquals(xlwt.Borders.THIN, xls_style.borders.left) self.assertEquals(xlwt.Alignment.HORZ_CENTER, xls_style.alignment.horz) + self.assertEquals(xlwt.Alignment.VERT_TOP, xls_style.alignment.vert) class XlsxWriterTests(ExcelWriterBase, unittest.TestCase): @@ -963,48 +993,38 @@ class XlsxWriterTests(ExcelWriterBase, unittest.TestCase): engine_name = 'xlsxwriter' check_skip = staticmethod(_skip_if_no_xlsxwriter) - # Override test from the Superclass to use assertAlmostEqual on the - # floating point values read back in from the output XlsxWriter file. - def test_roundtrip_indexlabels(self): - _skip_if_no_xlrd() - with ensure_clean(self.ext) as path: +class OpenpyxlTests_NoMerge(ExcelWriterBase, unittest.TestCase): + ext = '.xlsx' + engine_name = 'openpyxl' + check_skip = staticmethod(_skip_if_no_openpyxl) - self.frame['A'][:5] = nan + # Test < 0.13 non-merge behaviour for MultiIndex and Hierarchical Rows. + merge_cells = False - self.frame.to_excel(path, 'test1') - self.frame.to_excel(path, 'test1', cols=['A', 'B']) - self.frame.to_excel(path, 'test1', header=False) - self.frame.to_excel(path, 'test1', index=False) - # test index_label - frame = (DataFrame(np.random.randn(10, 2)) >= 0) - frame.to_excel(path, 'test1', index_label=['test']) - reader = ExcelFile(path) - recons = reader.parse('test1', index_col=0).astype(np.int64) - frame.index.names = ['test'] - self.assertEqual(frame.index.names, recons.index.names) +class XlwtTests_NoMerge(ExcelWriterBase, unittest.TestCase): + ext = '.xls' + engine_name = 'xlwt' + check_skip = staticmethod(_skip_if_no_xlwt) - frame = (DataFrame(np.random.randn(10, 2)) >= 0) - frame.to_excel( - path, 'test1', index_label=['test', 'dummy', 'dummy2']) - reader = ExcelFile(path) - recons = reader.parse('test1', index_col=0).astype(np.int64) - frame.index.names = ['test'] - self.assertEqual(frame.index.names, recons.index.names) + # Test < 0.13 non-merge behaviour for MultiIndex and Hierarchical Rows. + merge_cells = False - frame = (DataFrame(np.random.randn(10, 2)) >= 0) - frame.to_excel(path, 'test1', index_label='test') - reader = ExcelFile(path) - recons = reader.parse('test1', index_col=0).astype(np.int64) - frame.index.names = ['test'] - self.assertAlmostEqual(frame.index.names, recons.index.names) + +class XlsxWriterTests_NoMerge(ExcelWriterBase, unittest.TestCase): + ext = '.xlsx' + engine_name = 'xlsxwriter' + check_skip = staticmethod(_skip_if_no_xlsxwriter) + + # Test < 0.13 non-merge behaviour for MultiIndex and Hierarchical Rows. + merge_cells = False class ExcelWriterEngineTests(unittest.TestCase): def test_ExcelWriter_dispatch(self): with tm.assertRaisesRegexp(ValueError, 'No engine'): - writer = ExcelWriter('nothing') + ExcelWriter('nothing') _skip_if_no_openpyxl() writer = ExcelWriter('apple.xlsx') @@ -1046,7 +1066,6 @@ def check_called(func): func = lambda: df.to_excel('something.test') check_called(func) check_called(lambda: panel.to_excel('something.test')) - from pandas import set_option, get_option val = get_option('io.excel.xlsx.writer') set_option('io.excel.xlsx.writer', 'dummy') check_called(lambda: df.to_excel('something.xlsx'))