diff --git a/pandas/core/format.py b/pandas/core/format.py index d13cee0b24da2..db50955c13c3e 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -17,6 +17,9 @@ import numpy as np +import itertools + + docstring_to_string = """ Parameters ---------- @@ -400,6 +403,7 @@ def _get_column_name_list(self): names.append('' if columns.name is None else columns.name) return names + class HTMLFormatter(object): indent_delta = 2 @@ -674,6 +678,217 @@ def grouper(x): return result + +#from collections import namedtuple +# ExcelCell = namedtuple("ExcelCell", +# 'row, col, val, style, mergestart, mergeend') + +class ExcelCell: + __fields__ = ('row', 'col', 'val', 'style', 'mergestart', 'mergeend') + __slots__ = __fields__ + + def __init__(self, row, col, val, + style=None, mergestart=None, mergeend=None): + self.row = row + self.col = col + self.val = val + self.style = style + self.mergestart = mergestart + self.mergeend = mergeend + + +header_style = {"font": {"bold": True}, + "borders": {"top": "thin", + "right": "thin", + "bottom": "thin", + "left": "thin"}, + "alignment": {"horizontal": "center"}} + + +class ExcelFormatter(object): + """ + Class for formatting a DataFrame to a list of ExcelCells, + + Parameters + ---------- + df : dataframe + na_rep: na representation + float_format : string, default None + Format string for floating point numbers + cols : sequence, optional + Columns to write + header : boolean or list of string, default True + Write out column names. If a list of string is given it is + assumed to be aliases for the column names + index : boolean, default True + output row names (index) + index_label : string or sequence, default None + Column label for index column(s) if desired. If None is given, and + `header` and `index` are True, then the index names are used. A + sequence should be given if the DataFrame uses MultiIndex. 
+ """ + + def __init__(self, + df, + na_rep='', + float_format=None, + cols=None, + header=True, + index=True, + index_label=None + ): + self.df = df + self.rowcounter = 0 + self.na_rep = na_rep + self.columns = cols + if cols is None: + self.columns = df.columns + self.float_format = float_format + self.index = index + self.index_label = index_label + self.header = header + + def _format_value(self, val): + if lib.checknull(val): + val = self.na_rep + if self.float_format is not None and com.is_float(val): + val = float(self.float_format % val) + return val + + def _format_header_mi(self): + levels = self.columns.format(sparsify=True, adjoin=False, + names=False) + level_lenghts = _get_level_lengths(levels) + coloffset = 0 + if isinstance(self.df.index, MultiIndex): + coloffset = len(self.df.index[0]) - 1 + + for lnum, (records, values) in enumerate(zip(level_lenghts, + levels)): + name = self.columns.names[lnum] + yield ExcelCell(lnum, coloffset, name, header_style) + for i in records: + if records[i] > 1: + yield ExcelCell(lnum,coloffset + i + 1, values[i], + header_style, lnum, coloffset + i + records[i]) + else: + yield ExcelCell(lnum, coloffset + i + 1, values[i], header_style) + + self.rowcounter = lnum + + def _format_header_regular(self): + has_aliases = isinstance(self.header, (tuple, list, np.ndarray)) + if has_aliases or self.header: + coloffset = 0 + if self.index: + coloffset = 1 + if isinstance(self.df.index, MultiIndex): + coloffset = len(self.df.index[0]) + + colnames = self.columns + if has_aliases: + if len(self.header) != len(self.columns): + raise ValueError(('Writing %d cols but got %d aliases' + % (len(self.columns), len(self.header)))) + else: + colnames = self.header + + for colindex, colname in enumerate(colnames): + yield ExcelCell(self.rowcounter, colindex + coloffset, colname, + header_style) + + def _format_header(self): + if isinstance(self.columns, MultiIndex): + gen = self._format_header_mi() + else: + gen = 
self._format_header_regular() + + gen2 = () + if self.df.index.names: + row = [x if x is not None else '' + for x in self.df.index.names] + [''] * len(self.columns) + if reduce(lambda x, y: x and y, map(lambda x: x != '', row)): + gen2 = (ExcelCell(self.rowcounter, colindex, val, header_style) + for colindex, val in enumerate(row)) + self.rowcounter += 1 + return itertools.chain(gen, gen2) + + def _format_body(self): + + if isinstance(self.df.index, MultiIndex): + return self._format_hierarchical_rows() + else: + return self._format_regular_rows() + + def _format_regular_rows(self): + self.rowcounter += 1 + + coloffset = 0 + #output index and index_label? + if self.index: + #check aliases + #if list only take first as this is not a MultiIndex + if self.index_label and isinstance(self.index_label, + (list, tuple, np.ndarray)): + index_label = self.index_label[0] + #if string good to go + elif self.index_label and isinstance(self.index_label, str): + index_label = self.index_label + else: + index_label = self.df.index.names[0] + + if index_label: + yield ExcelCell(self.rowcounter, 0, + index_label, header_style) + self.rowcounter += 1 + + #write index_values + index_values = self.df.index + coloffset = 1 + for idx, idxval in enumerate(index_values): + yield ExcelCell(self.rowcounter + idx, 0, idxval, header_style) + + for colidx, colname in enumerate(self.columns): + series = self.df[colname] + for i, val in enumerate(series): + yield ExcelCell(self.rowcounter + i, colidx + coloffset, val) + + def _format_hierarchical_rows(self): + self.rowcounter += 1 + + gcolidx = 0 + #output index and index_label? 
+ if self.index: + index_labels = self.df.index.names + #check for aliases + if self.index_label and isinstance(self.index_label, + (list, tuple, np.ndarray)): + index_labels = self.index_label + + #if index labels are not empty go ahead and dump + if filter(lambda x: x is not None, index_labels): + for cidx, name in enumerate(index_labels): + yield ExcelCell(self.rowcounter, cidx, + name, header_style) + self.rowcounter += 1 + + for indexcolvals in zip(*self.df.index): + for idx, indexcolval in enumerate(indexcolvals): + yield ExcelCell(self.rowcounter + idx, gcolidx, + indexcolval, header_style) + gcolidx += 1 + + for colidx, colname in enumerate(self.columns): + series = self.df[colname] + for i, val in enumerate(series): + yield ExcelCell(self.rowcounter + i, gcolidx + colidx, val) + + def get_formatted_cells(self): + for cell in itertools.chain(self._format_header(), + self._format_body()): + cell.val = self._format_value(cell.val) + yield cell + #---------------------------------------------------------------------- # Array formatters diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 35d895bed43f1..ebe361a33b28c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1221,7 +1221,7 @@ def to_panel(self): to_wide = deprecate('to_wide', to_panel) - def _helper_csvexcel(self, writer, na_rep=None, cols=None, + def _helper_csv(self, writer, na_rep=None, cols=None, header=True, index=True, index_label=None, float_format=None): if cols is None: @@ -1356,7 +1356,7 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None, else: csvout = csv.writer(f, lineterminator='\n', delimiter=sep, quoting=quoting) - self._helper_csvexcel(csvout, na_rep=na_rep, + self._helper_csv(csvout, na_rep=na_rep, float_format=float_format, cols=cols, header=header, index=index, index_label=index_label) @@ -1367,7 +1367,7 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None, def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='', 
float_format=None, cols=None, header=True, index=True, - index_label=None): + index_label=None, startrow=0, startcol=0): """ Write DataFrame to a excel sheet @@ -1392,6 +1392,9 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='', Column label for index column(s) if desired. If None is given, and `header` and `index` are True, then the index names are used. A sequence should be given if the DataFrame uses MultiIndex. + startrow : upper left cell row to dump data frame + startcol : upper left cell column to dump data frame + Notes ----- @@ -1408,11 +1411,17 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='', if isinstance(excel_writer, basestring): excel_writer = ExcelWriter(excel_writer) need_save = True - excel_writer.cur_sheet = sheet_name - self._helper_csvexcel(excel_writer, na_rep=na_rep, - float_format=float_format, cols=cols, - header=header, index=index, - index_label=index_label) + + formatter = fmt.ExcelFormatter(self, + na_rep=na_rep, + cols=cols, + header=header, + float_format=float_format, + index=index, + index_label=index_label) + formatted_cells = formatter.get_formatted_cells() + excel_writer.write_cells(formatted_cells, sheet_name, + startrow=startrow, startcol=startcol) if need_save: excel_writer.save() diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index a5fc7ebeed101..14a01b38ae88e 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -6,6 +6,7 @@ from itertools import izip from urlparse import urlparse import csv +import xlwt import numpy as np @@ -20,6 +21,7 @@ import pandas.lib as lib import pandas._parser as _parser +from pandas.tseries.period import Period class DateConversionError(Exception): pass @@ -456,6 +458,8 @@ def __init__(self, f, engine='python', **kwds): # might mutate self.engine self.options, self.engine = self._clean_options(options, engine) + if 'has_index_labels' in kwds: + self.options['has_index_labels'] = kwds['has_index_labels'] self._make_engine(self.engine) @@ -931,6 
+935,9 @@ def TextParser(*args, **kwds): rows will be discarded index_col : int or list, default None Column or columns to use as the (possibly hierarchical) index + has_index_labels: boolean, default False + True if the cols defined in index_col have an index name and are + not in the header na_values : iterable, default None Custom NA values keep_default_na : bool, default True @@ -969,6 +976,10 @@ def TextParser(*args, **kwds): # verbose=False, encoding=None, squeeze=False): +def count_empty_vals(vals): + return sum([1 for v in vals if v == '' or v is None]) + + class PythonParser(ParserBase): def __init__(self, f, **kwds): @@ -995,6 +1006,9 @@ def __init__(self, f, **kwds): self.doublequote = kwds['doublequote'] self.skipinitialspace = kwds['skipinitialspace'] self.quoting = kwds['quoting'] + self.has_index_labels = False + if 'has_index_labels' in kwds: + self.has_index_labels = kwds['has_index_labels'] self.verbose = kwds['verbose'] self.converters = kwds['converters'] @@ -1099,6 +1113,13 @@ def read(self, rows=None): self.index_col, self.index_names) + #handle new style for names in index + count_empty_content_vals = count_empty_vals(content[0]) + indexnamerow = None + if self.has_index_labels and count_empty_content_vals == len(columns): + indexnamerow = content[0] + content = content[1:] + alldata = self._rows_to_cols(content) data = self._exclude_implicit_index(alldata) @@ -1106,6 +1127,9 @@ def read(self, rows=None): data = self._convert_data(data) index = self._make_index(data, alldata, columns) + if indexnamerow: + coffset = len(indexnamerow) - len(columns) + index.names = indexnamerow[:coffset] return index, columns, data @@ -1699,7 +1723,7 @@ def __repr__(self): return object.__repr__(self) def parse(self, sheetname, header=0, skiprows=None, skip_footer=0, - index_col=None, parse_cols=None, parse_dates=False, + index_col=None, has_index_labels=False, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, chunksize=None, 
**kwds): """ @@ -1718,6 +1742,9 @@ def parse(self, sheetname, header=0, skiprows=None, skip_footer=0, index_col : int, default None Column to use as the row labels of the DataFrame. Pass None if there is no such column + has_index_labels: boolean, default False + True if the cols defined in index_col have an index name and are + not in the header parse_cols : int or list, default None If None then parse all columns, If int then indicates last column to be parsed @@ -1739,6 +1766,7 @@ def parse(self, sheetname, header=0, skiprows=None, skip_footer=0, False: self._parse_xls} return choose[self.use_xlsx](sheetname, header=header, skiprows=skiprows, index_col=index_col, + has_index_labels=has_index_labels, parse_cols=parse_cols, parse_dates=parse_dates, date_parser=date_parser, @@ -1780,7 +1808,7 @@ def _excel2num(x): return i in parse_cols def _parse_xlsx(self, sheetname, header=0, skiprows=None, - skip_footer=0, index_col=None, + skip_footer=0, index_col=None, has_index_labels=False, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, chunksize=None): sheet = self.book.get_sheet_by_name(name=sheetname) @@ -1804,6 +1832,7 @@ def _parse_xlsx(self, sheetname, header=0, skiprows=None, data[header] = _trim_excel_header(data[header]) parser = TextParser(data, header=header, index_col=index_col, + has_index_labels=has_index_labels, na_values=na_values, thousands=thousands, parse_dates=parse_dates, @@ -1815,7 +1844,7 @@ def _parse_xlsx(self, sheetname, header=0, skiprows=None, return parser.read() def _parse_xls(self, sheetname, header=0, skiprows=None, - skip_footer=0, index_col=None, + skip_footer=0, index_col=None, has_index_labels=None, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, chunksize=None): from xlrd import xldate_as_tuple, XL_CELL_DATE, XL_CELL_ERROR @@ -1849,6 +1878,7 @@ def _parse_xls(self, sheetname, header=0, skiprows=None, data[header] = _trim_excel_header(data[header]) parser = 
TextParser(data, header=header, index_col=index_col, + has_index_labels=has_index_labels, na_values=na_values, thousands=thousands, parse_dates=parse_dates, @@ -1869,11 +1899,97 @@ def sheet_names(self): def _trim_excel_header(row): # trim header row so auto-index inference works - while len(row) > 0 and row[0] == '': + # xlrd uses '' , openpyxl None + while len(row) > 0 and (row[0] == '' or row[0] is None): row = row[1:] return row +class CellStyleConverter(object): + """ + Utility Class which converts a style dict to xlwt or openpyxl style + """ + + @staticmethod + def to_xls(style_dict): + """ + converts a style_dict to an xlwt style object + Parameters + ---------- + style_dict: style dictionary to convert + """ + def style_to_xlwt(item, firstlevel=True, field_sep=',', line_sep=';'): + """helper which recursively generates an xlwt easy style string + for example: + + hstyle = {"font": {"bold": True}, + "border": {"top": "thin", + "right": "thin", + "bottom": "thin", + "left": "thin"}, + "align": {"horiz": "center"}} + will be converted to + font: bold on; \ + border: top thin, right thin, bottom thin, left thin; \ + align: horiz center; + """ + if hasattr(item, 'items'): + if firstlevel: + it = ["%s: %s" % (key, style_to_xlwt(value, False)) + for key, value in item.items()] + out = "%s " % (line_sep).join(it) + return out + else: + it = ["%s %s" % (key, style_to_xlwt(value, False)) + for key, value in item.items()] + out = "%s " % (field_sep).join(it) + return out + else: + item = "%s" % item + item = item.replace("True", "on") + item = item.replace("False", "off") + return item + + if style_dict: + xlwt_stylestr = style_to_xlwt(style_dict) + return xlwt.easyxf(xlwt_stylestr, field_sep=',', line_sep=';') + else: + return xlwt.XFStyle() + + @staticmethod + def to_xlsx(style_dict): + """ + converts a style_dict to an openpyxl style object + Parameters + ---------- + style_dict: style dictionary to convert + """ + + from openpyxl.style import Style + xls_style = 
Style() + for key, value in style_dict.items(): + for nk, nv in value.items(): + if key == "borders": + (xls_style.borders.__getattribute__(nk) + .__setattr__('border_style', nv)) + else: + xls_style.__getattribute__(key).__setattr__(nk, nv) + + return xls_style + + +def _conv_value(val): + #convert value for excel dump + if isinstance(val, np.int64): + val = int(val) + elif isinstance(val, np.bool8): + val = bool(val) + elif isinstance(val, Period): + val = "%s" % val + + return val + + class ExcelWriter(object): """ Class for writing DataFrame objects into excel sheets, uses xlwt for xls, @@ -1890,11 +2006,15 @@ def __init__(self, path): self.use_xlsx = False import xlwt self.book = xlwt.Workbook() - self.fm_datetime = xlwt.easyxf(num_format_str='YYYY-MM-DD HH:MM:SS') + self.fm_datetime = xlwt.easyxf( + num_format_str='YYYY-MM-DD HH:MM:SS') self.fm_date = xlwt.easyxf(num_format_str='YYYY-MM-DD') else: from openpyxl.workbook import Workbook - self.book = Workbook(optimized_write=True) + self.book = Workbook()#optimized_write=True) + #openpyxl 1.6.1 adds a dummy sheet; remove it + if self.book.worksheets: + self.book.remove_sheet(self.book.worksheets[0]) self.path = path self.sheets = {} self.cur_sheet = None @@ -1905,16 +2025,18 @@ def save(self): """ self.book.save(self.path) - def writerow(self, row, sheet_name=None): + def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): """ - Write the given row into Excel an excel sheet + Write the given formatted cells into an Excel sheet Parameters ---------- - row : list - Row of data to save to Excel sheet + cells : generator + cells of formatted data to save to the Excel sheet sheet_name : string, default None Name of Excel sheet, if None, then use self.cur_sheet + startrow: upper left cell row to dump data frame + startcol: upper left cell column to dump data frame """ if sheet_name is None: sheet_name = self.cur_sheet @@ -1922,49 +2044,69 @@ def writerow(self, row, sheet_name=None): raise Exception('Must 
pass explicit sheet_name or set ' 'cur_sheet property') if self.use_xlsx: - self._writerow_xlsx(row, sheet_name) + self._writecells_xlsx(cells, sheet_name, startrow, startcol) else: - self._writerow_xls(row, sheet_name) + self._writecells_xls(cells, sheet_name, startrow, startcol) + + def _writecells_xlsx(self, cells, sheet_name, startrow, startcol): + + from openpyxl.cell import get_column_letter - def _writerow_xls(self, row, sheet_name): if sheet_name in self.sheets: - sheet, row_idx = self.sheets[sheet_name] + wks = self.sheets[sheet_name] else: - sheet = self.book.add_sheet(sheet_name) - row_idx = 0 - sheetrow = sheet.row(row_idx) - for i, val in enumerate(row): - if isinstance(val, (datetime.datetime, datetime.date)): - if isinstance(val, datetime.datetime): - sheetrow.write(i, val, self.fm_datetime) - else: - sheetrow.write(i, val, self.fm_date) - elif isinstance(val, np.int64): - sheetrow.write(i, int(val)) - elif isinstance(val, np.bool8): - sheetrow.write(i, bool(val)) - else: - sheetrow.write(i, val) - row_idx += 1 - if row_idx == 1000: - sheet.flush_row_data() - self.sheets[sheet_name] = (sheet, row_idx) - - def _writerow_xlsx(self, row, sheet_name): + wks = self.book.create_sheet() + wks.title = sheet_name + self.sheets[sheet_name] = wks + + for cell in cells: + colletter = get_column_letter(startcol + cell.col + 1) + xcell = wks.cell("%s%s" % (colletter, startrow + cell.row + 1)) + xcell.value = _conv_value(cell.val) + if cell.style: + style = CellStyleConverter.to_xlsx(cell.style) + for field in style.__fields__: + xcell.style.__setattr__(field, + style.__getattribute__(field)) + + if isinstance(cell.val, datetime.datetime): + xcell.style.number_format.format_code = "YYYY-MM-DD HH:MM:SS" + elif isinstance(cell.val, datetime.date): + xcell.style.number_format.format_code = "YYYY-MM-DD" + + #merging requires openpyxl latest (works on 1.6.1) + #todo add version check + if cell.mergestart is not None and cell.mergeend is not None: + cletterstart = 
get_column_letter(startcol + cell.col + 1) + cletterend = get_column_letter(startcol + cell.mergeend + 1) + + wks.merge_cells('%s%s:%s%s' % (cletterstart, + startrow + cell.row + 1, + cletterend, + startrow + cell.mergestart + 1)) + + def _writecells_xls(self, cells, sheet_name, startrow, startcol): if sheet_name in self.sheets: - sheet, row_idx = self.sheets[sheet_name] + wks = self.sheets[sheet_name] else: - sheet = self.book.create_sheet() - sheet.title = sheet_name - row_idx = 0 - - conv_row = [] - for val in row: - if isinstance(val, np.int64): - val = int(val) - elif isinstance(val, np.bool8): - val = bool(val) - conv_row.append(val) - sheet.append(conv_row) - row_idx += 1 - self.sheets[sheet_name] = (sheet, row_idx) + wks = self.book.add_sheet(sheet_name) + self.sheets[sheet_name] = wks + + for cell in cells: + val = _conv_value(cell.val) + style = CellStyleConverter.to_xls(cell.style) + if isinstance(val, datetime.datetime): + style.num_format_str = "YYYY-MM-DD HH:MM:SS" + elif isinstance(val, datetime.date): + style.num_format_str = "YYYY-MM-DD" + + if cell.mergestart is not None and cell.mergeend is not None: + wks.write_merge(startrow + cell.row, + startrow + cell.mergestart, + startcol + cell.col, + startcol + cell.mergeend, + val, style) + else: + wks.write(startrow + cell.row, + startcol + cell.col, + val, style) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index b76d9ea1e6052..61456d6dbfe2e 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -3842,7 +3842,7 @@ def test_to_excel_from_excel(self): # test roundtrip self.frame.to_excel(path,'test1') reader = ExcelFile(path) - recons = reader.parse('test1', index_col=0) + recons = reader.parse('test1', index_col=0, has_index_labels=True) assert_frame_equal(self.frame, recons) self.frame.to_excel(path,'test1', index=False) @@ -3851,19 +3851,19 @@ def test_to_excel_from_excel(self): recons.index = self.frame.index assert_frame_equal(self.frame, recons) - 
self.frame.to_excel(path,'test1') - reader = ExcelFile(path) - recons = reader.parse('test1', index_col=0, skiprows=[1]) - assert_frame_equal(self.frame.ix[1:], recons) + # self.frame.to_excel(path,'test1') + # reader = ExcelFile(path) + # recons = reader.parse('test1', index_col=0, skiprows=[2], has_index_labels=True) + # assert_frame_equal(self.frame.ix[1:], recons) self.frame.to_excel(path,'test1',na_rep='NA') reader = ExcelFile(path) - recons = reader.parse('test1', index_col=0, na_values=['NA']) + recons = reader.parse('test1', index_col=0, na_values=['NA'], has_index_labels=True) assert_frame_equal(self.frame, recons) self.mixed_frame.to_excel(path,'test1') reader = ExcelFile(path) - recons = reader.parse('test1', index_col=0) + recons = reader.parse('test1', index_col=0, has_index_labels=True) assert_frame_equal(self.mixed_frame, recons) self.tsframe.to_excel(path, 'test1') @@ -3891,7 +3891,7 @@ def test_to_excel_from_excel(self): self.tsframe.to_excel(writer,'test2') writer.save() reader = ExcelFile(path) - recons = reader.parse('test1',index_col=0) + recons = reader.parse('test1',index_col=0, has_index_labels=True) assert_frame_equal(self.frame, recons) recons = reader.parse('test2',index_col=0) assert_frame_equal(self.tsframe, recons) @@ -3903,11 +3903,46 @@ def test_to_excel_from_excel(self): col_aliases = Index(['AA', 'X', 'Y', 'Z']) self.frame2.to_excel(path, 'test1', header=col_aliases) reader = ExcelFile(path) - rs = reader.parse('test1', index_col=0) + rs = reader.parse('test1', index_col=0, has_index_labels=True) xp = self.frame2.copy() xp.columns = col_aliases assert_frame_equal(xp, rs) + # test index_label + frame = (DataFrame(np.random.randn(10,2)) >= 0) + frame.to_excel(path, 'test1', index_label=['test']) + reader = ExcelFile(path) + recons = reader.parse('test1', index_col=0, has_index_labels=True).astype(np.int64) + frame.index.names = ['test'] + self.assertEqual(frame.index.names, recons.index.names) + + frame = 
(DataFrame(np.random.randn(10,2)) >= 0) + frame.to_excel(path, 'test1', index_label=['test', 'dummy', 'dummy2']) + reader = ExcelFile(path) + recons = reader.parse('test1', index_col=0, has_index_labels=True).astype(np.int64) + frame.index.names = ['test'] + self.assertEqual(frame.index.names, recons.index.names) + + frame = (DataFrame(np.random.randn(10,2)) >= 0) + frame.to_excel(path, 'test1', index_label='test') + reader = ExcelFile(path) + recons = reader.parse('test1', index_col=0, has_index_labels=True).astype(np.int64) + frame.index.names = ['test'] + self.assertEqual(frame.index.names, recons.index.names) + + #test index_labels in same row as column names + self.frame.to_excel('/tmp/tests.xls', 'test1', cols=['A', 'B', 'C', 'D'], index=False) + #take 'A' and 'B' as indexes (they are in same row as cols 'C', 'D') + df = self.frame.copy() + df = df.set_index(['A', 'B']) + + + reader = ExcelFile('/tmp/tests.xls') + recons = reader.parse('test1', index_col=[0, 1]) + assert_frame_equal(df, recons) + + + os.remove(path) # datetime.date, not sure what to test here exactly @@ -3971,7 +4006,7 @@ def test_to_excel_multiindex(self): # round trip frame.to_excel(path, 'test1') reader = ExcelFile(path) - df = reader.parse('test1', index_col=[0,1], parse_dates=False) + df = reader.parse('test1', index_col=[0,1], parse_dates=False, has_index_labels=True) assert_frame_equal(frame, df) self.assertEqual(frame.index.names, df.index.names) self.frame.index = old_index # needed if setUP becomes a classmethod @@ -3984,7 +4019,7 @@ def test_to_excel_multiindex(self): tsframe.to_excel(path, 'test1', index_label = ['time','foo']) reader = ExcelFile(path) - recons = reader.parse('test1', index_col=[0,1]) + recons = reader.parse('test1', index_col=[0,1], has_index_labels=True) assert_frame_equal(tsframe, recons) # infer index @@ -3993,22 +4028,28 @@ def test_to_excel_multiindex(self): recons = reader.parse('test1') assert_frame_equal(tsframe, recons) - # no index - tsframe.index.names 
= ['first', 'second'] - tsframe.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = reader.parse('test1') - assert_almost_equal(tsframe.values, - recons.ix[:, tsframe.columns].values) - self.assertEqual(len(tsframe.columns) + 2, len(recons.columns)) - - tsframe.index.names = [None, None] # no index - tsframe.to_excel(path, 'test1', index=False) - reader = ExcelFile(path) - recons = reader.parse('test1', index_col=None) - assert_almost_equal(recons.values, self.tsframe.values) + #TODO : mention this does not make sense anymore + #with the new formatting as we are not aligning colnames and indexlabels + #on the same row + + # tsframe.index.names = ['first', 'second'] + # tsframe.to_excel(path, 'test1') + # reader = ExcelFile(path) + # recons = reader.parse('test1') + # assert_almost_equal(tsframe.values, + # recons.ix[:, tsframe.columns].values) + # self.assertEqual(len(tsframe.columns) + 2, len(recons.columns)) + + # tsframe.index.names = [None, None] + + # # no index + # tsframe.to_excel(path, 'test1', index=False) + # reader = ExcelFile(path) + # recons = reader.parse('test1', index_col=None) + # assert_almost_equal(recons.values, self.tsframe.values) + self.tsframe.index = old_index # needed if setUP becomes classmethod # write a big DataFrame @@ -4071,6 +4112,125 @@ def test_to_excel_unicode_filename(self): assert_frame_equal(rs, xp) os.remove(filename) + def test_to_excel_styleconverter(self): + from pandas.io.parsers import CellStyleConverter + try: + import xlwt + import openpyxl + except ImportError: + raise nose.SkipTest + + hstyle = {"font": {"bold": True}, + "borders": {"top": "thin", + "right": "thin", + "bottom": "thin", + "left": "thin"}, + "alignment": {"horizontal": "center"}} + xls_style = CellStyleConverter.to_xls(hstyle) + self.assertTrue(xls_style.font.bold) + self.assertEquals(xlwt.Borders.THIN, xls_style.borders.top) + self.assertEquals(xlwt.Borders.THIN, xls_style.borders.right) + self.assertEquals(xlwt.Borders.THIN, 
xls_style.borders.bottom) + self.assertEquals(xlwt.Borders.THIN, xls_style.borders.left) + self.assertEquals(xlwt.Alignment.HORZ_CENTER, xls_style.alignment.horz) + + xlsx_style = CellStyleConverter.to_xlsx(hstyle) + self.assertTrue(xlsx_style.font.bold) + self.assertEquals(openpyxl.style.Border.BORDER_THIN, + xlsx_style.borders.top.border_style) + self.assertEquals(openpyxl.style.Border.BORDER_THIN, + xlsx_style.borders.right.border_style) + self.assertEquals(openpyxl.style.Border.BORDER_THIN, + xlsx_style.borders.bottom.border_style) + self.assertEquals(openpyxl.style.Border.BORDER_THIN, + xlsx_style.borders.left.border_style) + self.assertEquals(openpyxl.style.Alignment.HORIZONTAL_CENTER, + xlsx_style.alignment.horizontal) + + def test_to_excel_header_styling(self): + + import StringIO + s = StringIO.StringIO( + """Date,ticker,type,value + 2001-01-01,x,close,12.2 + 2001-01-01,x,open ,12.1 + 2001-01-01,y,close,12.2 + 2001-01-01,y,open ,12.1 + 2001-02-01,x,close,12.2 + 2001-02-01,x,open ,12.1 + 2001-02-01,y,close,12.2 + 2001-02-01,y,open ,12.1 + 2001-03-01,x,close,12.2 + 2001-03-01,x,open ,12.1 + 2001-03-01,y,close,12.2 + 2001-03-01,y,open ,12.1""") + df = read_csv(s, parse_dates=["Date"]) + pdf = df.pivot_table(values="value", rows=["ticker"], + cols=["Date", "type"]) + + try: + import xlrd + import openpyxl + from openpyxl.cell import get_column_letter + except ImportError: + raise nose.SkipTest + + filename = '__tmp__.xls' + pdf.to_excel(filename, 'test1') + + + wbk = xlrd.open_workbook(filename, + formatting_info=True) + self.assertEquals(["test1"], wbk.sheet_names()) + ws = wbk.sheet_by_name('test1') + self.assertEquals([(0, 1, 5, 7), (0, 1, 3, 5), (0, 1, 1, 3)], + ws.merged_cells) + for i in range(0, 2): + for j in range(0, 7): + xfx = ws.cell_xf_index(0, 0) + cell_xf = wbk.xf_list[xfx] + font = wbk.font_list + self.assertEquals(1, font[cell_xf.font_index].bold) + self.assertEquals(1, cell_xf.border.top_line_style) + self.assertEquals(1, 
cell_xf.border.right_line_style) + self.assertEquals(1, cell_xf.border.bottom_line_style) + self.assertEquals(1, cell_xf.border.left_line_style) + self.assertEquals(2, cell_xf.alignment.hor_align) + + os.remove(filename) + # test xlsx_styling + filename = '__tmp__.xlsx' + pdf.to_excel(filename, 'test1') + + wbk = openpyxl.load_workbook(filename) + self.assertEquals(["test1"], wbk.get_sheet_names()) + ws = wbk.get_sheet_by_name('test1') + + xlsaddrs = ["%s2" % chr(i) for i in range(ord('A'), ord('H'))] + xlsaddrs += ["A%s" % i for i in range(1, 6)] + xlsaddrs += ["B1", "D1", "F1"] + for xlsaddr in xlsaddrs: + cell = ws.cell(xlsaddr) + self.assertTrue(cell.style.font.bold) + self.assertEquals(openpyxl.style.Border.BORDER_THIN, + cell.style.borders.top.border_style) + self.assertEquals(openpyxl.style.Border.BORDER_THIN, + cell.style.borders.right.border_style) + self.assertEquals(openpyxl.style.Border.BORDER_THIN, + cell.style.borders.bottom.border_style) + self.assertEquals(openpyxl.style.Border.BORDER_THIN, + cell.style.borders.left.border_style) + self.assertEquals(openpyxl.style.Alignment.HORIZONTAL_CENTER, + cell.style.alignment.horizontal) + + mergedcells_addrs = ["C1", "E1", "G1"] + for maddr in mergedcells_addrs: + self.assertTrue(ws.cell(maddr).merged) + + os.remove(filename) + + + def test_info(self): io = StringIO() self.frame.info(buf=io)