From b1e916ec8b93972de3a2f35f9b157a381016b582 Mon Sep 17 00:00:00 2001 From: locojaydev Date: Wed, 21 Nov 2012 16:14:04 -0500 Subject: [PATCH 01/10] excel format --- pandas/core/format.py | 92 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/pandas/core/format.py b/pandas/core/format.py index 13e504a8e1f88..0f47fffe8e957 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -16,6 +16,9 @@ import pandas.lib as lib import numpy as np +import itertools + +from collections import namedtuple docstring_to_string = """ Parameters @@ -407,6 +410,7 @@ def _get_column_name_list(self): names.append('' if columns.name is None else columns.name) return names + class HTMLFormatter(object): indent_delta = 2 @@ -681,6 +685,94 @@ def grouper(x): return result + +ExcelCell = namedtuple("ExcelCell", + 'row, col, val, style, mergestart, mergeend') + +header_style = {"font": {"bold": True}, + "borders": {"top": "thin", + "right": "thin", + "bottom": "thin", + "left": "thin"}, + "alignment": {"horizontal": "center"}} + + +class ExcelFormatter(object): + + def __init__(self, df): + self.df = df + self.rowcounter = 0 + + def _format_header_mi(self): + levels = self.df.columns.format(sparsify=True, adjoin=False, + names=False) + level_lenghts = fmt._get_level_lengths(levels) + for lnum, (records, values) in enumerate(zip(level_lenghts, + levels)): + name = self.df.columns.names[lnum] + yield ExcelCell(lnum, 0, name, header_style, None, None) + for i in records: + if records[i] > 1: + yield ExcelCell(lnum, i + 1, values[i], + header_style, lnum, i + records[i]) + else: + yield ExcelCell(lnum, i + 1, values[i], + header_style, None, None) + + self.rowcounter = lnum + + def _format_header_regular(self): + for colindex, colname in enumerate(self.df.columns): + yield ExcelCell(self.rowcounter, colindex, + colname, header_style, None, None) + + def _format_header(self): + if isinstance(self.df.columns, MultiIndex): + gen = 
self._format_header_mi() + else: + gen = self._format_header_regular() + + gen2 = () + if self.df.index.names: + row = [x if x is not None else '' + for x in self.df.index.names] + [''] * len(self.df.columns) + if reduce(lambda x, y: x and y, map(lambda x: x != '', row)): + gen2 = (ExcelCell(self.rowcounter, colindex, val, + header_style, None, None) + for colindex, val in enumerate(row)) + self.rowcounter += 1 + return itertools.chain(gen, gen2) + + def _format_body(self): + + if isinstance(self.df.columns, MultiIndex): + return self._format_hierarchical_rows() + else: + return self._format_regular_rows() + + def _format_regular_rows(self): + self.rowcounter += 1 + for colidx, colname in enumerate(self.df.columns): + series = self.df[colname] + for i, val in enumerate(series): + yield ExcelCell(self.rowcounter + i, colidx, + val, None, None, None) + + def _format_hierarchical_rows(self): + self.rowcounter += 1 + for idx, idxval in enumerate(self.df.index): + yield ExcelCell(self.rowcounter + idx, 0, + idxval, header_style, None, None) + + for colidx, colname in enumerate(self.df.columns): + series = self.df[colname] + for i, val in enumerate(series): + yield ExcelCell(self.rowcounter + i, + colidx + 1, val, None, None, None) + + def get_formatted_cells(self): + return itertools.chain(self._format_header(), self._format_body()) + #---------------------------------------------------------------------- # Array formatters From bce9118946b7e6495609843d976443882fc65c05 Mon Sep 17 00:00:00 2001 From: locojaydev Date: Wed, 21 Nov 2012 16:14:15 -0500 Subject: [PATCH 02/10] excel format --- pandas/core/frame.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f7f296e822e15..16fc1af16311c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1326,7 +1326,7 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None, def to_excel(self, excel_writer, 
sheet_name='sheet1', na_rep='', float_format=None, cols=None, header=True, index=True, - index_label=None): + index_label=None, startrow=0, startcol=0): """ Write DataFrame to a excel sheet @@ -1351,6 +1351,9 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='', Column label for index column(s) if desired. If None is given, and `header` and `index` are True, then the index names are used. A sequence should be given if the DataFrame uses MultiIndex. + startow : upper left cell row to dump data frame + startcol : upper left cell column to dump data frame + Notes ----- @@ -1367,11 +1370,14 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='', if isinstance(excel_writer, basestring): excel_writer = ExcelWriter(excel_writer) need_save = True - excel_writer.cur_sheet = sheet_name - self._helper_csvexcel(excel_writer, na_rep=na_rep, - float_format=float_format, cols=cols, - header=header, index=index, - index_label=index_label) + # excel_writer.cur_sheet = sheet_name + # self._helper_csvexcel(excel_writer, na_rep=na_rep, + # float_format=float_format, cols=cols, + # header=header, index=index, + # index_label=index_label) + formatter = fmt.ExcelFormatter(self) + formatted_cells = formatter.get_formatted_cells() + excel_writer.write_cells(formatted_cells, sheet_name) if need_save: excel_writer.save() From b17806664cb53866e52027886704251cab56d7ff Mon Sep 17 00:00:00 2001 From: locojaydev Date: Wed, 21 Nov 2012 16:14:23 -0500 Subject: [PATCH 03/10] excel format --- pandas/io/parsers.py | 197 +++++++++++++++++++++++++++++++++---------- 1 file changed, 151 insertions(+), 46 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 52c8c4aa65a13..b807a9fa29091 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -6,6 +6,7 @@ from itertools import izip from urlparse import urlparse import csv +import xlwt try: next @@ -1881,6 +1882,89 @@ def _trim_excel_header(row): return row +class CellStyleConverter(object): + """ + 
Utility Class which converts a style dict to xlrd or openpyxl style + """ + + @staticmethod + def to_xls(style_dict): + """ + converts a style_dict to an xlwt style object + Parameters + ---------- + style_dict: style dictionary to convert + """ + def style_to_xlwt(item, firstlevel=True, field_sep=',', line_sep=';'): + """helper wich recursively generate an xlwt easy style string + for example: + + hstyle = {"font": {"bold": True}, + "border": {"top": "thin", + "right": "thin", + "bottom": "thin", + "left": "thin"}, + "align": {"horiz": "center"}} + will be converted to + font: bold on; \ + border: top thin, right thin, bottom thin, left thin; \ + align: horiz center; + """ + if hasattr(item, 'items'): + if firstlevel: + it = ["%s: %s" % (key, style_to_xlwt(value, False)) + for key, value in item.items()] + out = "%s " % (line_sep).join(it) + return out + else: + it = ["%s %s" % (key, style_to_xlwt(value, False)) + for key, value in item.items()] + out = "%s " % (field_sep).join(it) + return out + else: + item = "%s" % item + item = item.replace("True", "on") + item = item.replace("False", "off") + return item + + if style_dict: + xlwt_stylestr = style_to_xlwt(style_dict) + return xlwt.easyxf(xlwt_stylestr, field_sep=',', line_sep=';') + else: + return xlwt.XFStyle() + + @staticmethod + def to_xlsx(style_dict): + """ + converts a style_dict to an openpyxl style object + Parameters + ---------- + style_dict: style dictionary to convert + """ + + from openpyxl.style import Style + xls_style = Style() + for key, value in style_dict.items(): + for nk, nv in value.items(): + if key == "borders": + (xls_style.borders.__getattribute__(nk) + .__setattr__('border_style', nv)) + else: + xls_style.__getattribute__(key).__setattr__(nk, nv) + + return xls_style + + +def _conv_value(val): + #convert value for excel dump + if isinstance(val, np.int64): + val = int(val) + if isinstance(val, np.bool8): + val = bool(val) + + return val + + class ExcelWriter(object): """ Class for 
writing DataFrame objects into excel sheets, uses xlwt for xls, @@ -1897,11 +1981,12 @@ def __init__(self, path): self.use_xlsx = False import xlwt self.book = xlwt.Workbook() - self.fm_datetime = xlwt.easyxf(num_format_str='YYYY-MM-DD HH:MM:SS') + self.fm_datetime = xlwt.easyxf( + num_format_str='YYYY-MM-DD HH:MM:SS') self.fm_date = xlwt.easyxf(num_format_str='YYYY-MM-DD') else: from openpyxl.workbook import Workbook - self.book = Workbook(optimized_write=True) + self.book = Workbook()#optimized_write=True) self.path = path self.sheets = {} self.cur_sheet = None @@ -1912,16 +1997,18 @@ def save(self): """ self.book.save(self.path) - def writerow(self, row, sheet_name=None): + def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): """ - Write the given row into Excel an excel sheet + Write given formated cells into Excel an excel sheet Parameters ---------- - row : list - Row of data to save to Excel sheet + cells : generator + cell of formated data to save to Excel sheet sheet_name : string, default None Name of Excel sheet, if None, then use self.cur_sheet + startrow: upper left cell row to dump data frame + startcol: upper left cell column to dump data frame """ if sheet_name is None: sheet_name = self.cur_sheet @@ -1929,49 +2016,67 @@ def writerow(self, row, sheet_name=None): raise Exception('Must pass explicit sheet_name or set ' 'cur_sheet property') if self.use_xlsx: - self._writerow_xlsx(row, sheet_name) + self._writecells_xlsx(cells, sheet_name, startrow, startcol) else: - self._writerow_xls(row, sheet_name) + self._writecells_xls(cells, sheet_name, startrow, startcol) + + def _writecells_xlsx(self, cells, sheet_name, startrow, startcol): + from openpyxl.cell import get_column_letter - def _writerow_xls(self, row, sheet_name): if sheet_name in self.sheets: - sheet, row_idx = self.sheets[sheet_name] + wks = self.sheets[sheet_name] else: - sheet = self.book.add_sheet(sheet_name) - row_idx = 0 - sheetrow = sheet.row(row_idx) - for i, val in 
enumerate(row): - if isinstance(val, (datetime.datetime, datetime.date)): - if isinstance(val, datetime.datetime): - sheetrow.write(i, val, self.fm_datetime) - else: - sheetrow.write(i, val, self.fm_date) - elif isinstance(val, np.int64): - sheetrow.write(i, int(val)) - elif isinstance(val, np.bool8): - sheetrow.write(i, bool(val)) - else: - sheetrow.write(i, val) - row_idx += 1 - if row_idx == 1000: - sheet.flush_row_data() - self.sheets[sheet_name] = (sheet, row_idx) - - def _writerow_xlsx(self, row, sheet_name): + wks = self.book.create_sheet() + wks.title = sheet_name + self.sheets[sheet_name] = wks + + for cell in cells: + colletter = get_column_letter(startcol + cell.col + 1) + xcell = wks.cell("%s%s" % (colletter, startrow + cell.row + 1)) + xcell.value = _conv_value(cell.val) + if cell.style: + style = CellStyleConverter.to_xlsx(cell.style) + for field in style.__fields__: + xcell.style.__setattr__(field, + style.__getattribute__(field)) + + if isinstance(cell.val, datetime.datetime): + style.num_format_str = "YYYY-MM-DD HH:SS" + elif isinstance(cell.val, datetime.date): + style.num_format_str = "YYYY-MM-DD" + + #merging requires openpyxl latest (works on 1.5.7) + if cell.mergestart is not None and cell.mergeend is not None: + cletterstart = get_column_letter(startcol + cell.col + 1) + cletterend = get_column_letter(startcol + cell.mergeend + 1) + + wks.merge_cells('%s%s:%s%s' % (cletterstart, + startrow + cell.row + 1, + cletterend, + startrow + cell.mergestart + 1)) + + def _writecells_xls(self, cells, sheet_name, startrow, startcol): if sheet_name in self.sheets: - sheet, row_idx = self.sheets[sheet_name] + wks = self.sheets[sheet_name] else: - sheet = self.book.create_sheet() - sheet.title = sheet_name - row_idx = 0 - - conv_row = [] - for val in row: - if isinstance(val, np.int64): - val = int(val) - elif isinstance(val, np.bool8): - val = bool(val) - conv_row.append(val) - sheet.append(conv_row) - row_idx += 1 - self.sheets[sheet_name] = (sheet, 
row_idx) + wks = self.book.add_sheet(sheet_name) + self.sheets[sheet_name] = wks + + for cell in cells: + val = _conv_value(cell.val) + style = CellStyleConverter.to_xls(cell.style) + if isinstance(val, datetime.datetime): + style.num_format_str = "YYYY-MM-DD HH:SS" + elif isinstance(val, datetime.date): + style.num_format_str = "YYYY-MM-DD" + + if cell.mergestart is not None and cell.mergeend is not None: + wks.write_merge(startrow + cell.row, + startrow + cell.mergestart, + startcol + cell.col, + startcol + cell.mergeend, + val, style) + else: + wks.write(startrow + cell.row, + startcol + cell.col, + val, style) From afde3f21f088afa784d0d6b05c0703f6db59e13b Mon Sep 17 00:00:00 2001 From: locojaydev Date: Wed, 21 Nov 2012 20:12:51 -0500 Subject: [PATCH 04/10] adding na_repl, cols argument to excel formatter --- pandas/core/format.py | 85 ++++++++++++++++++++++++----------- pandas/core/frame.py | 17 +++---- pandas/io/parsers.py | 5 ++- pandas/src/parse_helper.h | 2 +- pandas/src/parser/tokenizer.c | 2 +- 5 files changed, 72 insertions(+), 39 deletions(-) diff --git a/pandas/core/format.py b/pandas/core/format.py index 0f47fffe8e957..70c2d8707e53f 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -16,9 +16,9 @@ import pandas.lib as lib import numpy as np + import itertools -from collections import namedtuple docstring_to_string = """ Parameters @@ -686,8 +686,23 @@ def grouper(x): return result -ExcelCell = namedtuple("ExcelCell", - 'row, col, val, style, mergestart, mergeend') +#from collections import namedtuple +# ExcelCell = namedtuple("ExcelCell", +# 'row, col, val, style, mergestart, mergeend') + +class ExcelCell: + __fields__ = ('row', 'col', 'val', 'style', 'mergestart', 'mergeend') + __slots__ = __fields__ + + def __init__(self, row, col, val, + style=None, mergestart=None, mergeend=None): + self.row = row + self.col = col + self.val = val + self.style = style + self.mergestart = mergestart + self.mergeend = mergeend + header_style = 
{"font": {"bold": True}, "borders": {"top": "thin", @@ -698,36 +713,55 @@ def grouper(x): class ExcelFormatter(object): + """ + Class for formatting a DataFrame to a list of ExcelCells, + + Parameters + ---------- + df : dataframe + na_rep: na representation + index : boolean, default True + output row names (index) + cols : sequence, optional + Columns to write + """ - def __init__(self, df): + def __init__(self, df, na_rep='', cols=None): self.df = df self.rowcounter = 0 + self.na_rep = na_rep + self.columns = cols + if cols is None: + self.columns = df.columns + + def _format_value(self, val): + if lib.checknull(val): + val = self.na_rep + return val def _format_header_mi(self): - levels = self.df.columns.format(sparsify=True, adjoin=False, + levels = self.columns.format(sparsify=True, adjoin=False, names=False) - level_lenghts = fmt._get_level_lengths(levels) + level_lenghts = _get_level_lengths(levels) for lnum, (records, values) in enumerate(zip(level_lenghts, levels)): - name = self.df.columns.names[lnum] - yield ExcelCell(lnum, 0, name, header_style, None, None) + name = self.columns.names[lnum] + yield ExcelCell(lnum, 0, name, header_style) for i in records: if records[i] > 1: yield ExcelCell(lnum, i + 1, values[i], header_style, lnum, i + records[i]) else: - yield ExcelCell(lnum, i + 1, values[i], - header_style, None, None) + yield ExcelCell(lnum, i + 1, values[i], header_style) self.rowcounter = lnum def _format_header_regular(self): - for colindex, colname in enumerate(self.df.columns): - yield ExcelCell(self.rowcounter, colindex, - colname, header_style, None, None) + for colindex, colname in enumerate(self.columns): + yield ExcelCell(self.rowcounter, colindex, colname, header_style) def _format_header(self): - if isinstance(self.df.columns, MultiIndex): + if isinstance(self.columns, MultiIndex): gen = self._format_header_mi() else: gen = self._format_header_regular() @@ -735,10 +769,9 @@ def _format_header(self): gen2 = () if self.df.index.names: row 
= [x if x is not None else '' - for x in self.df.index.names] + [''] * len(self.df.columns) + for x in self.df.index.names] + [''] * len(self.columns) if reduce(lambda x, y: x and y, map(lambda x: x != '', row)): - gen2 = (ExcelCell(self.rowcounter, colindex, val, - header_style, None, None) + gen2 = (ExcelCell(self.rowcounter, colindex, val, header_style) for colindex, val in enumerate(row)) self.rowcounter += 1 return itertools.chain(gen, gen2) @@ -752,26 +785,26 @@ def _format_body(self): def _format_regular_rows(self): self.rowcounter += 1 - for colidx, colname in enumerate(self.df.columns): + for colidx, colname in enumerate(self.columns): series = self.df[colname] for i, val in enumerate(series): - yield ExcelCell(self.rowcounter + i, colidx, - val, None, None, None) + yield ExcelCell(self.rowcounter + i, colidx, val) def _format_hierarchical_rows(self): self.rowcounter += 1 for idx, idxval in enumerate(self.df.index): - yield ExcelCell(self.rowcounter + idx, 0, - idxval, header_style, None, None) + yield ExcelCell(self.rowcounter + idx, 0, idxval, header_style) - for colidx, colname in enumerate(self.df.columns): + for colidx, colname in enumerate(self.columns): series = self.df[colname] for i, val in enumerate(series): - yield ExcelCell(self.rowcounter + i, - colidx + 1, val, None, None, None) + yield ExcelCell(self.rowcounter + i, colidx + 1, val) def get_formatted_cells(self): - return itertools.chain(self._format_header(), self._format_body()) + for cell in itertools.chain(self._format_header(), + self._format_body()): + cell.val = self._format_value(cell.val) + yield cell #---------------------------------------------------------------------- # Array formatters diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 16fc1af16311c..207f75cb7cbf7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1180,7 +1180,7 @@ def to_panel(self): to_wide = deprecate('to_wide', to_panel) - def _helper_csvexcel(self, writer, na_rep=None, cols=None, 
+ def _helper_csv(self, writer, na_rep=None, cols=None, header=True, index=True, index_label=None, float_format=None): if cols is None: @@ -1315,7 +1315,7 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None, else: csvout = csv.writer(f, lineterminator='\n', delimiter=sep, quoting=quoting) - self._helper_csvexcel(csvout, na_rep=na_rep, + self._helper_csv(csvout, na_rep=na_rep, float_format=float_format, cols=cols, header=header, index=index, index_label=index_label) @@ -1368,16 +1368,13 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='', from pandas.io.parsers import ExcelWriter need_save = False if isinstance(excel_writer, basestring): - excel_writer = ExcelWriter(excel_writer) + excel_writer = ExcelWriter(excel_writer, na_rep=na_rep) need_save = True - # excel_writer.cur_sheet = sheet_name - # self._helper_csvexcel(excel_writer, na_rep=na_rep, - # float_format=float_format, cols=cols, - # header=header, index=index, - # index_label=index_label) - formatter = fmt.ExcelFormatter(self) + + formatter = fmt.ExcelFormatter(self, na_rep=na_rep, cols=cols) formatted_cells = formatter.get_formatted_cells() - excel_writer.write_cells(formatted_cells, sheet_name) + excel_writer.write_cells(formatted_cells, sheet_name, + startrow=startrow, startcol=startcol) if need_save: excel_writer.save() diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index b807a9fa29091..7f992e883436e 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1959,8 +1959,10 @@ def _conv_value(val): #convert value for excel dump if isinstance(val, np.int64): val = int(val) - if isinstance(val, np.bool8): + elif isinstance(val, np.bool8): val = bool(val) + elif isinstance(val, lib.Timestamp): + val = val._repr_base return val @@ -2021,6 +2023,7 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): self._writecells_xls(cells, sheet_name, startrow, startcol) def _writecells_xlsx(self, cells, sheet_name, startrow, startcol): + from 
openpyxl.cell import get_column_letter if sheet_name in self.sheets: diff --git a/pandas/src/parse_helper.h b/pandas/src/parse_helper.h index 02f3003cc401e..4d1aa738a701b 100644 --- a/pandas/src/parse_helper.h +++ b/pandas/src/parse_helper.h @@ -1,7 +1,7 @@ #include #include -double PANDAS_INLINE xstrtod(const char *p, char **q, char decimal, char sci, int skip_trailing); +double xstrtod(const char *p, char **q, char decimal, char sci, int skip_trailing); int to_double(char *item, double *p_value, char sci, char decimal) { diff --git a/pandas/src/parser/tokenizer.c b/pandas/src/parser/tokenizer.c index 6929c7b26989d..5175965fe87ab 100644 --- a/pandas/src/parser/tokenizer.c +++ b/pandas/src/parser/tokenizer.c @@ -395,7 +395,7 @@ int P_INLINE end_field(parser_t *self) { return 0; } -int P_INLINE end_line(parser_t *self) { +int end_line(parser_t *self) { int fields; khiter_t k; /* for hash set detection */ int ex_fields = -1; From d354267ee0859b8fb5c55a2a418e8cb14768e6b2 Mon Sep 17 00:00:00 2001 From: locojaydev Date: Thu, 22 Nov 2012 16:10:36 -0500 Subject: [PATCH 05/10] adding float_format to ExcelFormatter --- pandas/core/format.py | 16 +++++++++++++--- pandas/core/frame.py | 7 +++++-- pandas/io/parsers.py | 3 ++- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/pandas/core/format.py b/pandas/core/format.py index 70c2d8707e53f..650dccace4d5e 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -720,23 +720,33 @@ class ExcelFormatter(object): ---------- df : dataframe na_rep: na representation - index : boolean, default True - output row names (index) + float_format : string, default None + Format string for floating point numbers cols : sequence, optional Columns to write + index : boolean, default True + output row names (index) """ - def __init__(self, df, na_rep='', cols=None): + def __init__(self, + df, + na_rep='', + float_format=None, + cols=None + ): self.df = df self.rowcounter = 0 self.na_rep = na_rep self.columns = cols if cols 
is None: self.columns = df.columns + self.float_format = float_format def _format_value(self, val): if lib.checknull(val): val = self.na_rep + if self.float_format is not None and com.is_float(val): + val = self.float_format % val return val def _format_header_mi(self): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 207f75cb7cbf7..98f06891ec165 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1368,10 +1368,13 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='', from pandas.io.parsers import ExcelWriter need_save = False if isinstance(excel_writer, basestring): - excel_writer = ExcelWriter(excel_writer, na_rep=na_rep) + excel_writer = ExcelWriter(excel_writer) need_save = True - formatter = fmt.ExcelFormatter(self, na_rep=na_rep, cols=cols) + formatter = fmt.ExcelFormatter(self, + na_rep=na_rep, + cols=cols, + float_format=float_format) formatted_cells = formatter.get_formatted_cells() excel_writer.write_cells(formatted_cells, sheet_name, startrow=startrow, startcol=startcol) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 7f992e883436e..df1cd06034ea1 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2048,7 +2048,8 @@ def _writecells_xlsx(self, cells, sheet_name, startrow, startcol): elif isinstance(cell.val, datetime.date): style.num_format_str = "YYYY-MM-DD" - #merging requires openpyxl latest (works on 1.5.7) + #merging requires openpyxl latest (works on 1.6.1) + #todo add version check if cell.mergestart is not None and cell.mergeend is not None: cletterstart = get_column_letter(startcol + cell.col + 1) cletterend = get_column_letter(startcol + cell.mergeend + 1) From f13d09383e27e8205b7a515a28d8424af87e8f28 Mon Sep 17 00:00:00 2001 From: locojaydev Date: Wed, 28 Nov 2012 18:30:15 -0500 Subject: [PATCH 06/10] excelformatter handles multiindex, aliases --- pandas/core/format.py | 106 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 93 insertions(+), 13 deletions(-) diff --git 
a/pandas/core/format.py b/pandas/core/format.py index 89c34052caecd..db50955c13c3e 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -717,15 +717,25 @@ class ExcelFormatter(object): Format string for floating point numbers cols : sequence, optional Columns to write + header : boolean or list of string, default True + Write out column names. If a list of string is given it is + assumed to be aliases for the column names index : boolean, default True output row names (index) + index_label : string or sequence, default None + Column label for index column(s) if desired. If None is given, and + `header` and `index` are True, then the index names are used. A + sequence should be given if the DataFrame uses MultiIndex. """ def __init__(self, df, na_rep='', float_format=None, - cols=None + cols=None, + header=True, + index=True, + index_label=None ): self.df = df self.rowcounter = 0 @@ -734,34 +744,58 @@ def __init__(self, if cols is None: self.columns = df.columns self.float_format = float_format + self.index = index + self.index_label = index_label + self.header = header def _format_value(self, val): if lib.checknull(val): val = self.na_rep if self.float_format is not None and com.is_float(val): - val = self.float_format % val + val = float(self.float_format % val) return val def _format_header_mi(self): levels = self.columns.format(sparsify=True, adjoin=False, names=False) level_lenghts = _get_level_lengths(levels) + coloffset = 0 + if isinstance(self.df.index, MultiIndex): + coloffset = len(self.df.index[0]) - 1 + for lnum, (records, values) in enumerate(zip(level_lenghts, levels)): name = self.columns.names[lnum] - yield ExcelCell(lnum, 0, name, header_style) + yield ExcelCell(lnum, coloffset, name, header_style) for i in records: if records[i] > 1: - yield ExcelCell(lnum, i + 1, values[i], - header_style, lnum, i + records[i]) + yield ExcelCell(lnum,coloffset + i + 1, values[i], + header_style, lnum, coloffset + i + records[i]) else: - yield 
ExcelCell(lnum, i + 1, values[i], header_style) + yield ExcelCell(lnum, coloffset + i + 1, values[i], header_style) self.rowcounter = lnum def _format_header_regular(self): - for colindex, colname in enumerate(self.columns): - yield ExcelCell(self.rowcounter, colindex, colname, header_style) + has_aliases = isinstance(self.header, (tuple, list, np.ndarray)) + if has_aliases or self.header: + coloffset = 0 + if self.index: + coloffset = 1 + if isinstance(self.df.index, MultiIndex): + coloffset = len(self.df.index[0]) + + colnames = self.columns + if has_aliases: + if len(self.header) != len(self.columns): + raise ValueError(('Writing %d cols but got %d aliases' + % (len(self.columns), len(self.header)))) + else: + colnames = self.header + + for colindex, colname in enumerate(colnames): + yield ExcelCell(self.rowcounter, colindex + coloffset, colname, + header_style) def _format_header(self): if isinstance(self.columns, MultiIndex): @@ -781,27 +815,73 @@ def _format_header(self): def _format_body(self): - if isinstance(self.df.columns, MultiIndex): + if isinstance(self.df.index, MultiIndex): return self._format_hierarchical_rows() else: return self._format_regular_rows() def _format_regular_rows(self): self.rowcounter += 1 + + coloffset = 0 + #output index and index_label? 
+ if self.index: + #chek aliases + #if list only take first as this is not a MultiIndex + if self.index_label and isinstance(self.index_label, + (list, tuple, np.ndarray)): + index_label = self.index_label[0] + #if string good to go + elif self.index_label and isinstance(self.index_label, str): + index_label = self.index_label + else: + index_label = self.df.index.names[0] + + if index_label: + yield ExcelCell(self.rowcounter, 0, + index_label, header_style) + self.rowcounter += 1 + + #write index_values + index_values = self.df.index + coloffset = 1 + for idx, idxval in enumerate(index_values): + yield ExcelCell(self.rowcounter + idx, 0, idxval, header_style) + for colidx, colname in enumerate(self.columns): series = self.df[colname] for i, val in enumerate(series): - yield ExcelCell(self.rowcounter + i, colidx, val) + yield ExcelCell(self.rowcounter + i, colidx + coloffset, val) def _format_hierarchical_rows(self): self.rowcounter += 1 - for idx, idxval in enumerate(self.df.index): - yield ExcelCell(self.rowcounter + idx, 0, idxval, header_style) + + gcolidx = 0 + #output index and index_label? 
+ if self.index: + index_labels = self.df.index.names + #check for aliases + if self.index_label and isinstance(self.index_label, + (list, tuple, np.ndarray)): + index_labels = self.index_label + + #if index labels are not empty go ahead and dump + if filter(lambda x: x is not None, index_labels): + for cidx, name in enumerate(index_labels): + yield ExcelCell(self.rowcounter, cidx, + name, header_style) + self.rowcounter += 1 + + for indexcolvals in zip(*self.df.index): + for idx, indexcolval in enumerate(indexcolvals): + yield ExcelCell(self.rowcounter + idx, gcolidx, + indexcolval, header_style) + gcolidx += 1 for colidx, colname in enumerate(self.columns): series = self.df[colname] for i, val in enumerate(series): - yield ExcelCell(self.rowcounter + i, colidx + 1, val) + yield ExcelCell(self.rowcounter + i, gcolidx + colidx, val) def get_formatted_cells(self): for cell in itertools.chain(self._format_header(), From 9ae35f8c2790178b8e1c6e209c6e83e46f9ce43c Mon Sep 17 00:00:00 2001 From: locojaydev Date: Wed, 28 Nov 2012 18:31:10 -0500 Subject: [PATCH 07/10] hadling all attributes --- pandas/core/frame.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 84a5d0e978ecf..ebe361a33b28c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1415,7 +1415,10 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='', formatter = fmt.ExcelFormatter(self, na_rep=na_rep, cols=cols, - float_format=float_format) + header=header, + float_format=float_format, + index=index, + index_label=index_label) formatted_cells = formatter.get_formatted_cells() excel_writer.write_cells(formatted_cells, sheet_name, startrow=startrow, startcol=startcol) From 5138bdc53858183a118c6abd15c40414cb16cab1 Mon Sep 17 00:00:00 2001 From: locojaydev Date: Wed, 28 Nov 2012 18:34:16 -0500 Subject: [PATCH 08/10] reader bug fix (colnames was None.1,....), datetime hadling, period --- pandas/io/parsers.py | 31 
+++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 927fa87d317b3..309a19ad3ed50 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -21,6 +21,7 @@ import pandas.lib as lib import pandas._parser as _parser +from pandas.tseries.period import Period class DateConversionError(Exception): pass @@ -970,6 +971,10 @@ def TextParser(*args, **kwds): # verbose=False, encoding=None, squeeze=False): +def count_empty_vals(vals): + return sum([1 for v in vals if v == '' or v is None]) + + class PythonParser(ParserBase): def __init__(self, f, **kwds): @@ -1100,6 +1105,13 @@ def read(self, rows=None): self.index_col, self.index_names) + #handle new style for names in index + count_empty_content_vals = count_empty_vals(content[0]) + indexnamerow = None + if count_empty_content_vals == len(columns): + indexnamerow = content[0] + content = content[1:] + alldata = self._rows_to_cols(content) data = self._exclude_implicit_index(alldata) @@ -1107,6 +1119,9 @@ def read(self, rows=None): data = self._convert_data(data) index = self._make_index(data, alldata, columns) + if indexnamerow: + coffset = len(indexnamerow) - len(columns) + index.names = indexnamerow[:coffset] return index, columns, data @@ -1870,7 +1885,8 @@ def sheet_names(self): def _trim_excel_header(row): # trim header row so auto-index inference works - while len(row) > 0 and row[0] == '': + # xlrd uses '' , openpyxl None + while len(row) > 0 and (row[0] == '' or row[0] is None): row = row[1:] return row @@ -1954,8 +1970,8 @@ def _conv_value(val): val = int(val) elif isinstance(val, np.bool8): val = bool(val) - elif isinstance(val, lib.Timestamp): - val = val._repr_base + elif isinstance(val, Period): + val = "%s" % val return val @@ -1982,6 +1998,9 @@ def __init__(self, path): else: from openpyxl.workbook import Workbook self.book = Workbook()#optimized_write=True) + #open pyxl 1.6.1 adds a dummy sheet remove it + if 
self.book.worksheets: + self.book.remove_sheet(self.book.worksheets[0]) self.path = path self.sheets = {} self.cur_sheet = None @@ -2037,9 +2056,9 @@ def _writecells_xlsx(self, cells, sheet_name, startrow, startcol): style.__getattribute__(field)) if isinstance(cell.val, datetime.datetime): - style.num_format_str = "YYYY-MM-DD HH:SS" + xcell.style.number_format.format_code = "YYYY-MM-DD HH:MM:SS" elif isinstance(cell.val, datetime.date): - style.num_format_str = "YYYY-MM-DD" + xcell.style.number_format.format_code = "YYYY-MM-DD" #merging requires openpyxl latest (works on 1.6.1) #todo add version check @@ -2063,7 +2082,7 @@ def _writecells_xls(self, cells, sheet_name, startrow, startcol): val = _conv_value(cell.val) style = CellStyleConverter.to_xls(cell.style) if isinstance(val, datetime.datetime): - style.num_format_str = "YYYY-MM-DD HH:SS" + style.num_format_str = "YYYY-MM-DD HH:MM:SS" elif isinstance(val, datetime.date): style.num_format_str = "YYYY-MM-DD" From c1708b2f7e35235d9301c7e5d68688c55e0dbff9 Mon Sep 17 00:00:00 2001 From: locojaydev Date: Wed, 28 Nov 2012 18:45:36 -0500 Subject: [PATCH 09/10] adding styling test --- pandas/tests/test_frame.py | 177 +++++++++++++++++++++++++++++++++---- 1 file changed, 162 insertions(+), 15 deletions(-) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index b76d9ea1e6052..5b1f900904730 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -3853,7 +3853,7 @@ def test_to_excel_from_excel(self): self.frame.to_excel(path,'test1') reader = ExcelFile(path) - recons = reader.parse('test1', index_col=0, skiprows=[1]) + recons = reader.parse('test1', index_col=0, skiprows=[2]) assert_frame_equal(self.frame.ix[1:], recons) self.frame.to_excel(path,'test1',na_rep='NA') @@ -3908,6 +3908,28 @@ def test_to_excel_from_excel(self): xp.columns = col_aliases assert_frame_equal(xp, rs) + # test index_label + frame = (DataFrame(np.random.randn(10,2)) >= 0) + frame.to_excel(path, 'test1', 
index_label=['test']) + reader = ExcelFile(path) + recons = reader.parse('test1').astype(np.int64) + frame.index.names = ['test'] + self.assertEqual(frame.index.names, recons.index.names) + + frame = (DataFrame(np.random.randn(10,2)) >= 0) + frame.to_excel(path, 'test1', index_label=['test', 'dummy', 'dummy2']) + reader = ExcelFile(path) + recons = reader.parse('test1').astype(np.int64) + frame.index.names = ['test'] + self.assertEqual(frame.index.names, recons.index.names) + + frame = (DataFrame(np.random.randn(10,2)) >= 0) + frame.to_excel(path, 'test1', index_label='test') + reader = ExcelFile(path) + recons = reader.parse('test1').astype(np.int64) + frame.index.names = ['test'] + self.assertEqual(frame.index.names, recons.index.names) + os.remove(path) # datetime.date, not sure what to test here exactly @@ -3993,22 +4015,28 @@ def test_to_excel_multiindex(self): recons = reader.parse('test1') assert_frame_equal(tsframe, recons) - # no index - tsframe.index.names = ['first', 'second'] - tsframe.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = reader.parse('test1') - assert_almost_equal(tsframe.values, - recons.ix[:, tsframe.columns].values) - self.assertEqual(len(tsframe.columns) + 2, len(recons.columns)) - - tsframe.index.names = [None, None] # no index - tsframe.to_excel(path, 'test1', index=False) - reader = ExcelFile(path) - recons = reader.parse('test1', index_col=None) - assert_almost_equal(recons.values, self.tsframe.values) + #TODO : mention this does not make sence anymore + #with the new formatting as we are not alligning colnames and indexlabels + #on the same row + + # tsframe.index.names = ['first', 'second'] + # tsframe.to_excel(path, 'test1') + # reader = ExcelFile(path) + # recons = reader.parse('test1') + # assert_almost_equal(tsframe.values, + # recons.ix[:, tsframe.columns].values) + # self.assertEqual(len(tsframe.columns) + 2, len(recons.columns)) + + # tsframe.index.names = [None, None] + + # # no index + # 
tsframe.to_excel(path, 'test1', index=False) + # reader = ExcelFile(path) + # recons = reader.parse('test1', index_col=None) + # assert_almost_equal(recons.values, self.tsframe.values) + self.tsframe.index = old_index # needed if setUP becomes classmethod # write a big DataFrame @@ -4071,6 +4099,125 @@ def test_to_excel_unicode_filename(self): assert_frame_equal(rs, xp) os.remove(filename) + def test_to_excel_styleconverter(self): + from pandas.io.parsers import CellStyleConverter + try: + import xlwt + import openpyxl + except ImportError: + raise nose.SkipTest + + hstyle = {"font": {"bold": True}, + "borders": {"top": "thin", + "right": "thin", + "bottom": "thin", + "left": "thin"}, + "alignment": {"horizontal": "center"}} + xls_style = CellStyleConverter.to_xls(hstyle) + self.assertTrue(xls_style.font.bold) + self.assertEquals(xlwt.Borders.THIN, xls_style.borders.top) + self.assertEquals(xlwt.Borders.THIN, xls_style.borders.right) + self.assertEquals(xlwt.Borders.THIN, xls_style.borders.bottom) + self.assertEquals(xlwt.Borders.THIN, xls_style.borders.left) + self.assertEquals(xlwt.Alignment.HORZ_CENTER, xls_style.alignment.horz) + + xlsx_style = CellStyleConverter.to_xlsx(hstyle) + self.assertTrue(xlsx_style.font.bold) + self.assertEquals(openpyxl.style.Border.BORDER_THIN, + xlsx_style.borders.top.border_style) + self.assertEquals(openpyxl.style.Border.BORDER_THIN, + xlsx_style.borders.right.border_style) + self.assertEquals(openpyxl.style.Border.BORDER_THIN, + xlsx_style.borders.bottom.border_style) + self.assertEquals(openpyxl.style.Border.BORDER_THIN, + xlsx_style.borders.left.border_style) + self.assertEquals(openpyxl.style.Alignment.HORIZONTAL_CENTER, + xlsx_style.alignment.horizontal) + + def test_to_excel_header_styling(self): + + import StringIO + s = StringIO.StringIO( + """Date,ticker,type,value + 2001-01-01,x,close,12.2 + 2001-01-01,x,open ,12.1 + 2001-01-01,y,close,12.2 + 2001-01-01,y,open ,12.1 + 2001-02-01,x,close,12.2 + 2001-02-01,x,open ,12.1 + 
2001-02-01,y,close,12.2 + 2001-02-01,y,open ,12.1 + 2001-03-01,x,close,12.2 + 2001-03-01,x,open ,12.1 + 2001-03-01,y,close,12.2 + 2001-03-01,y,open ,12.1""") + df = read_csv(s, parse_dates=["Date"]) + pdf = df.pivot_table(values="value", rows=["ticker"], + cols=["Date", "type"]) + + try: + import xlrd + import openpyxl + from openpyxl.cell import get_column_letter + except ImportError: + raise nose.SkipTest + + filename = '__tmp__.xls' + pdf.to_excel(filename, 'test1') + + + wbk = xlrd.open_workbook(filename, + formatting_info=True) + self.assertEquals(["test1"], wbk.sheet_names()) + ws = wbk.sheet_by_name('test1') + self.assertEquals([(0, 1, 5, 7), (0, 1, 3, 5), (0, 1, 1, 3)], + ws.merged_cells) + for i in range(0, 2): + for j in range(0, 7): + xfx = ws.cell_xf_index(0, 0) + cell_xf = wbk.xf_list[xfx] + font = wbk.font_list + self.assertEquals(1, font[cell_xf.font_index].bold) + self.assertEquals(1, cell_xf.border.top_line_style) + self.assertEquals(1, cell_xf.border.right_line_style) + self.assertEquals(1, cell_xf.border.bottom_line_style) + self.assertEquals(1, cell_xf.border.left_line_style) + self.assertEquals(2, cell_xf.alignment.hor_align) + + os.remove(filename) + # test xlsx_styling + filename = '__tmp__.xlsx' + pdf.to_excel(filename, 'test1') + + wbk = openpyxl.load_workbook(filename) + self.assertEquals(["test1"], wbk.get_sheet_names()) + ws = wbk.get_sheet_by_name('test1') + + xlsaddrs = ["%s2" % chr(i) for i in range(ord('A'), ord('H'))] + xlsaddrs += ["A%s" % i for i in range(1, 6)] + xlsaddrs += ["B1", "D1", "F1"] + for xlsaddr in xlsaddrs: + cell = ws.cell(xlsaddr) + self.assertTrue(cell.style.font.bold) + self.assertEquals(openpyxl.style.Border.BORDER_THIN, + cell.style.borders.top.border_style) + self.assertEquals(openpyxl.style.Border.BORDER_THIN, + cell.style.borders.right.border_style) + self.assertEquals(openpyxl.style.Border.BORDER_THIN, + cell.style.borders.bottom.border_style) + self.assertEquals(openpyxl.style.Border.BORDER_THIN, + 
cell.style.borders.left.border_style) + self.assertEquals(openpyxl.style.Alignment.HORIZONTAL_CENTER, + cell.style.alignment.horizontal) + + mergedcells_addrs = ["C1", "E1", "G1"] + for maddr in mergedcells_addrs: + self.assertTrue(ws.cell(maddr).merged) + + os.remove(filename) + + + def test_info(self): io = StringIO() self.frame.info(buf=io) From 389da90bd96018511945060f17cdaca4ef94d107 Mon Sep 17 00:00:00 2001 From: locojaydev Date: Thu, 29 Nov 2012 11:54:20 -0500 Subject: [PATCH 10/10] adding argument has_index_labels to excel reader to handle index_labels not in the same row as column names. has_index_labels: boolean, default False. True if the cols defined in index_col have an index name and are not in the header --- pandas/io/parsers.py | 22 ++++++++++++++++---- pandas/tests/test_frame.py | 41 +++++++++++++++++++++++++------------- 2 files changed, 45 insertions(+), 18 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 309a19ad3ed50..14a01b38ae88e 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -458,6 +458,8 @@ def __init__(self, f, engine='python', **kwds): # might mutate self.engine self.options, self.engine = self._clean_options(options, engine) + if 'has_index_labels' in kwds: + self.options['has_index_labels'] = kwds['has_index_labels'] self._make_engine(self.engine) @@ -933,6 +935,9 @@ def TextParser(*args, **kwds): rows will be discarded index_col : int or list, default None Column or columns to use as the (possibly hierarchical) index + has_index_labels: boolean, default False + True if the cols defined in index_col have an index name and are + not in the header na_values : iterable, default None Custom NA values keep_default_na : bool, default True @@ -1001,6 +1006,9 @@ def __init__(self, f, **kwds): self.doublequote = kwds['doublequote'] self.skipinitialspace = kwds['skipinitialspace'] self.quoting = kwds['quoting'] + self.has_index_labels = False + if 'has_index_labels' in kwds: + self.has_index_labels =
kwds['has_index_labels'] self.verbose = kwds['verbose'] self.converters = kwds['converters'] @@ -1108,7 +1116,7 @@ def read(self, rows=None): #handle new style for names in index count_empty_content_vals = count_empty_vals(content[0]) indexnamerow = None - if count_empty_content_vals == len(columns): + if self.has_index_labels and count_empty_content_vals == len(columns): indexnamerow = content[0] content = content[1:] @@ -1715,7 +1723,7 @@ def __repr__(self): return object.__repr__(self) def parse(self, sheetname, header=0, skiprows=None, skip_footer=0, - index_col=None, parse_cols=None, parse_dates=False, + index_col=None, has_index_labels=False, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, chunksize=None, **kwds): """ @@ -1734,6 +1742,9 @@ def parse(self, sheetname, header=0, skiprows=None, skip_footer=0, index_col : int, default None Column to use as the row labels of the DataFrame. Pass None if there is no such column + has_index_labels: boolean, default False + True if the cols defined in index_col have an index name and are + not in the header parse_cols : int or list, default None If None then parse all columns, If int then indicates last column to be parsed @@ -1755,6 +1766,7 @@ def parse(self, sheetname, header=0, skiprows=None, skip_footer=0, False: self._parse_xls} return choose[self.use_xlsx](sheetname, header=header, skiprows=skiprows, index_col=index_col, + has_index_labels=has_index_labels, parse_cols=parse_cols, parse_dates=parse_dates, date_parser=date_parser, @@ -1796,7 +1808,7 @@ def _excel2num(x): return i in parse_cols def _parse_xlsx(self, sheetname, header=0, skiprows=None, - skip_footer=0, index_col=None, + skip_footer=0, index_col=None, has_index_labels=False, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, chunksize=None): sheet = self.book.get_sheet_by_name(name=sheetname) @@ -1820,6 +1832,7 @@ def _parse_xlsx(self, sheetname, header=0, skiprows=None, 
data[header] = _trim_excel_header(data[header]) parser = TextParser(data, header=header, index_col=index_col, + has_index_labels=has_index_labels, na_values=na_values, thousands=thousands, parse_dates=parse_dates, @@ -1831,7 +1844,7 @@ def _parse_xlsx(self, sheetname, header=0, skiprows=None, return parser.read() def _parse_xls(self, sheetname, header=0, skiprows=None, - skip_footer=0, index_col=None, + skip_footer=0, index_col=None, has_index_labels=None, parse_cols=None, parse_dates=False, date_parser=None, na_values=None, thousands=None, chunksize=None): from xlrd import xldate_as_tuple, XL_CELL_DATE, XL_CELL_ERROR @@ -1865,6 +1878,7 @@ def _parse_xls(self, sheetname, header=0, skiprows=None, data[header] = _trim_excel_header(data[header]) parser = TextParser(data, header=header, index_col=index_col, + has_index_labels=has_index_labels, na_values=na_values, thousands=thousands, parse_dates=parse_dates, diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 5b1f900904730..61456d6dbfe2e 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -3842,7 +3842,7 @@ def test_to_excel_from_excel(self): # test roundtrip self.frame.to_excel(path,'test1') reader = ExcelFile(path) - recons = reader.parse('test1', index_col=0) + recons = reader.parse('test1', index_col=0, has_index_labels=True) assert_frame_equal(self.frame, recons) self.frame.to_excel(path,'test1', index=False) @@ -3851,19 +3851,19 @@ def test_to_excel_from_excel(self): recons.index = self.frame.index assert_frame_equal(self.frame, recons) - self.frame.to_excel(path,'test1') - reader = ExcelFile(path) - recons = reader.parse('test1', index_col=0, skiprows=[2]) - assert_frame_equal(self.frame.ix[1:], recons) + # self.frame.to_excel(path,'test1') + # reader = ExcelFile(path) + # recons = reader.parse('test1', index_col=0, skiprows=[2], has_index_labels=True) + # assert_frame_equal(self.frame.ix[1:], recons) self.frame.to_excel(path,'test1',na_rep='NA') reader = 
ExcelFile(path) - recons = reader.parse('test1', index_col=0, na_values=['NA']) + recons = reader.parse('test1', index_col=0, na_values=['NA'], has_index_labels=True) assert_frame_equal(self.frame, recons) self.mixed_frame.to_excel(path,'test1') reader = ExcelFile(path) - recons = reader.parse('test1', index_col=0) + recons = reader.parse('test1', index_col=0, has_index_labels=True) assert_frame_equal(self.mixed_frame, recons) self.tsframe.to_excel(path, 'test1') @@ -3891,7 +3891,7 @@ def test_to_excel_from_excel(self): self.tsframe.to_excel(writer,'test2') writer.save() reader = ExcelFile(path) - recons = reader.parse('test1',index_col=0) + recons = reader.parse('test1',index_col=0, has_index_labels=True) assert_frame_equal(self.frame, recons) recons = reader.parse('test2',index_col=0) assert_frame_equal(self.tsframe, recons) @@ -3903,7 +3903,7 @@ def test_to_excel_from_excel(self): col_aliases = Index(['AA', 'X', 'Y', 'Z']) self.frame2.to_excel(path, 'test1', header=col_aliases) reader = ExcelFile(path) - rs = reader.parse('test1', index_col=0) + rs = reader.parse('test1', index_col=0, has_index_labels=True) xp = self.frame2.copy() xp.columns = col_aliases assert_frame_equal(xp, rs) @@ -3912,24 +3912,37 @@ def test_to_excel_from_excel(self): frame = (DataFrame(np.random.randn(10,2)) >= 0) frame.to_excel(path, 'test1', index_label=['test']) reader = ExcelFile(path) - recons = reader.parse('test1').astype(np.int64) + recons = reader.parse('test1', index_col=0, has_index_labels=True).astype(np.int64) frame.index.names = ['test'] self.assertEqual(frame.index.names, recons.index.names) frame = (DataFrame(np.random.randn(10,2)) >= 0) frame.to_excel(path, 'test1', index_label=['test', 'dummy', 'dummy2']) reader = ExcelFile(path) - recons = reader.parse('test1').astype(np.int64) + recons = reader.parse('test1', index_col=0, has_index_labels=True).astype(np.int64) frame.index.names = ['test'] self.assertEqual(frame.index.names, recons.index.names) frame = 
(DataFrame(np.random.randn(10,2)) >= 0) frame.to_excel(path, 'test1', index_label='test') reader = ExcelFile(path) - recons = reader.parse('test1').astype(np.int64) + recons = reader.parse('test1', index_col=0, has_index_labels=True).astype(np.int64) frame.index.names = ['test'] self.assertEqual(frame.index.names, recons.index.names) + #test index_labels in same row as column names + self.frame.to_excel('/tmp/tests.xls', 'test1', cols=['A', 'B', 'C', 'D'], index=False) + #take 'A' and 'B' as indexes (they are in same row as cols 'C', 'D') + df = self.frame.copy() + df = df.set_index(['A', 'B']) + + + reader = ExcelFile('/tmp/tests.xls') + recons = reader.parse('test1', index_col=[0, 1]) + assert_frame_equal(df, recons) + + + os.remove(path) # datetime.date, not sure what to test here exactly @@ -3993,7 +4006,7 @@ def test_to_excel_multiindex(self): # round trip frame.to_excel(path, 'test1') reader = ExcelFile(path) - df = reader.parse('test1', index_col=[0,1], parse_dates=False) + df = reader.parse('test1', index_col=[0,1], parse_dates=False, has_index_labels=True) assert_frame_equal(frame, df) self.assertEqual(frame.index.names, df.index.names) self.frame.index = old_index # needed if setUP becomes a classmethod @@ -4006,7 +4019,7 @@ def test_to_excel_multiindex(self): tsframe.to_excel(path, 'test1', index_label = ['time','foo']) reader = ExcelFile(path) - recons = reader.parse('test1', index_col=[0,1]) + recons = reader.parse('test1', index_col=[0,1], has_index_labels=True) assert_frame_equal(tsframe, recons) # infer index