From cd393c13c1d944af277fa4de9f745680dd8baf14 Mon Sep 17 00:00:00 2001 From: Dieter Vandenbussche Date: Wed, 1 Feb 2012 22:18:26 -0500 Subject: [PATCH 01/14] Use izip instead of zip --- pandas/io/parsers.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 541db483a4d61..1ff042dc31e07 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -3,6 +3,7 @@ """ from StringIO import StringIO import re +from itertools import izip import numpy as np @@ -469,7 +470,7 @@ def get_chunk(self, rows=None): if len(self.columns) != len(zipped_content): raise Exception('wrong number of columns') - data = dict((k, v) for k, v in zip(self.columns, zipped_content)) + data = dict((k, v) for k, v in izip(self.columns, zipped_content)) # apply converters for col, f in self.converters.iteritems(): @@ -601,7 +602,7 @@ def parse(self, sheetname, header=0, skiprows=None, index_col=None, data = [] for i in range(sheet.nrows): row = [] - for value, typ in zip(sheet.row_values(i), sheet.row_types(i)): + for value, typ in izip(sheet.row_values(i), sheet.row_types(i)): if typ == XL_CELL_DATE: dt = xldate_as_tuple(value, datemode) # how to produce this first case? 
From 3da383743042ff8c427c972b3b4a926698e2d5ba Mon Sep 17 00:00:00 2001 From: Dieter Vandenbussche Date: Thu, 2 Feb 2012 10:49:48 -0500 Subject: [PATCH 02/14] Add to_excel method to DataFrame, add ExcelWriter helper class Refactor to_csv to use a helper method _helper_csvexcel that is also used by to_excel --- pandas/core/frame.py | 119 ++++++++++++++++++++++++++++--------------- pandas/io/parsers.py | 65 +++++++++++++++++++++++ 2 files changed, 143 insertions(+), 41 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f3a2315be8f94..387c7dec792a1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -840,44 +840,8 @@ def to_panel(self): to_wide = deprecate('to_wide', to_panel) - def to_csv(self, path, sep=",", na_rep='', cols=None, header=True, - index=True, index_label=None, mode='w', nanRep=None, - encoding=None): - """ - Write DataFrame to a comma-separated values (csv) file - - Parameters - ---------- - path : string - File path - nanRep : string, default '' - Missing data rep'n - cols : sequence, optional - Columns to write - header : boolean, default True - Write out column names - index : boolean, default True - Write row names (index) - index_label : string or sequence, default None - Column label for index column(s) if desired. If None is given, and - `header` and `index` are True, then the index names are used. A - sequence should be given if the DataFrame uses MultiIndex. - mode : Python write mode, default 'w' - sep : character, default "," - Field delimiter for the output file. 
- encoding : string, optional - a string representing the encoding to use if the contents are - non-ascii, for python versions prior to 3 - """ - f = open(path, mode) - csvout = csv.writer(f, lineterminator='\n', delimiter=sep) - - if nanRep is not None: # pragma: no cover - import warnings - warnings.warn("nanRep is deprecated, use na_rep", - FutureWarning) - na_rep = nanRep - + def _helper_csvexcel(self, writer, na_rep=None, cols=None, header=True, + index=True, index_label=None, encoding=None): if cols is None: cols = self.columns @@ -911,7 +875,7 @@ def to_csv(self, path, sep=",", na_rep='', cols=None, header=True, encoded_labels = list(index_label) encoded_cols = list(cols) - csvout.writerow(encoded_labels + encoded_cols) + writer.writerow(encoded_labels + encoded_cols) else: if encoding is not None: encoded_cols = [csv_encode(val, encoding=encoding) @@ -919,7 +883,7 @@ def to_csv(self, path, sep=",", na_rep='', cols=None, header=True, else: encoded_cols = list(cols) - csvout.writerow(encoded_cols) + writer.writerow(encoded_cols) nlevels = getattr(self.index, 'nlevels', 1) for idx in self.index: @@ -942,10 +906,83 @@ def to_csv(self, path, sep=",", na_rep='', cols=None, header=True, else: encoded_rows = list(row_fields) - csvout.writerow(encoded_rows) + writer.writerow(encoded_rows) + + def to_csv(self, path, sep=",", na_rep='', cols=None, header=True, + index=True, index_label=None, mode='w', nanRep=None, + encoding=None): + """ + Write DataFrame to a comma-separated values (csv) file + + Parameters + ---------- + path : string + File path + nanRep : string, default '' + Missing data rep'n + cols : sequence, optional + Columns to write + header : boolean, default True + Write out column names + index : boolean, default True + Write row names (index) + index_label : string or sequence, default None + Column label for index column(s) if desired. If None is given, and + `header` and `index` are True, then the index names are used. 
A + sequence should be given if the DataFrame uses MultiIndex. + mode : Python write mode, default 'w' + sep : character, default "," + Field delimiter for the output file. + encoding : string, optional + a string representing the encoding to use if the contents are + non-ascii, for python versions prior to 3 + """ + f = open(path, mode) + csvout = csv.writer(f, lineterminator='\n', delimiter=sep) + if nanRep is not None: # pragma: no cover + import warnings + warnings.warn("nanRep is deprecated, use na_rep", + FutureWarning) + na_rep = nanRep + + self._helper_csvexcel(csvout, na_rep=na_rep, cols=cols, header=header, + index=index, index_label=index_label, encoding=encoding) f.close() + def to_excel(self, excel_writer, sheet_name, na_rep='', cols=None, header=True, + index=True, index_label=None): + """ + Write DataFrame to a excel sheet + + Parameters + ---------- + excel_writer : string or ExcelWriter object + File path or existing ExcelWriter + na_rep : string, default '' + Missing data rep'n + cols : sequence, optional + Columns to write + header : boolean, default True + Write out column names + index : boolean, default True + Write row names (index) + index_label : string or sequence, default None + Column label for index column(s) if desired. If None is given, and + `header` and `index` are True, then the index names are used. A + sequence should be given if the DataFrame uses MultiIndex. 
+ """ + from pandas.io.parsers import ExcelWriter + needSave = False + if isinstance(excel_writer, str): + excel_writer = ExcelWriter(excel_writer) + needSave = True + excel_writer.cur_sheet = sheet_name + self._helper_csvexcel(excel_writer, na_rep=na_rep, cols=cols, header=header, + index=index, index_label=index_label, encoding=None) + if needSave: + excel_writer.save() + @Appender(docstring_to_string, indents=1) def to_string(self, buf=None, columns=None, col_space=None, colSpace=None, header=True, index=True, na_rep='NaN', formatters=None, diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 1ff042dc31e07..93c3e743367f0 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -9,6 +9,7 @@ from pandas.core.index import Index, MultiIndex from pandas.core.frame import DataFrame +import datetime import pandas.core.common as com import pandas._tseries as lib @@ -621,3 +622,67 @@ def parse(self, sheetname, header=0, skiprows=None, index_col=None, chunksize=chunksize) return parser.get_chunk() + +class ExcelWriter(object): + """ + Class for writing DataFrame objects into excel sheets, uses xlwt. 
See + ExcelWriter.write for more documentation + + Parameters + ---------- + path : string + Path to xls file + """ + def __init__(self, path): + import xlwt + self.path = path + self.book = xlwt.Workbook() + self.sheets = {} + self.cur_sheet = None + self.fm_datetime = xlwt.easyxf(num_format_str='YYYY-MM-DD HH:MM:SS') + self.fm_date = xlwt.easyxf(num_format_str='YYYY-MM-DD') + + def __repr__(self): + return object.__repr__(self) + + def save(self): + """ + Save workbook to disk + """ + self.book.save(self.path) + + + def writerow(self, row, sheet_name=None): + """ + Write the given row into Excel an excel sheet + + Parameters + ---------- + row : list + Row of data to save to Excel sheet + sheet_name : string, default None + Name of Excel sheet, if None, then use self.cur_sheet + """ + if sheet_name is None: + sheet_name = self.cur_sheet + if sheet_name is None: + raise Exception('Must pass explicit sheet_name or set cur_sheet property') + if sheet_name in self.sheets: + sheet, row_idx = self.sheets[sheet_name] + else: + sheet = self.book.add_sheet(sheet_name) + row_idx = 0 + sheetrow = sheet.row(row_idx) + for i, val in enumerate(row): + if isinstance(val, (datetime.datetime, datetime.date)): + if isinstance(val, datetime.datetime): + sheetrow.write(i,val,self.fm_datetime) + else: + sheetrow.write(i,val,self.fm_date) + else: + sheetrow.write(i,val) + row_idx += 1 + if row_idx == 1000: + sheet.flush_row_data() + self.sheets[sheet_name] = (sheet, row_idx) + From b8d3948bd1c7816878c085aa389cdf32340ef62b Mon Sep 17 00:00:00 2001 From: Dieter Vandenbussche Date: Thu, 2 Feb 2012 23:34:29 -0500 Subject: [PATCH 03/14] Minor changes based on comments --- pandas/core/frame.py | 4 ++-- pandas/io/parsers.py | 3 --- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 387c7dec792a1..7f00b582303fc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -973,14 +973,14 @@ def to_excel(self, excel_writer, 
sheet_name, na_rep='', cols=None, header=True, sequence should be given if the DataFrame uses MultiIndex. """ from pandas.io.parsers import ExcelWriter - needSave = False + need_save = False if isinstance(excel_writer, str): excel_writer = ExcelWriter(excel_writer) needSave = True excel_writer.cur_sheet = sheet_name self._helper_csvexcel(excel_writer, na_rep=na_rep, cols=cols, header=header, index=index, index_label=index_label, encoding=None) - if needSave: + if need_save: excel_writer.save() @Appender(docstring_to_string, indents=1) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 93c3e743367f0..aaef735e52ee1 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -642,9 +642,6 @@ def __init__(self, path): self.fm_datetime = xlwt.easyxf(num_format_str='YYYY-MM-DD HH:MM:SS') self.fm_date = xlwt.easyxf(num_format_str='YYYY-MM-DD') - def __repr__(self): - return object.__repr__(self) - def save(self): """ Save workbook to disk From 07cb55afa3ca4f50d9743a71b0f9a3f455838252 Mon Sep 17 00:00:00 2001 From: Dieter Vandenbussche Date: Thu, 2 Feb 2012 23:35:29 -0500 Subject: [PATCH 04/14] Add unittest for to_excel --- pandas/tests/test_frame.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 31732cf7cdaa2..9c6dc94c6e789 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -18,6 +18,7 @@ from pandas.core.index import NULL_INDEX from pandas.core.api import (DataFrame, Index, Series, notnull, isnull, MultiIndex) +from pandas.io.parsers import (ExcelFile, ExcelWriter) from pandas.util.testing import (assert_almost_equal, assert_series_equal, @@ -2490,6 +2491,40 @@ def test_to_csv_unicode(self): assert_frame_equal(df, df2) os.remove(path) + def test_to_excel_from_excel(self): + path = '__tmp__.xls' + + self.frame['A'][:5] = nan + + self.frame.to_excel(path,'test1') + self.frame.to_excel(path,'test1', cols=['A', 'B']) + 
self.frame.to_excel(path,'test1', header=False) + self.frame.to_excel(path,'test1', index=False) + + # test roundtrip + self.frame.to_excel(path,'test1') + reader = ExcelFile(path) + recons = reader.parse('test1',index_col=0) + assert_frame_equal(self.frame, recons) + + self.tsframe.to_excel(path,'test1') + reader = ExcelFile(path) + recons = reader.parse('test1',index_col=0) + assert_frame_equal(self.tsframe, recons) + + # Test writing to separate sheets + writer = ExcelWriter(path) + self.frame.to_excel(writer,'test1') + self.tsframe.to_excel(writer,'test2') + writer.save() + reader = ExcelFile(path) + recons = reader.parse('test1',index_col=0) + assert_frame_equal(self.frame, recons) + recons = reader.parse('test2',index_col=0) + assert_frame_equal(self.tsframe, recons) + + os.remove(path) + def test_info(self): io = StringIO() self.frame.info(buf=io) From d1ea88b38e71dda4f6ee3b654f3d3179c210fb32 Mon Sep 17 00:00:00 2001 From: Dieter Vandenbussche Date: Fri, 3 Feb 2012 15:39:47 -0500 Subject: [PATCH 05/14] Missed one variable rename --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7f00b582303fc..fdf81b68af1fd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -976,7 +976,7 @@ def to_excel(self, excel_writer, sheet_name, na_rep='', cols=None, header=True, need_save = False if isinstance(excel_writer, str): excel_writer = ExcelWriter(excel_writer) - needSave = True + need_save = True excel_writer.cur_sheet = sheet_name self._helper_csvexcel(excel_writer, na_rep=na_rep, cols=cols, header=header, index=index, index_label=index_label, encoding=None) From 4e3649c6c251821271470c121c69ba6b0a30a97c Mon Sep 17 00:00:00 2001 From: Dieter Vandenbussche Date: Fri, 3 Feb 2012 15:52:38 -0500 Subject: [PATCH 06/14] Special case np.int64 in ExcelWriter and add unittest --- pandas/io/parsers.py | 2 ++ pandas/tests/test_frame.py | 7 +++++++ 2 files changed, 9 insertions(+) diff 
--git a/pandas/io/parsers.py b/pandas/io/parsers.py index aaef735e52ee1..4204274b70109 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -676,6 +676,8 @@ def writerow(self, row, sheet_name=None): sheetrow.write(i,val,self.fm_datetime) else: sheetrow.write(i,val,self.fm_date) + elif isinstance(val, np.int64): + sheetrow.write(i,int(val)) else: sheetrow.write(i,val) row_idx += 1 diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 9c6dc94c6e789..ee973aeb35441 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -2512,6 +2512,13 @@ def test_to_excel_from_excel(self): recons = reader.parse('test1',index_col=0) assert_frame_equal(self.tsframe, recons) + #Test np.int64 + frame = DataFrame(np.random.randn(10,2)) + frame.to_excel(path,'test1') + reader = ExcelFile(path) + recons = reader.parse('test1',index_col=0) + assert_frame_equal(frame, recons) + # Test writing to separate sheets writer = ExcelWriter(path) self.frame.to_excel(writer,'test1') From 9b51dd939bfc381568b8091025e15664e40c564d Mon Sep 17 00:00:00 2001 From: Dieter Vandenbussche Date: Fri, 3 Feb 2012 16:35:41 -0500 Subject: [PATCH 07/14] Add support for reading/writing .xlsx using openpyxl --- pandas/io/parsers.py | 70 ++++++++++++++++++++++++++++++++++---- pandas/tests/test_frame.py | 41 ++++++++++++++++++++++ 2 files changed, 104 insertions(+), 7 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 4204274b70109..7338f88953e6e 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -563,9 +563,15 @@ class ExcelFile(object): Path to xls file """ def __init__(self, path): - import xlrd + self.use_xlsx = True + if path.endswith('.xls'): + self.use_xlsx = False + import xlrd + self.book = xlrd.open_workbook(path) + else: + from openpyxl import load_workbook + self.book = load_workbook(path, use_iterators=True) self.path = path - self.book = xlrd.open_workbook(path) def __repr__(self): return object.__repr__(self) @@ -594,6 
+600,34 @@ def parse(self, sheetname, header=0, skiprows=None, index_col=None, ------- parsed : DataFrame """ + if self.use_xlsx: + return self._parse_xlsx(sheetname, header=header, skiprows=skiprows, index_col=index_col, + parse_dates=parse_dates, date_parser=date_parser, na_values=na_values, + chunksize=chunksize) + else: + return self._parse_xls(sheetname, header=header, skiprows=skiprows, index_col=index_col, + parse_dates=parse_dates, date_parser=date_parser, na_values=na_values, + chunksize=chunksize) + + def _parse_xlsx(self, sheetname, header=0, skiprows=None, index_col=None, + parse_dates=False, date_parser=None, na_values=None, + chunksize=None): + sheet = self.book.get_sheet_by_name(name=sheetname) + data = [] + for row in sheet.iter_rows(): # it brings a new method: iter_rows() + data.append([cell.internal_value for cell in row]) + parser = TextParser(data, header=header, index_col=index_col, + na_values=na_values, + parse_dates=parse_dates, + date_parser=date_parser, + skiprows=skiprows, + chunksize=chunksize) + + return parser.get_chunk() + + def _parse_xls(self, sheetname, header=0, skiprows=None, index_col=None, + parse_dates=False, date_parser=None, na_values=None, + chunksize=None): from datetime import MINYEAR, time, datetime from xlrd import xldate_as_tuple, XL_CELL_DATE @@ -634,13 +668,19 @@ class ExcelWriter(object): Path to xls file """ def __init__(self, path): - import xlwt + self.use_xlsx = True + if path.endswith('.xls'): + self.use_xlsx = False + import xlwt + self.book = xlwt.Workbook() + self.fm_datetime = xlwt.easyxf(num_format_str='YYYY-MM-DD HH:MM:SS') + self.fm_date = xlwt.easyxf(num_format_str='YYYY-MM-DD') + else: + from openpyxl import Workbook + self.book = Workbook(optimized_write = True) self.path = path - self.book = xlwt.Workbook() self.sheets = {} self.cur_sheet = None - self.fm_datetime = xlwt.easyxf(num_format_str='YYYY-MM-DD HH:MM:SS') - self.fm_date = xlwt.easyxf(num_format_str='YYYY-MM-DD') def save(self): """ @@ 
-648,7 +688,6 @@ def save(self): """ self.book.save(self.path) - def writerow(self, row, sheet_name=None): """ Write the given row into Excel an excel sheet @@ -664,6 +703,12 @@ def writerow(self, row, sheet_name=None): sheet_name = self.cur_sheet if sheet_name is None: raise Exception('Must pass explicit sheet_name or set cur_sheet property') + if self.use_xlsx: + self._writerow_xlsx(row, sheet_name) + else: + self._writerow_xls(row, sheet_name) + + def _writerow_xls(self, row, sheet_name): if sheet_name in self.sheets: sheet, row_idx = self.sheets[sheet_name] else: @@ -685,3 +730,14 @@ def writerow(self, row, sheet_name=None): sheet.flush_row_data() self.sheets[sheet_name] = (sheet, row_idx) + def _writerow_xlsx(self, row, sheet_name): + if sheet_name in self.sheets: + sheet, row_idx = self.sheets[sheet_name] + else: + sheet = self.book.create_sheet() + sheet.title = sheet_name + row_idx = 0 + + sheet.append([int(val) if isinstance(val, np.int64) else val for val in row]) + row_idx += 1 + self.sheets[sheet_name] = (sheet, row_idx) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index ee973aeb35441..9d244432764f3 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -2532,6 +2532,47 @@ def test_to_excel_from_excel(self): os.remove(path) + def test_to_excel2007_from_excel2007(self): + path = '__tmp__.xlsx' + + self.frame['A'][:5] = nan + + self.frame.to_excel(path,'test1') + self.frame.to_excel(path,'test1', cols=['A', 'B']) + self.frame.to_excel(path,'test1', header=False) + self.frame.to_excel(path,'test1', index=False) + + # test roundtrip + self.frame.to_excel(path,'test1') + reader = ExcelFile(path) + recons = reader.parse('test1',index_col=0) + assert_frame_equal(self.frame, recons) + + self.tsframe.to_excel(path,'test1') + reader = ExcelFile(path) + recons = reader.parse('test1',index_col=0) + assert_frame_equal(self.tsframe, recons) + + #Test np.int64 + frame = DataFrame(np.random.randn(10,2)) + 
frame.to_excel(path,'test1') + reader = ExcelFile(path) + recons = reader.parse('test1',index_col=0) + assert_frame_equal(frame, recons) + + # Test writing to separate sheets + writer = ExcelWriter(path) + self.frame.to_excel(writer,'test1') + self.tsframe.to_excel(writer,'test2') + writer.save() + reader = ExcelFile(path) + recons = reader.parse('test1',index_col=0) + assert_frame_equal(self.frame, recons) + recons = reader.parse('test2',index_col=0) + assert_frame_equal(self.tsframe, recons) + + os.remove(path) + def test_info(self): io = StringIO() self.frame.info(buf=io) From f627638e2a1e91c52fc8e510df7b05f5f5ed7476 Mon Sep 17 00:00:00 2001 From: Dieter Vandenbussche Date: Fri, 3 Feb 2012 16:44:27 -0500 Subject: [PATCH 08/14] Document writing multiple DataFrames to different sheets --- pandas/core/frame.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index fdf81b68af1fd..26633d58c7911 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -971,6 +971,16 @@ def to_excel(self, excel_writer, sheet_name, na_rep='', cols=None, header=True, Column label for index column(s) if desired. If None is given, and `header` and `index` are True, then the index names are used. A sequence should be given if the DataFrame uses MultiIndex. + + Notes + ----- + If passing an existing ExcelWriter object, then the sheet will be added + to the existing workbook. 
This can be used to save different DataFrames + to one workbook + >>> writer = ExcelWriter('output.xlsx') + >>> df1.to_excel(writer,'sheet1') + >>> df2.to_excel(writer,'sheet2') + >>> writer.save() """ from pandas.io.parsers import ExcelWriter need_save = False From b95af85d9bba1efc2ab91c500bc5b0f5a5b857a5 Mon Sep 17 00:00:00 2001 From: Dieter Vandenbussche Date: Fri, 3 Feb 2012 17:03:24 -0500 Subject: [PATCH 09/14] Test to_excel with MultiIndex --- pandas/tests/test_frame.py | 90 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 9d244432764f3..b6dd035a32497 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -2532,6 +2532,51 @@ def test_to_excel_from_excel(self): os.remove(path) + def test_to_excel_multiindex(self): + path = '__tmp__.xls' + + frame = self.frame + old_index = frame.index + arrays = np.arange(len(old_index)*2).reshape(2,-1) + new_index = MultiIndex.from_arrays(arrays, names=['first', 'second']) + frame.index = new_index + frame.to_excel(path, 'test1', header=False) + frame.to_excel(path, 'test1', cols=['A', 'B']) + + # round trip + frame.to_excel(path, 'test1') + reader = ExcelFile(path) + df = reader.parse('test1', index_col=[0,1], parse_dates=False) + assert_frame_equal(frame, df) + self.assertEqual(frame.index.names, df.index.names) + self.frame.index = old_index # needed if setUP becomes a classmethod + + # try multiindex with dates + tsframe = self.tsframe + old_index = tsframe.index + new_index = [old_index, np.arange(len(old_index))] + tsframe.index = MultiIndex.from_arrays(new_index) + + tsframe.to_excel(path, 'test1', index_label = ['time','foo']) + reader = ExcelFile(path) + recons = reader.parse('test1', index_col=[0,1]) + assert_frame_equal(tsframe, recons) + + # do not load index + tsframe.to_excel(path, 'test1') + reader = ExcelFile(path) + recons = reader.parse('test1', index_col=None) + 
np.testing.assert_equal(len(recons.columns), len(tsframe.columns) + 2) + + # no index + tsframe.to_excel(path, 'test1', index=False) + reader = ExcelFile(path) + recons = reader.parse('test1', index_col=None) + assert_almost_equal(recons.values, self.tsframe.values) + self.tsframe.index = old_index # needed if setUP becomes classmethod + + os.remove(path) + def test_to_excel2007_from_excel2007(self): path = '__tmp__.xlsx' @@ -2573,6 +2618,51 @@ def test_to_excel2007_from_excel2007(self): os.remove(path) + def test_to_excel2007_multiindex(self): + path = '__tmp__.xlsx' + + frame = self.frame + old_index = frame.index + arrays = np.arange(len(old_index)*2).reshape(2,-1) + new_index = MultiIndex.from_arrays(arrays, names=['first', 'second']) + frame.index = new_index + frame.to_excel(path, 'test1', header=False) + frame.to_excel(path, 'test1', cols=['A', 'B']) + + # round trip + frame.to_excel(path, 'test1') + reader = ExcelFile(path) + df = reader.parse('test1', index_col=[0,1], parse_dates=False) + assert_frame_equal(frame, df) + self.assertEqual(frame.index.names, df.index.names) + self.frame.index = old_index # needed if setUP becomes a classmethod + + # try multiindex with dates + tsframe = self.tsframe + old_index = tsframe.index + new_index = [old_index, np.arange(len(old_index))] + tsframe.index = MultiIndex.from_arrays(new_index) + + tsframe.to_excel(path, 'test1', index_label = ['time','foo']) + reader = ExcelFile(path) + recons = reader.parse('test1', index_col=[0,1]) + assert_frame_equal(tsframe, recons) + + # do not load index + tsframe.to_excel(path, 'test1') + reader = ExcelFile(path) + recons = reader.parse('test1', index_col=None) + np.testing.assert_equal(len(recons.columns), len(tsframe.columns) + 2) + + # no index + tsframe.to_excel(path, 'test1', index=False) + reader = ExcelFile(path) + recons = reader.parse('test1', index_col=None) + assert_almost_equal(recons.values, self.tsframe.values) + self.tsframe.index = old_index # needed if setUP 
becomes classmethod + + os.remove(path) + def test_info(self): io = StringIO() self.frame.info(buf=io) From f50e4b9576f9292e2be930217346df851fa3961f Mon Sep 17 00:00:00 2001 From: Dieter Vandenbussche Date: Fri, 3 Feb 2012 17:18:36 -0500 Subject: [PATCH 10/14] Add to_excel in Panel and corresponding test --- pandas/core/panel.py | 18 ++++++++++++++++++ pandas/tests/test_panel.py | 9 +++++++++ 2 files changed, 27 insertions(+) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index a8ac4832eb8fb..98da0073599d6 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -441,6 +441,24 @@ def to_sparse(self, fill_value=None, kind='block'): default_kind=kind, default_fill_value=fill_value) + def to_excel(self, path, na_rep=''): + """ + Write each DataFrame in Panel to a separate excel sheet + + Parameters + ---------- + excel_writer : string or ExcelWriter object + File path or existing ExcelWriter + na_rep : string, default '' + Missing data rep'n + """ + from pandas.io.parsers import ExcelWriter + writer = ExcelWriter(path) + for item, df in self.iteritems(): + name = str(item) + df.to_excel(writer, name, na_rep=na_rep) + writer.save() + # TODO: needed? 
def keys(self): return list(self.items) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 3e07174bdec1b..5d90dbdb79d62 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -16,6 +16,7 @@ import pandas.core.common as com import pandas.core.panel as panelmod from pandas.util import py3compat +from pandas.io.parsers import (ExcelFile, ExcelWriter) from pandas.util.testing import (assert_panel_equal, assert_frame_equal, @@ -973,6 +974,14 @@ def test_from_frame_level1_unsorted(self): p = df.to_panel() assert_frame_equal(p.minor_xs(2), df.ix[:,2].sort_index()) + def test_to_excel(self): + path = '__tmp__.xlsx' + self.panel.to_excel(path) + reader = ExcelFile(path) + for item, df in self.panel.iteritems(): + recdf = reader.parse(str(item),index_col=0) + assert_frame_equal(df, recdf) + class TestLongPanel(unittest.TestCase): """ LongPanel no longer exists, but... From 1f46216bdfadb6887b7f6f6c3db274d8e03c13c3 Mon Sep 17 00:00:00 2001 From: Dieter Vandenbussche Date: Fri, 3 Feb 2012 19:16:23 -0500 Subject: [PATCH 11/14] Document sheet_name arg in docstring, give it a default value --- pandas/core/frame.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 26633d58c7911..a3203e210795b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -950,7 +950,7 @@ def to_csv(self, path, sep=",", na_rep='', cols=None, header=True, index=index, index_label=index_label, encoding=encoding) f.close() - def to_excel(self, excel_writer, sheet_name, na_rep='', cols=None, header=True, + def to_excel(self, excel_writer, sheet_name = 'sheet1', na_rep='', cols=None, header=True, index=True, index_label=None): """ Write DataFrame to a excel sheet @@ -959,6 +959,8 @@ def to_excel(self, excel_writer, sheet_name, na_rep='', cols=None, header=True, ---------- excel_writer : string or ExcelWriter object File path or existing ExcelWriter + sheet_name : string, default 'sheet1' + 
Name of sheet which will contain DataFrame na_rep : string, default '' Missing data rep'n cols : sequence, optional From 445bd8e16ad798fb7ad2eff0ceb423a4d77e0f68 Mon Sep 17 00:00:00 2001 From: Dieter Vandenbussche Date: Tue, 7 Feb 2012 20:57:23 -0500 Subject: [PATCH 12/14] Add some additional excel reading/writing tests --- pandas/tests/test_frame.py | 42 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index b6dd035a32497..5a4f121c97533 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -2506,6 +2506,27 @@ def test_to_excel_from_excel(self): reader = ExcelFile(path) recons = reader.parse('test1',index_col=0) assert_frame_equal(self.frame, recons) + + self.frame.to_excel(path,'test1', index=False) + reader = ExcelFile(path) + recons = reader.parse('test1',index_col=None) + recons.index = self.frame.index + assert_frame_equal(self.frame, recons) + + self.frame.to_excel(path,'test1') + reader = ExcelFile(path) + recons = reader.parse('test1',index_col=0,skiprows=[1]) + assert_frame_equal(self.frame.ix[1:], recons) + + self.frame.to_excel(path,'test1',na_rep='NA') + reader = ExcelFile(path) + recons = reader.parse('test1',index_col=0,na_values=['NA']) + assert_frame_equal(self.frame, recons) + + self.mixed_frame.to_excel(path,'test1') + reader = ExcelFile(path) + recons = reader.parse('test1',index_col=0) + assert_frame_equal(self.mixed_frame, recons) self.tsframe.to_excel(path,'test1') reader = ExcelFile(path) @@ -2593,6 +2614,27 @@ def test_to_excel2007_from_excel2007(self): recons = reader.parse('test1',index_col=0) assert_frame_equal(self.frame, recons) + self.frame.to_excel(path,'test1', index=False) + reader = ExcelFile(path) + recons = reader.parse('test1',index_col=None) + recons.index = self.frame.index + assert_frame_equal(self.frame, recons) + + self.frame.to_excel(path,'test1') + reader = ExcelFile(path) + recons = 
reader.parse('test1',index_col=0,skiprows=[1]) + assert_frame_equal(self.frame.ix[1:], recons) + + self.frame.to_excel(path,'test1',na_rep='NA') + reader = ExcelFile(path) + recons = reader.parse('test1',index_col=0,na_values=['NA']) + assert_frame_equal(self.frame, recons) + + self.mixed_frame.to_excel(path,'test1') + reader = ExcelFile(path) + recons = reader.parse('test1',index_col=0) + assert_frame_equal(self.mixed_frame, recons) + self.tsframe.to_excel(path,'test1') reader = ExcelFile(path) recons = reader.parse('test1',index_col=0) From 494f9bd043ce95d8de166cf073d900733884e00b Mon Sep 17 00:00:00 2001 From: Dieter Vandenbussche Date: Tue, 7 Feb 2012 21:01:42 -0500 Subject: [PATCH 13/14] Combine xlsx tests with xls to avoid code duplication --- pandas/tests/test_frame.py | 313 ++++++++++++------------------------- 1 file changed, 104 insertions(+), 209 deletions(-) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 5a4f121c97533..fecb25fd5bc00 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -2492,218 +2492,113 @@ def test_to_csv_unicode(self): os.remove(path) def test_to_excel_from_excel(self): - path = '__tmp__.xls' - - self.frame['A'][:5] = nan - - self.frame.to_excel(path,'test1') - self.frame.to_excel(path,'test1', cols=['A', 'B']) - self.frame.to_excel(path,'test1', header=False) - self.frame.to_excel(path,'test1', index=False) - - # test roundtrip - self.frame.to_excel(path,'test1') - reader = ExcelFile(path) - recons = reader.parse('test1',index_col=0) - assert_frame_equal(self.frame, recons) - - self.frame.to_excel(path,'test1', index=False) - reader = ExcelFile(path) - recons = reader.parse('test1',index_col=None) - recons.index = self.frame.index - assert_frame_equal(self.frame, recons) - - self.frame.to_excel(path,'test1') - reader = ExcelFile(path) - recons = reader.parse('test1',index_col=0,skiprows=[1]) - assert_frame_equal(self.frame.ix[1:], recons) - - 
self.frame.to_excel(path,'test1',na_rep='NA') - reader = ExcelFile(path) - recons = reader.parse('test1',index_col=0,na_values=['NA']) - assert_frame_equal(self.frame, recons) - - self.mixed_frame.to_excel(path,'test1') - reader = ExcelFile(path) - recons = reader.parse('test1',index_col=0) - assert_frame_equal(self.mixed_frame, recons) - - self.tsframe.to_excel(path,'test1') - reader = ExcelFile(path) - recons = reader.parse('test1',index_col=0) - assert_frame_equal(self.tsframe, recons) - - #Test np.int64 - frame = DataFrame(np.random.randn(10,2)) - frame.to_excel(path,'test1') - reader = ExcelFile(path) - recons = reader.parse('test1',index_col=0) - assert_frame_equal(frame, recons) - - # Test writing to separate sheets - writer = ExcelWriter(path) - self.frame.to_excel(writer,'test1') - self.tsframe.to_excel(writer,'test2') - writer.save() - reader = ExcelFile(path) - recons = reader.parse('test1',index_col=0) - assert_frame_equal(self.frame, recons) - recons = reader.parse('test2',index_col=0) - assert_frame_equal(self.tsframe, recons) - - os.remove(path) + for ext in ['xls', 'xlsx']: + path = '__tmp__.' 
+ ext + + self.frame['A'][:5] = nan + + self.frame.to_excel(path,'test1') + self.frame.to_excel(path,'test1', cols=['A', 'B']) + self.frame.to_excel(path,'test1', header=False) + self.frame.to_excel(path,'test1', index=False) + + # test roundtrip + self.frame.to_excel(path,'test1') + reader = ExcelFile(path) + recons = reader.parse('test1',index_col=0) + assert_frame_equal(self.frame, recons) + + self.frame.to_excel(path,'test1', index=False) + reader = ExcelFile(path) + recons = reader.parse('test1',index_col=None) + recons.index = self.frame.index + assert_frame_equal(self.frame, recons) + + self.frame.to_excel(path,'test1') + reader = ExcelFile(path) + recons = reader.parse('test1',index_col=0,skiprows=[1]) + assert_frame_equal(self.frame.ix[1:], recons) + + self.frame.to_excel(path,'test1',na_rep='NA') + reader = ExcelFile(path) + recons = reader.parse('test1',index_col=0,na_values=['NA']) + assert_frame_equal(self.frame, recons) + + self.mixed_frame.to_excel(path,'test1') + reader = ExcelFile(path) + recons = reader.parse('test1',index_col=0) + assert_frame_equal(self.mixed_frame, recons) + + self.tsframe.to_excel(path,'test1') + reader = ExcelFile(path) + recons = reader.parse('test1',index_col=0) + assert_frame_equal(self.tsframe, recons) + + #Test np.int64 + frame = DataFrame(np.random.randn(10,2)) + frame.to_excel(path,'test1') + reader = ExcelFile(path) + recons = reader.parse('test1',index_col=0) + assert_frame_equal(frame, recons) + + # Test writing to separate sheets + writer = ExcelWriter(path) + self.frame.to_excel(writer,'test1') + self.tsframe.to_excel(writer,'test2') + writer.save() + reader = ExcelFile(path) + recons = reader.parse('test1',index_col=0) + assert_frame_equal(self.frame, recons) + recons = reader.parse('test2',index_col=0) + assert_frame_equal(self.tsframe, recons) + + os.remove(path) def test_to_excel_multiindex(self): - path = '__tmp__.xls' - - frame = self.frame - old_index = frame.index - arrays = 
np.arange(len(old_index)*2).reshape(2,-1) - new_index = MultiIndex.from_arrays(arrays, names=['first', 'second']) - frame.index = new_index - frame.to_excel(path, 'test1', header=False) - frame.to_excel(path, 'test1', cols=['A', 'B']) - - # round trip - frame.to_excel(path, 'test1') - reader = ExcelFile(path) - df = reader.parse('test1', index_col=[0,1], parse_dates=False) - assert_frame_equal(frame, df) - self.assertEqual(frame.index.names, df.index.names) - self.frame.index = old_index # needed if setUP becomes a classmethod - - # try multiindex with dates - tsframe = self.tsframe - old_index = tsframe.index - new_index = [old_index, np.arange(len(old_index))] - tsframe.index = MultiIndex.from_arrays(new_index) - - tsframe.to_excel(path, 'test1', index_label = ['time','foo']) - reader = ExcelFile(path) - recons = reader.parse('test1', index_col=[0,1]) - assert_frame_equal(tsframe, recons) - - # do not load index - tsframe.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = reader.parse('test1', index_col=None) - np.testing.assert_equal(len(recons.columns), len(tsframe.columns) + 2) - - # no index - tsframe.to_excel(path, 'test1', index=False) - reader = ExcelFile(path) - recons = reader.parse('test1', index_col=None) - assert_almost_equal(recons.values, self.tsframe.values) - self.tsframe.index = old_index # needed if setUP becomes classmethod - - os.remove(path) - - def test_to_excel2007_from_excel2007(self): - path = '__tmp__.xlsx' + for ext in ['xls', 'xlsx']: + path = '__tmp__.' 
+ ext - self.frame['A'][:5] = nan - - self.frame.to_excel(path,'test1') - self.frame.to_excel(path,'test1', cols=['A', 'B']) - self.frame.to_excel(path,'test1', header=False) - self.frame.to_excel(path,'test1', index=False) - - # test roundtrip - self.frame.to_excel(path,'test1') - reader = ExcelFile(path) - recons = reader.parse('test1',index_col=0) - assert_frame_equal(self.frame, recons) - - self.frame.to_excel(path,'test1', index=False) - reader = ExcelFile(path) - recons = reader.parse('test1',index_col=None) - recons.index = self.frame.index - assert_frame_equal(self.frame, recons) - - self.frame.to_excel(path,'test1') - reader = ExcelFile(path) - recons = reader.parse('test1',index_col=0,skiprows=[1]) - assert_frame_equal(self.frame.ix[1:], recons) - - self.frame.to_excel(path,'test1',na_rep='NA') - reader = ExcelFile(path) - recons = reader.parse('test1',index_col=0,na_values=['NA']) - assert_frame_equal(self.frame, recons) - - self.mixed_frame.to_excel(path,'test1') - reader = ExcelFile(path) - recons = reader.parse('test1',index_col=0) - assert_frame_equal(self.mixed_frame, recons) - - self.tsframe.to_excel(path,'test1') - reader = ExcelFile(path) - recons = reader.parse('test1',index_col=0) - assert_frame_equal(self.tsframe, recons) - - #Test np.int64 - frame = DataFrame(np.random.randn(10,2)) - frame.to_excel(path,'test1') - reader = ExcelFile(path) - recons = reader.parse('test1',index_col=0) - assert_frame_equal(frame, recons) - - # Test writing to separate sheets - writer = ExcelWriter(path) - self.frame.to_excel(writer,'test1') - self.tsframe.to_excel(writer,'test2') - writer.save() - reader = ExcelFile(path) - recons = reader.parse('test1',index_col=0) - assert_frame_equal(self.frame, recons) - recons = reader.parse('test2',index_col=0) - assert_frame_equal(self.tsframe, recons) - - os.remove(path) - - def test_to_excel2007_multiindex(self): - path = '__tmp__.xlsx' - - frame = self.frame - old_index = frame.index - arrays = 
np.arange(len(old_index)*2).reshape(2,-1) - new_index = MultiIndex.from_arrays(arrays, names=['first', 'second']) - frame.index = new_index - frame.to_excel(path, 'test1', header=False) - frame.to_excel(path, 'test1', cols=['A', 'B']) - - # round trip - frame.to_excel(path, 'test1') - reader = ExcelFile(path) - df = reader.parse('test1', index_col=[0,1], parse_dates=False) - assert_frame_equal(frame, df) - self.assertEqual(frame.index.names, df.index.names) - self.frame.index = old_index # needed if setUP becomes a classmethod - - # try multiindex with dates - tsframe = self.tsframe - old_index = tsframe.index - new_index = [old_index, np.arange(len(old_index))] - tsframe.index = MultiIndex.from_arrays(new_index) - - tsframe.to_excel(path, 'test1', index_label = ['time','foo']) - reader = ExcelFile(path) - recons = reader.parse('test1', index_col=[0,1]) - assert_frame_equal(tsframe, recons) - - # do not load index - tsframe.to_excel(path, 'test1') - reader = ExcelFile(path) - recons = reader.parse('test1', index_col=None) - np.testing.assert_equal(len(recons.columns), len(tsframe.columns) + 2) - - # no index - tsframe.to_excel(path, 'test1', index=False) - reader = ExcelFile(path) - recons = reader.parse('test1', index_col=None) - assert_almost_equal(recons.values, self.tsframe.values) - self.tsframe.index = old_index # needed if setUP becomes classmethod - - os.remove(path) + frame = self.frame + old_index = frame.index + arrays = np.arange(len(old_index)*2).reshape(2,-1) + new_index = MultiIndex.from_arrays(arrays, names=['first', 'second']) + frame.index = new_index + frame.to_excel(path, 'test1', header=False) + frame.to_excel(path, 'test1', cols=['A', 'B']) + + # round trip + frame.to_excel(path, 'test1') + reader = ExcelFile(path) + df = reader.parse('test1', index_col=[0,1], parse_dates=False) + assert_frame_equal(frame, df) + self.assertEqual(frame.index.names, df.index.names) + self.frame.index = old_index # needed if setUP becomes a classmethod + + # try 
multiindex with dates + tsframe = self.tsframe + old_index = tsframe.index + new_index = [old_index, np.arange(len(old_index))] + tsframe.index = MultiIndex.from_arrays(new_index) + + tsframe.to_excel(path, 'test1', index_label = ['time','foo']) + reader = ExcelFile(path) + recons = reader.parse('test1', index_col=[0,1]) + assert_frame_equal(tsframe, recons) + + # do not load index + tsframe.to_excel(path, 'test1') + reader = ExcelFile(path) + recons = reader.parse('test1', index_col=None) + np.testing.assert_equal(len(recons.columns), len(tsframe.columns) + 2) + + # no index + tsframe.to_excel(path, 'test1', index=False) + reader = ExcelFile(path) + recons = reader.parse('test1', index_col=None) + assert_almost_equal(recons.values, self.tsframe.values) + self.tsframe.index = old_index # needed if setUP becomes classmethod + + os.remove(path) def test_info(self): io = StringIO() From d1206104fdd4148db7c575d09cfed909a17fc347 Mon Sep 17 00:00:00 2001 From: Dieter Vandenbussche Date: Tue, 7 Feb 2012 21:11:05 -0500 Subject: [PATCH 14/14] Fix up docstrings --- pandas/io/parsers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 7338f88953e6e..49ed364127f3f 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -590,7 +590,7 @@ def parse(self, sheetname, header=0, skiprows=None, index_col=None, Row to use for the column labels of the parsed DataFrame skiprows : list-like Row numbers to skip (0-indexed) - index_col : int, default 0 + index_col : int, default None Column to use as the row labels of the DataFrame. Pass None if there is no such column na_values : list-like, default None @@ -659,8 +659,8 @@ def _parse_xls(self, sheetname, header=0, skiprows=None, index_col=None, class ExcelWriter(object): """ - Class for writing DataFrame objects into excel sheets, uses xlwt. 
See - ExcelWriter.write for more documentation + Class for writing DataFrame objects into Excel sheets; uses xlwt for xls + and openpyxl for xlsx. See DataFrame.to_excel for typical usage. Parameters ----------