Skip to content

Excel writing #735

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 14 commits into from
131 changes: 90 additions & 41 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -840,44 +840,8 @@ def to_panel(self):

to_wide = deprecate('to_wide', to_panel)

def to_csv(self, path, sep=",", na_rep='', cols=None, header=True,
index=True, index_label=None, mode='w', nanRep=None,
encoding=None):
"""
Write DataFrame to a comma-separated values (csv) file

Parameters
----------
path : string
File path
nanRep : string, default ''
Missing data rep'n
cols : sequence, optional
Columns to write
header : boolean, default True
Write out column names
index : boolean, default True
Write row names (index)
index_label : string or sequence, default None
Column label for index column(s) if desired. If None is given, and
`header` and `index` are True, then the index names are used. A
sequence should be given if the DataFrame uses MultiIndex.
mode : Python write mode, default 'w'
sep : character, default ","
Field delimiter for the output file.
encoding : string, optional
a string representing the encoding to use if the contents are
non-ascii, for python versions prior to 3
"""
f = open(path, mode)
csvout = csv.writer(f, lineterminator='\n', delimiter=sep)

if nanRep is not None: # pragma: no cover
import warnings
warnings.warn("nanRep is deprecated, use na_rep",
FutureWarning)
na_rep = nanRep

def _helper_csvexcel(self, writer, na_rep=None, cols=None, header=True,
index=True, index_label=None, encoding=None):
if cols is None:
cols = self.columns

Expand Down Expand Up @@ -911,15 +875,15 @@ def to_csv(self, path, sep=",", na_rep='', cols=None, header=True,
encoded_labels = list(index_label)
encoded_cols = list(cols)

csvout.writerow(encoded_labels + encoded_cols)
writer.writerow(encoded_labels + encoded_cols)
else:
if encoding is not None:
encoded_cols = [csv_encode(val, encoding=encoding)
for val in cols]
else:
encoded_cols = list(cols)

csvout.writerow(encoded_cols)
writer.writerow(encoded_cols)

nlevels = getattr(self.index, 'nlevels', 1)
for idx in self.index:
Expand All @@ -942,10 +906,95 @@ def to_csv(self, path, sep=",", na_rep='', cols=None, header=True,
else:
encoded_rows = list(row_fields)

csvout.writerow(encoded_rows)
writer.writerow(encoded_rows)

def to_csv(self, path, sep=",", na_rep='', cols=None, header=True,
           index=True, index_label=None, mode='w', nanRep=None,
           encoding=None):
    """
    Write DataFrame to a comma-separated values (csv) file

    Parameters
    ----------
    path : string
        File path
    sep : character, default ","
        Field delimiter for the output file.
    na_rep : string, default ''
        Missing data rep'n
    cols : sequence, optional
        Columns to write
    header : boolean, default True
        Write out column names
    index : boolean, default True
        Write row names (index)
    index_label : string or sequence, default None
        Column label for index column(s) if desired. If None is given, and
        `header` and `index` are True, then the index names are used. A
        sequence should be given if the DataFrame uses MultiIndex.
    mode : Python write mode, default 'w'
    nanRep : string, optional
        Deprecated alias of `na_rep`; when given it overrides `na_rep` and
        a FutureWarning is issued.
    encoding : string, optional
        a string representing the encoding to use if the contents are
        non-ascii, for python versions prior to 3
    """
    # Resolve the deprecated alias before touching the filesystem so that a
    # warning escalated to an error cannot leave an open handle behind.
    if nanRep is not None:  # pragma: no cover
        import warnings
        warnings.warn("nanRep is deprecated, use na_rep",
                      FutureWarning)
        na_rep = nanRep

    f = open(path, mode)
    try:
        csvout = csv.writer(f, lineterminator='\n', delimiter=sep)
        self._helper_csvexcel(csvout, na_rep=na_rep, cols=cols,
                              header=header, index=index,
                              index_label=index_label, encoding=encoding)
    finally:
        # Always release the file, even when row writing raises.
        f.close()

def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='', cols=None,
             header=True, index=True, index_label=None):
    """
    Write DataFrame to an excel sheet

    Parameters
    ----------
    excel_writer : string or ExcelWriter object
        File path or existing ExcelWriter
    sheet_name : string, default 'sheet1'
        Name of sheet which will contain DataFrame
    na_rep : string, default ''
        Missing data rep'n
    cols : sequence, optional
        Columns to write
    header : boolean, default True
        Write out column names
    index : boolean, default True
        Write row names (index)
    index_label : string or sequence, default None
        Column label for index column(s) if desired. If None is given, and
        `header` and `index` are True, then the index names are used. A
        sequence should be given if the DataFrame uses MultiIndex.

    Notes
    -----
    If passing an existing ExcelWriter object, then the sheet will be added
    to the existing workbook and the caller is responsible for calling
    ``save()``. This can be used to save different DataFrames to one
    workbook
    >>> writer = ExcelWriter('output.xlsx')
    >>> df1.to_excel(writer,'sheet1')
    >>> df2.to_excel(writer,'sheet2')
    >>> writer.save()
    """
    # On Python 2 a path may be either str or unicode; basestring covers
    # both. Python 3 has no basestring, so fall back to str there.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    # Only a writer created here is saved here.
    need_save = isinstance(excel_writer, string_types)
    if need_save:
        # Imported lazily: not needed when a ready-made writer is passed.
        from pandas.io.parsers import ExcelWriter
        excel_writer = ExcelWriter(excel_writer)

    # The shared row-writing helper calls writer.writerow(row) without a
    # sheet name, so the target sheet is selected through cur_sheet.
    excel_writer.cur_sheet = sheet_name
    self._helper_csvexcel(excel_writer, na_rep=na_rep, cols=cols,
                          header=header, index=index,
                          index_label=index_label, encoding=None)
    if need_save:
        excel_writer.save()

@Appender(docstring_to_string, indents=1)
def to_string(self, buf=None, columns=None, col_space=None, colSpace=None,
header=True, index=True, na_rep='NaN', formatters=None,
Expand Down
18 changes: 18 additions & 0 deletions pandas/core/panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,24 @@ def to_sparse(self, fill_value=None, kind='block'):
default_kind=kind,
default_fill_value=fill_value)

def to_excel(self, path, na_rep=''):
    """
    Write each DataFrame in Panel to a separate excel sheet

    Every item of the Panel becomes one sheet, named after ``str(item)``,
    and the workbook is saved once all items have been written.

    Parameters
    ----------
    path : string
        File path, handed to ExcelWriter
    na_rep : string, default ''
        Missing data rep'n
    """
    from pandas.io.parsers import ExcelWriter
    book_writer = ExcelWriter(path)
    for label, frame in self.iteritems():
        frame.to_excel(book_writer, str(label), na_rep=na_rep)
    book_writer.save()

# TODO: needed?
def keys(self):
return list(self.items)
Expand Down
131 changes: 126 additions & 5 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@
"""
from StringIO import StringIO
import re
from itertools import izip

import numpy as np

from pandas.core.index import Index, MultiIndex
from pandas.core.frame import DataFrame
import datetime
import pandas.core.common as com
import pandas._tseries as lib

Expand Down Expand Up @@ -469,7 +471,7 @@ def get_chunk(self, rows=None):
if len(self.columns) != len(zipped_content):
raise Exception('wrong number of columns')

data = dict((k, v) for k, v in zip(self.columns, zipped_content))
data = dict((k, v) for k, v in izip(self.columns, zipped_content))

# apply converters
for col, f in self.converters.iteritems():
Expand Down Expand Up @@ -561,9 +563,15 @@ class ExcelFile(object):
Path to xls file
"""
def __init__(self, path):
    """
    Open the workbook stored at `path`.

    Paths ending in '.xls' are opened with xlrd; every other path is
    opened with openpyxl in iterator mode. The backend module is imported
    lazily so only the library actually needed has to be installed.

    Parameters
    ----------
    path : string
        Path to the xls or xlsx file
    """
    self.use_xlsx = not path.endswith('.xls')
    if self.use_xlsx:
        from openpyxl import load_workbook
        self.book = load_workbook(path, use_iterators=True)
    else:
        import xlrd
        self.book = xlrd.open_workbook(path)
    # self.book must not be reassigned after this point: parse() dispatches
    # on use_xlsx and expects the matching workbook type.
    self.path = path

def __repr__(self):
return object.__repr__(self)
Expand All @@ -582,7 +590,7 @@ def parse(self, sheetname, header=0, skiprows=None, index_col=None,
Row to use for the column labels of the parsed DataFrame
skiprows : list-like
Row numbers to skip (0-indexed)
index_col : int, default 0
index_col : int, default None
Column to use as the row labels of the DataFrame. Pass None if there
is no such column
na_values : list-like, default None
Expand All @@ -592,6 +600,34 @@ def parse(self, sheetname, header=0, skiprows=None, index_col=None,
-------
parsed : DataFrame
"""
if self.use_xlsx:
return self._parse_xlsx(sheetname, header=header, skiprows=skiprows, index_col=index_col,
parse_dates=parse_dates, date_parser=date_parser, na_values=na_values,
chunksize=chunksize)
else:
return self._parse_xls(sheetname, header=header, skiprows=skiprows, index_col=index_col,
parse_dates=parse_dates, date_parser=date_parser, na_values=na_values,
chunksize=chunksize)

def _parse_xlsx(self, sheetname, header=0, skiprows=None, index_col=None,
                parse_dates=False, date_parser=None, na_values=None,
                chunksize=None):
    """
    Parse the named sheet of the openpyxl workbook held in ``self.book``.

    The sheet is read row by row through ``iter_rows()``, each cell
    contributing its ``internal_value``, and the resulting list of rows is
    handed to TextParser together with the parsing options. Returns the
    result of the parser's ``get_chunk()``.
    """
    worksheet = self.book.get_sheet_by_name(name=sheetname)
    rows = [[cell.internal_value for cell in cells]
            for cells in worksheet.iter_rows()]
    text_parser = TextParser(rows,
                             header=header,
                             index_col=index_col,
                             skiprows=skiprows,
                             na_values=na_values,
                             parse_dates=parse_dates,
                             date_parser=date_parser,
                             chunksize=chunksize)
    return text_parser.get_chunk()

def _parse_xls(self, sheetname, header=0, skiprows=None, index_col=None,
parse_dates=False, date_parser=None, na_values=None,
chunksize=None):
from datetime import MINYEAR, time, datetime
from xlrd import xldate_as_tuple, XL_CELL_DATE

Expand All @@ -601,7 +637,7 @@ def parse(self, sheetname, header=0, skiprows=None, index_col=None,
data = []
for i in range(sheet.nrows):
row = []
for value, typ in zip(sheet.row_values(i), sheet.row_types(i)):
for value, typ in izip(sheet.row_values(i), sheet.row_types(i)):
if typ == XL_CELL_DATE:
dt = xldate_as_tuple(value, datemode)
# how to produce this first case?
Expand All @@ -620,3 +656,88 @@ def parse(self, sheetname, header=0, skiprows=None, index_col=None,
chunksize=chunksize)

return parser.get_chunk()

class ExcelWriter(object):
    """
    Class for writing DataFrame objects into excel sheets, uses xlwt for xls,
    openpyxl for xlsx. See DataFrame.to_excel for typical usage.

    Parameters
    ----------
    path : string
        Path to the output file. A path ending in '.xls' is written with
        xlwt, any other path with openpyxl.
    """
    # Number of rows written to an xls sheet between flush_row_data() calls.
    _XLS_FLUSH_INTERVAL = 1000

    def __init__(self, path):
        self.use_xlsx = not path.endswith('.xls')
        if self.use_xlsx:
            from openpyxl import Workbook
            self.book = Workbook(optimized_write=True)
        else:
            import xlwt
            self.book = xlwt.Workbook()
            # Cell styles applied to datetime / date values in xls output.
            self.fm_datetime = xlwt.easyxf(
                num_format_str='YYYY-MM-DD HH:MM:SS')
            self.fm_date = xlwt.easyxf(num_format_str='YYYY-MM-DD')
        self.path = path
        # Maps sheet name -> (sheet object, index of the next row to write).
        self.sheets = {}
        # Default sheet used by writerow() when no sheet_name is passed.
        self.cur_sheet = None

    def save(self):
        """
        Save workbook to disk
        """
        self.book.save(self.path)

    def writerow(self, row, sheet_name=None):
        """
        Write the given row into an excel sheet

        Parameters
        ----------
        row : list
            Row of data to save to Excel sheet
        sheet_name : string, default None
            Name of Excel sheet, if None, then use self.cur_sheet

        Raises
        ------
        Exception
            If neither `sheet_name` nor self.cur_sheet is set.
        """
        if sheet_name is None:
            sheet_name = self.cur_sheet
        if sheet_name is None:
            raise Exception('Must pass explicit sheet_name or set cur_sheet property')
        if self.use_xlsx:
            self._writerow_xlsx(row, sheet_name)
        else:
            self._writerow_xls(row, sheet_name)

    def _writerow_xls(self, row, sheet_name):
        """Append `row` to the xlwt sheet `sheet_name`, creating it if new."""
        if sheet_name in self.sheets:
            sheet, row_idx = self.sheets[sheet_name]
        else:
            sheet = self.book.add_sheet(sheet_name)
            row_idx = 0
        sheetrow = sheet.row(row_idx)
        for i, val in enumerate(row):
            # datetime is tested before date because it is a date subclass.
            if isinstance(val, datetime.datetime):
                sheetrow.write(i, val, self.fm_datetime)
            elif isinstance(val, datetime.date):
                sheetrow.write(i, val, self.fm_date)
            elif isinstance(val, np.integer):
                # Convert NumPy integers of any width to a plain int.
                sheetrow.write(i, int(val))
            else:
                sheetrow.write(i, val)
        row_idx += 1
        # Flush periodically (not just once) so that large sheets do not
        # keep every written row in memory.
        if row_idx % self._XLS_FLUSH_INTERVAL == 0:
            sheet.flush_row_data()
        self.sheets[sheet_name] = (sheet, row_idx)

    def _writerow_xlsx(self, row, sheet_name):
        """Append `row` to the openpyxl sheet `sheet_name`, creating it if new."""
        if sheet_name in self.sheets:
            sheet, row_idx = self.sheets[sheet_name]
        else:
            sheet = self.book.create_sheet()
            sheet.title = sheet_name
            row_idx = 0

        # Convert NumPy integers of any width to a plain int before appending.
        sheet.append([int(val) if isinstance(val, np.integer) else val
                      for val in row])
        row_idx += 1
        self.sheets[sheet_name] = (sheet, row_idx)
Loading