Skip to content

ENH: Added xlsxwriter as an ExcelWriter option. #4739

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions ci/print_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,12 @@
except:
print("openpyxl: Not installed")

# Report the installed xlsxwriter version, or note that it is missing.
try:
    import xlsxwriter
    print("xlsxwriter: %s" % xlsxwriter.__version__)
except Exception:
    # Best-effort probe: any failure is reported as "not installed".
    # The message must name xlsxwriter (it previously said "xlwt").
    print("xlsxwriter: Not installed")

try:
import xlrd
print("xlrd: %s" % xlrd.__VERSION__)
Expand Down
1 change: 1 addition & 0 deletions ci/requirements-2.6.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ python-dateutil==1.5
pytz==2013b
http://www.crummy.com/software/BeautifulSoup/bs4/download/4.2/beautifulsoup4-4.2.0.tar.gz
html5lib==1.0b2
xlsxwriter==0.4.2
1 change: 1 addition & 0 deletions ci/requirements-2.7.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ numexpr==2.1
tables==2.3.1
matplotlib==1.1.1
openpyxl==1.6.2
xlsxwriter==0.4.2
xlrd==0.9.2
patsy==0.1.0
html5lib==1.0b2
Expand Down
1 change: 1 addition & 0 deletions ci/requirements-2.7_LOCALE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ python-dateutil
pytz==2013b
xlwt==0.7.5
openpyxl==1.6.2
xlsxwriter==0.4.2
xlrd==0.9.2
numpy==1.6.1
cython==0.19.1
Expand Down
1 change: 1 addition & 0 deletions ci/requirements-3.2.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
python-dateutil==2.1
pytz==2013b
openpyxl==1.6.2
xlsxwriter==0.4.2
xlrd==0.9.2
numpy==1.6.2
cython==0.19.1
Expand Down
1 change: 1 addition & 0 deletions ci/requirements-3.3.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
python-dateutil==2.1
pytz==2013b
openpyxl==1.6.2
xlsxwriter==0.4.2
xlrd==0.9.2
html5lib==1.0b2
numpy==1.7.1
Expand Down
4 changes: 2 additions & 2 deletions doc/source/10min.rst
Original file line number Diff line number Diff line change
Expand Up @@ -695,13 +695,13 @@ Writing to an excel file

.. ipython:: python

df.to_excel('foo.xlsx', sheet_name='sheet1')
df.to_excel('foo.xlsx', sheet_name='Sheet1')

Reading from an excel file

.. ipython:: python

pd.read_excel('foo.xlsx', 'sheet1', index_col=None, na_values=['NA'])
pd.read_excel('foo.xlsx', 'Sheet1', index_col=None, na_values=['NA'])

.. ipython:: python
:suppress:
Expand Down
6 changes: 3 additions & 3 deletions doc/source/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1667,7 +1667,7 @@ written. For example:

.. code-block:: python

df.to_excel('path_to_file.xlsx', sheet_name='sheet1')
df.to_excel('path_to_file.xlsx', sheet_name='Sheet1')

Files with a ``.xls`` extension will be written using ``xlwt`` and those with
a ``.xlsx`` extension will be written using ``openpyxl``.
Expand All @@ -1680,8 +1680,8 @@ one can use the ExcelWriter class, as in the following example:
.. code-block:: python

writer = ExcelWriter('path_to_file.xlsx')
df1.to_excel(writer, sheet_name='sheet1')
df2.to_excel(writer, sheet_name='sheet2')
df1.to_excel(writer, sheet_name='Sheet1')
df2.to_excel(writer, sheet_name='Sheet2')
writer.save()

.. _io.hdf5:
Expand Down
14 changes: 8 additions & 6 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1356,17 +1356,18 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,
tupleize_cols=tupleize_cols)
formatter.save()

def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
float_format=None, cols=None, header=True, index=True,
index_label=None, startrow=0, startcol=0):
index_label=None, startrow=0, startcol=0,
engine=None):
"""
Write DataFrame to a excel sheet

Parameters
----------
excel_writer : string or ExcelWriter object
File path or existing ExcelWriter
sheet_name : string, default 'sheet1'
sheet_name : string, default 'Sheet1'
Name of sheet which will contain DataFrame
na_rep : string, default ''
Missing data representation
Expand All @@ -1385,6 +1386,7 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
sequence should be given if the DataFrame uses MultiIndex.
startow : upper left cell row to dump data frame
startcol : upper left cell column to dump data frame
engine : Excel writer class
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add the possibilities, and say that it will try these in order if not specified (and say that it's a string, not a class)



Notes
Expand All @@ -1393,14 +1395,14 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
to the existing workbook. This can be used to save different
DataFrames to one workbook
>>> writer = ExcelWriter('output.xlsx')
>>> df1.to_excel(writer,'sheet1')
>>> df2.to_excel(writer,'sheet2')
>>> df1.to_excel(writer,'Sheet1')
>>> df2.to_excel(writer,'Sheet2')
>>> writer.save()
"""
from pandas.io.excel import ExcelWriter
need_save = False
if isinstance(excel_writer, compat.string_types):
excel_writer = ExcelWriter(excel_writer)
excel_writer = ExcelWriter(excel_writer, engine)
need_save = True

formatter = fmt.ExcelFormatter(self,
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,7 @@ def to_sparse(self, fill_value=None, kind='block'):
default_kind=kind,
default_fill_value=fill_value)

def to_excel(self, path, na_rep=''):
def to_excel(self, path, na_rep='', engine=None):
"""
Write each DataFrame in Panel to a separate excel sheet

Expand All @@ -468,9 +468,10 @@ def to_excel(self, path, na_rep=''):
File path or existing ExcelWriter
na_rep : string, default ''
Missing data representation
engine : string, Excel writer class
"""
from pandas.io.excel import ExcelWriter
writer = ExcelWriter(path)
writer = ExcelWriter(path, engine=engine)
for item, df in compat.iteritems(self):
name = str(item)
df.to_excel(writer, name, na_rep=na_rep)
Expand Down
177 changes: 152 additions & 25 deletions pandas/io/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,20 @@
from pandas.tseries.period import Period
from pandas import json
from pandas.compat import map, zip, reduce, range, lrange
from pandas.core import config
import pandas.compat as compat
from warnings import warn

# Set up the io.excel specific configuration.
# Help text passed to register_option for the 'writer_engine' option.
# NOTE(review): the text calls 'openpyxl' the default, but the option is
# registered below with a default of None; ExcelWriter.__init__ then falls
# back to 'xlwt' for paths ending in '.xls' and 'openpyxl' otherwise --
# confirm the wording.
writer_engine_doc = """
: string
The default Excel engine. The options are 'openpyxl' (the default), 'xlwt'
and 'xlsxwriter'.
"""
# Register 'writer_engine' under the 'io.excel' prefix; it is read back
# elsewhere as 'io.excel.writer_engine'.
with config.config_prefix('io.excel'):
config.register_option('writer_engine', None, writer_engine_doc,
validator=str)


def read_excel(path_or_buf, sheetname, **kwds):
"""Read an Excel table into a pandas DataFrame
Expand Down Expand Up @@ -256,7 +267,7 @@ def to_xls(style_dict, num_format_str=None):
import xlwt

def style_to_xlwt(item, firstlevel=True, field_sep=',', line_sep=';'):
"""helper wich recursively generate an xlwt easy style string
"""helper which recursively generate an xlwt easy style string
for example:

hstyle = {"font": {"bold": True},
Expand Down Expand Up @@ -318,6 +329,37 @@ def to_xlsx(style_dict):

return xls_style

@staticmethod
def to_xlsxwriter(workbook, style_dict, num_format_str=None):
    """
    Build an XlsxWriter format object from a style dictionary.

    Parameters
    ----------
    workbook : object providing ``add_format()``
        The XlsxWriter workbook the format is created on.
    style_dict : dict or None
        Style description. Only a truthy ``['font']['bold']`` and a
        truthy ``['borders']`` entry are honoured.
    num_format_str : string, optional
        Number format applied to the format object when not None.

    Returns
    -------
    The new format object, or None when ``style_dict`` is None.
    """
    if style_dict is None:
        return None

    cell_format = workbook.add_format()

    # Font: bold is the only font property that is mapped.
    font_props = style_dict.get('font')
    if font_props and font_props.get('bold'):
        cell_format.set_bold()

    # Borders: any non-empty border entry switches the border on.
    if style_dict.get('borders'):
        cell_format.set_border()

    if num_format_str is not None:
        cell_format.set_num_format(num_format_str)

    return cell_format


def _conv_value(val):
# convert value for excel dump
Expand All @@ -341,30 +383,35 @@ class ExcelWriter(object):
path : string
Path to xls file
"""
def __init__(self, path):
self.use_xlsx = True
if path.endswith('.xls'):
self.use_xlsx = False
import xlwt
self.book = xlwt.Workbook()
self.fm_datetime = xlwt.easyxf(
num_format_str='YYYY-MM-DD HH:MM:SS')
self.fm_date = xlwt.easyxf(num_format_str='YYYY-MM-DD')
else:
from openpyxl.workbook import Workbook
self.book = Workbook() # optimized_write=True)
# open pyxl 1.6.1 adds a dummy sheet remove it
if self.book.worksheets:
self.book.remove_sheet(self.book.worksheets[0])
self.path = path
def __init__(self, path, engine=None, **engine_kwargs):
    # Engine resolution order: explicit argument, then the
    # 'io.excel.writer_engine' option, then the file extension.
    if engine is None:
        engine = config.get_option('io.excel.writer_engine')
        if engine is None:
            engine = 'xlwt' if path.endswith('.xls') else 'openpyxl'

    # Each engine has its own _init_<engine> method; an unknown name is
    # reported as a ValueError.
    init_name = "_init_%s" % engine
    try:
        engine_setup = getattr(self, init_name)
    except AttributeError:
        raise ValueError("No engine: %s" % engine)

    engine_setup(path, **engine_kwargs)

    self.sheets = {}
    self.cur_sheet = None

def save(self):
"""
Save workbook to disk
"""
self.book.save(self.path)
if self.engine == 'xlsxwriter':
self.book.close()
else:
self.book.save(self.path)

def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0):
"""
Expand All @@ -381,16 +428,20 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0):
"""
if sheet_name is None:
sheet_name = self.cur_sheet

if sheet_name is None: # pragma: no cover
raise ValueError('Must pass explicit sheet_name or set '
'cur_sheet property')
if self.use_xlsx:
self._writecells_xlsx(cells, sheet_name, startrow, startcol)
else:
self._writecells_xls(cells, sheet_name, startrow, startcol)
'cur_sheet property')

def _writecells_xlsx(self, cells, sheet_name, startrow, startcol):
try:
_writecells = getattr(self, "_writecells_%s" % self.engine)
except AttributeError:
raise ValueError("No _writecells_%s() method" % self.engine)

_writecells(cells, sheet_name, startrow, startcol)

def _writecells_openpyxl(self, cells, sheet_name, startrow, startcol):
# Write the frame cells using openpyxl.
from openpyxl.cell import get_column_letter

if sheet_name in self.sheets:
Expand Down Expand Up @@ -426,7 +477,8 @@ def _writecells_xlsx(self, cells, sheet_name, startrow, startcol):
cletterend,
startrow + cell.mergestart + 1))

def _writecells_xls(self, cells, sheet_name, startrow, startcol):
def _writecells_xlwt(self, cells, sheet_name, startrow, startcol):
# Write the frame cells using xlwt.
if sheet_name in self.sheets:
wks = self.sheets[sheet_name]
else:
Expand Down Expand Up @@ -464,3 +516,78 @@ def _writecells_xls(self, cells, sheet_name, startrow, startcol):
wks.write(startrow + cell.row,
startcol + cell.col,
val, style)

def _writecells_xlsxwriter(self, cells, sheet_name, startrow, startcol):
    """
    Write the frame cells to a worksheet using xlsxwriter.

    Parameters
    ----------
    cells : iterable of cell objects
        Each cell is read for ``row``, ``col``, ``val``, ``style``,
        ``mergestart`` and ``mergeend``.
    sheet_name : string
        Worksheet to write to. It is created on first use and cached in
        ``self.sheets``.
    startrow : int
        Row offset added to every cell row.
    startcol : int
        Column offset added to every cell column.
    """
    if sheet_name in self.sheets:
        wks = self.sheets[sheet_name]
    else:
        wks = self.book.add_worksheet(sheet_name)
        self.sheets[sheet_name] = wks

    # Cache of format objects keyed by serialised style (+ number format),
    # so identical styles share one format.
    style_dict = {}

    for cell in cells:
        val = _conv_value(cell.val)

        # datetime.datetime is a subclass of datetime.date, so it has to be
        # tested first and exclusively; otherwise datetimes would end up
        # with the date-only format.
        num_format_str = None
        if isinstance(cell.val, datetime.datetime):
            num_format_str = "YYYY-MM-DD HH:MM:SS"
        elif isinstance(cell.val, datetime.date):
            num_format_str = "YYYY-MM-DD"

        stylekey = json.dumps(cell.style)
        if num_format_str:
            stylekey += num_format_str

        if stylekey in style_dict:
            style = style_dict[stylekey]
        else:
            style = CellStyleConverter.to_xlsxwriter(self.book,
                                                     cell.style,
                                                     num_format_str)
            style_dict[stylekey] = style

        if cell.mergestart is not None and cell.mergeend is not None:
            # xlsxwriter's merge_range expects
            # (first_row, first_col, last_row, last_col, data, format),
            # unlike xlwt's write_merge (r1, r2, c1, c2, ...).
            wks.merge_range(startrow + cell.row,
                            startcol + cell.col,
                            startrow + cell.mergestart,
                            startcol + cell.mergeend,
                            val, style)
        else:
            wks.write(startrow + cell.row,
                      startcol + cell.col,
                      val, style)

def _init_xlwt(self, filename, **engine_kwargs):
    """
    Configure this writer to build its workbook with xlwt.

    ``filename`` is stored as ``self.path``; ``engine_kwargs`` is accepted
    but not used by this engine.
    """
    from xlwt import Workbook, easyxf

    self.engine, self.path = 'xlwt', filename
    self.book = Workbook()
    # Styles carrying the datetime and date number formats.
    self.fm_datetime = easyxf(num_format_str='YYYY-MM-DD HH:MM:SS')
    self.fm_date = easyxf(num_format_str='YYYY-MM-DD')

def _init_openpyxl(self, filename, **engine_kwargs):
    """
    Configure this writer to build its workbook with openpyxl.

    ``filename`` is stored as ``self.path``; ``engine_kwargs`` is accepted
    but not used by this engine.
    """
    from openpyxl.workbook import Workbook

    self.engine = 'openpyxl'
    self.path = filename
    # The workbook is created with openpyxl's default arguments.
    workbook = Workbook()
    self.book = workbook
    # Openpyxl 1.6.1 adds a dummy sheet; drop it so the workbook starts empty.
    initial_sheets = workbook.worksheets
    if initial_sheets:
        workbook.remove_sheet(initial_sheets[0])

def _init_xlsxwriter(self, filename, **engine_kwargs):
    """
    Configure this writer to build its workbook with xlsxwriter.

    Parameters
    ----------
    filename : string
        Path of the workbook; stored as ``self.path`` and passed to
        ``xlsxwriter.Workbook``.
    **engine_kwargs
        Passed to ``xlsxwriter.Workbook`` as its options dictionary.
        ``default_date_format`` defaults to ``'YYYY-MM-DD HH:MM:SS'``
        when the caller does not supply one.
    """
    import xlsxwriter

    # Work on a copy so the default can be added safely.
    options = dict(engine_kwargs)
    options.setdefault('default_date_format', 'YYYY-MM-DD HH:MM:SS')

    self.engine = 'xlsxwriter'
    # Record the path as the xlwt and openpyxl initialisers do, so the
    # attribute exists regardless of the engine in use.
    self.path = filename
    self.book = xlsxwriter.Workbook(filename, options)
Binary file added pandas/io/tests/data/xw_frame01.xlsx
Binary file not shown.
Binary file added pandas/io/tests/data/xw_frame02.xlsx
Binary file not shown.
Binary file added pandas/io/tests/data/xw_frame03.xlsx
Binary file not shown.
Binary file added pandas/io/tests/data/xw_frame04.xlsx
Binary file not shown.
Binary file added pandas/io/tests/data/xw_frame05.xlsx
Binary file not shown.
Binary file added pandas/io/tests/data/xw_frame06.xlsx
Binary file not shown.
Binary file added pandas/io/tests/data/xw_frame07.xlsx
Binary file not shown.
Loading