diff --git a/ci/requirements-2.7.txt b/ci/requirements-2.7.txt
index 6a94d48ad7a5f..2e903102de7b1 100644
--- a/ci/requirements-2.7.txt
+++ b/ci/requirements-2.7.txt
@@ -8,6 +8,7 @@ numexpr==2.1
tables==2.3.1
matplotlib==1.1.1
openpyxl==1.6.2
+xlsxwriter==0.4.3
xlrd==0.9.2
patsy==0.1.0
html5lib==1.0b2
diff --git a/ci/requirements-2.7_LOCALE.txt b/ci/requirements-2.7_LOCALE.txt
index a7e9d62e3549b..056b63bbb8591 100644
--- a/ci/requirements-2.7_LOCALE.txt
+++ b/ci/requirements-2.7_LOCALE.txt
@@ -2,6 +2,7 @@ python-dateutil
pytz==2013b
xlwt==0.7.5
openpyxl==1.6.2
+xlsxwriter==0.4.3
xlrd==0.9.2
numpy==1.6.1
cython==0.19.1
diff --git a/ci/requirements-3.2.txt b/ci/requirements-3.2.txt
index e907a2fa828f1..b689047019ed7 100644
--- a/ci/requirements-3.2.txt
+++ b/ci/requirements-3.2.txt
@@ -1,6 +1,7 @@
python-dateutil==2.1
pytz==2013b
openpyxl==1.6.2
+xlsxwriter==0.4.3
xlrd==0.9.2
numpy==1.6.2
cython==0.19.1
diff --git a/ci/requirements-3.3.txt b/ci/requirements-3.3.txt
index eb1e725d98040..326098be5f7f4 100644
--- a/ci/requirements-3.3.txt
+++ b/ci/requirements-3.3.txt
@@ -1,6 +1,7 @@
python-dateutil==2.1
pytz==2013b
openpyxl==1.6.2
+xlsxwriter==0.4.3
xlrd==0.9.2
html5lib==1.0b2
numpy==1.7.1
diff --git a/doc/source/10min.rst b/doc/source/10min.rst
index 58c5b54968614..705514ac0c364 100644
--- a/doc/source/10min.rst
+++ b/doc/source/10min.rst
@@ -695,13 +695,13 @@ Writing to an excel file
.. ipython:: python
- df.to_excel('foo.xlsx', sheet_name='sheet1')
+ df.to_excel('foo.xlsx', sheet_name='Sheet1')
Reading from an excel file
.. ipython:: python
- pd.read_excel('foo.xlsx', 'sheet1', index_col=None, na_values=['NA'])
+ pd.read_excel('foo.xlsx', 'Sheet1', index_col=None, na_values=['NA'])
.. ipython:: python
:suppress:
diff --git a/doc/source/install.rst b/doc/source/install.rst
index 4472d844c1871..b1dcad9448cfd 100644
--- a/doc/source/install.rst
+++ b/doc/source/install.rst
@@ -100,6 +100,8 @@ Optional Dependencies
* `openpyxl `__, `xlrd/xlwt `__
* openpyxl version 1.6.1 or higher
* Needed for Excel I/O
+ * `XlsxWriter <https://xlsxwriter.readthedocs.io>`__
+ * Alternative Excel writer.
* `boto `__: necessary for Amazon S3
access.
* One of `PyQt4
diff --git a/doc/source/io.rst b/doc/source/io.rst
index b9581c37082bf..67492eddbac12 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -1654,7 +1654,7 @@ indices to be parsed.
.. code-block:: python
- read_excel('path_to_file.xls', Sheet1', parse_cols=[0, 2, 3], index_col=None, na_values=['NA'])
+ read_excel('path_to_file.xls', 'Sheet1', parse_cols=[0, 2, 3], index_col=None, na_values=['NA'])
To write a DataFrame object to a sheet of an Excel file, you can use the
``to_excel`` instance method. The arguments are largely the same as ``to_csv``
@@ -1664,7 +1664,7 @@ written. For example:
.. code-block:: python
- df.to_excel('path_to_file.xlsx', sheet_name='sheet1')
+ df.to_excel('path_to_file.xlsx', sheet_name='Sheet1')
Files with a ``.xls`` extension will be written using ``xlwt`` and those with
a ``.xlsx`` extension will be written using ``openpyxl``.
@@ -1677,8 +1677,8 @@ one can use the ExcelWriter class, as in the following example:
.. code-block:: python
writer = ExcelWriter('path_to_file.xlsx')
- df1.to_excel(writer, sheet_name='sheet1')
- df2.to_excel(writer, sheet_name='sheet2')
+ df1.to_excel(writer, sheet_name='Sheet1')
+ df2.to_excel(writer, sheet_name='Sheet2')
writer.save()
.. _io.excel.writers:
@@ -1693,11 +1693,29 @@ Excel writer engines
1. the ``engine`` keyword argument
2. the filename extension (via the default specified in config options)
-``pandas`` only supports ``openpyxl`` for ``.xlsx`` and ``.xlsm`` files and
-``xlwt`` for ``.xls`` files. If you have multiple engines installed, you can choose the
-engine to use by default via the options ``io.excel.xlsx.writer`` and
-``io.excel.xls.writer``.
+By default ``pandas`` only supports
+`openpyxl <https://openpyxl.readthedocs.io>`__ as a writer for ``.xlsx``
+and ``.xlsm`` files and `xlwt <http://www.python-excel.org/>`__ as a writer for
+``.xls`` files. If you have multiple engines installed, you can change the
+default engine via the ``io.excel.xlsx.writer`` and ``io.excel.xls.writer``
+options.
+For example, if the optional `XlsxWriter <https://xlsxwriter.readthedocs.io>`__
+module is installed you can use it as an xlsx writer engine as follows:
+
+.. code-block:: python
+
+ # By setting the 'engine' in the DataFrame and Panel 'to_excel()' methods.
+ df.to_excel('path_to_file.xlsx', sheet_name='Sheet1', engine='xlsxwriter')
+
+ # By setting the 'engine' in the ExcelWriter constructor.
+ writer = ExcelWriter('path_to_file.xlsx', engine='xlsxwriter')
+
+ # Or via pandas configuration.
+ from pandas import set_option
+ set_option('io.excel.xlsx.writer', 'xlsxwriter')
+
+ df.to_excel('path_to_file.xlsx', sheet_name='Sheet1')
.. _io.hdf5:
diff --git a/doc/source/release.rst b/doc/source/release.rst
index f7755afe8caae..b71410a8f7e59 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -113,6 +113,9 @@ Improvements to existing features
``io.excel.xls.writer``. (:issue:`4745`, :issue:`4750`)
- ``Panel.to_excel()`` now accepts keyword arguments that will be passed to
its ``DataFrame``'s ``to_excel()`` methods. (:issue:`4750`)
+ - Added XlsxWriter as an optional ``ExcelWriter`` engine. This is about 5x
+ faster than the default openpyxl xlsx writer and is equivalent in speed
+ to the xlwt xls writer module. (:issue:`4542`)
- allow DataFrame constructor to accept more list-like objects, e.g. list of
``collections.Sequence`` and ``array.Array`` objects (:issue:`3783`,:issue:`4297`, :issue:`4851`),
thanks @lgautier
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 75f81d20926a1..20fe33226d7ca 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1356,7 +1356,7 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,
tupleize_cols=tupleize_cols)
formatter.save()
- def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
+ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
float_format=None, cols=None, header=True, index=True,
index_label=None, startrow=0, startcol=0, engine=None):
"""
@@ -1366,7 +1366,7 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
----------
excel_writer : string or ExcelWriter object
File path or existing ExcelWriter
- sheet_name : string, default 'sheet1'
+ sheet_name : string, default 'Sheet1'
Name of sheet which will contain DataFrame
na_rep : string, default ''
Missing data representation
@@ -1397,8 +1397,8 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
to the existing workbook. This can be used to save different
DataFrames to one workbook
>>> writer = ExcelWriter('output.xlsx')
- >>> df1.to_excel(writer,'sheet1')
- >>> df2.to_excel(writer,'sheet2')
+ >>> df1.to_excel(writer,'Sheet1')
+ >>> df2.to_excel(writer,'Sheet2')
>>> writer.save()
"""
from pandas.io.excel import ExcelWriter
diff --git a/pandas/io/excel.py b/pandas/io/excel.py
index f34c4f99a856d..6ce8eb697268b 100644
--- a/pandas/io/excel.py
+++ b/pandas/io/excel.py
@@ -596,6 +596,7 @@ def _convert_to_style(cls, style_dict, num_format_str=None):
Parameters
----------
style_dict: style dictionary to convert
+ num_format_str: optional number format string
"""
import xlwt
@@ -611,3 +612,95 @@ def _convert_to_style(cls, style_dict, num_format_str=None):
register_writer(_XlwtWriter)
+
+class _XlsxWriter(ExcelWriter):
+    """Excel writer engine that produces ``.xlsx`` files with xlsxwriter."""
+    engine = 'xlsxwriter'
+    supported_extensions = ('.xlsx',)
+
+    def __init__(self, path, **engine_kwargs):
+        # Imported inside the constructor so the module is only required
+        # when this engine is actually instantiated.
+        import xlsxwriter
+
+        super(_XlsxWriter, self).__init__(path, **engine_kwargs)
+
+        # NOTE(review): engine_kwargs are forwarded verbatim as keyword
+        # arguments; confirm xlsxwriter.Workbook accepts them in that form.
+        self.book = xlsxwriter.Workbook(path, **engine_kwargs)
+
+    def save(self):
+        """
+        Save workbook to disk.
+        """
+        return self.book.close()
+
+    def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0):
+        """
+        Write the frame cells to a worksheet using xlsxwriter.
+
+        Parameters
+        ----------
+        cells: iterable of cell objects exposing ``row``, ``col``, ``val``,
+            ``style``, ``mergestart`` and ``mergeend``
+        sheet_name: name of the target sheet, resolved via _get_sheet_name
+        startrow: row offset added to every cell position
+        startcol: column offset added to every cell position
+        """
+        sheet_name = self._get_sheet_name(sheet_name)
+
+        # Reuse the worksheet created by an earlier call for the same name.
+        if sheet_name in self.sheets:
+            wks = self.sheets[sheet_name]
+        else:
+            wks = self.book.add_worksheet(sheet_name)
+            self.sheets[sheet_name] = wks
+
+        # Cache of format objects keyed by serialized style + number format,
+        # so identical styles share one xlsxwriter format.
+        style_dict = {}
+
+        for cell in cells:
+            val = _conv_value(cell.val)
+
+            # datetime.datetime is a subclass of datetime.date, so the two
+            # checks must be mutually exclusive; otherwise every datetime
+            # would end up with the date-only format.
+            num_format_str = None
+            if isinstance(cell.val, datetime.datetime):
+                num_format_str = "YYYY-MM-DD HH:MM:SS"
+            elif isinstance(cell.val, datetime.date):
+                num_format_str = "YYYY-MM-DD"
+
+            stylekey = json.dumps(cell.style)
+            if num_format_str:
+                stylekey += num_format_str
+
+            if stylekey in style_dict:
+                style = style_dict[stylekey]
+            else:
+                style = self._convert_to_style(cell.style, num_format_str)
+                style_dict[stylekey] = style
+
+            if cell.mergestart is not None and cell.mergeend is not None:
+                # merge_range takes (first_row, first_col, last_row,
+                # last_col, data, cell_format). mergestart is offset by
+                # startrow and mergeend by startcol, i.e. they are the last
+                # row and last column of the merged area.
+                wks.merge_range(startrow + cell.row,
+                                startcol + cell.col,
+                                startrow + cell.mergestart,
+                                startcol + cell.mergeend,
+                                val, style)
+            else:
+                wks.write(startrow + cell.row,
+                          startcol + cell.col,
+                          val, style)
+
+    def _convert_to_style(self, style_dict, num_format_str=None):
+        """
+        converts a style_dict to an xlsxwriter format object
+
+        Parameters
+        ----------
+        style_dict: style dictionary to convert
+        num_format_str: optional number format string
+
+        Returns
+        -------
+        A format object created on this writer's workbook, or None when
+        style_dict is None.
+        """
+        if style_dict is None:
+            return None
+
+        # Create a XlsxWriter format object.
+        xl_format = self.book.add_format()
+
+        # Map the cell font to XlsxWriter font properties.
+        if style_dict.get('font'):
+            font = style_dict['font']
+            if font.get('bold'):
+                xl_format.set_bold()
+
+        # Map the cell borders to XlsxWriter border properties.
+        if style_dict.get('borders'):
+            xl_format.set_border()
+
+        if num_format_str is not None:
+            xl_format.set_num_format(num_format_str)
+
+        return xl_format
+
+register_writer(_XlsxWriter)
diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py
index 00536026994c5..94f3e5a8cf746 100644
--- a/pandas/io/tests/test_excel.py
+++ b/pandas/io/tests/test_excel.py
@@ -16,9 +16,11 @@
register_writer
)
from pandas.util.testing import ensure_clean
+from pandas.core.config import set_option, get_option
import pandas.util.testing as tm
import pandas as pd
+
def _skip_if_no_xlrd():
try:
import xlrd
@@ -31,18 +33,25 @@ def _skip_if_no_xlrd():
def _skip_if_no_xlwt():
try:
- import xlwt # NOQA
+ import xlwt # NOQA
except ImportError:
raise nose.SkipTest('xlwt not installed, skipping')
def _skip_if_no_openpyxl():
try:
- import openpyxl # NOQA
+ import openpyxl # NOQA
except ImportError:
raise nose.SkipTest('openpyxl not installed, skipping')
+def _skip_if_no_xlsxwriter():
+ # Raise nose.SkipTest when the optional xlsxwriter module cannot be imported.
+ try:
+ import xlsxwriter # NOQA
+ except ImportError:
+ raise nose.SkipTest('xlsxwriter not installed, skipping')
+
+
def _skip_if_no_excelsuite():
_skip_if_no_xlrd()
_skip_if_no_xlwt()
@@ -268,15 +277,22 @@ def test_xlsx_table(self):
class ExcelWriterBase(SharedItems):
- # test cases to run with different extensions
- # for each writer
- # to add a writer test, define two things:
- # 1. a check_skip function that skips your tests if your writer isn't
- # installed
- # 2. add a property ext, which is the file extension that your writer writes to
+ # Base class for test cases to run with different Excel writers.
+ # To add a writer test, define the following:
+ # 1. A check_skip function that skips your tests if your writer isn't
+ # installed.
+ # 2. Add a property ext, which is the file extension that your writer
+ # writes to.
+ # 3. Add a property engine_name, which is the writer's engine name
+ # (e.g. 'xlsxwriter').
def setUp(self):
+ # Skip when the writer under test is unavailable, then make engine_name the
+ # configured writer for this extension, remembering the previous setting.
self.check_skip()
super(ExcelWriterBase, self).setUp()
+ self.option_name = 'io.excel.%s.writer' % self.ext
+ self.prev_engine = get_option(self.option_name)
+ set_option(self.option_name, self.engine_name)
+
+ def tearDown(self):
+ # Put back the writer option value that setUp saved in prev_engine.
+ set_option(self.option_name, self.prev_engine)
def test_excel_sheet_by_name_raise(self):
_skip_if_no_xlrd()
@@ -790,6 +806,7 @@ def roundtrip(df, header=True, parser_hdr=0):
class OpenpyxlTests(ExcelWriterBase, unittest.TestCase):
ext = 'xlsx'
+ engine_name = 'openpyxl'
check_skip = staticmethod(_skip_if_no_openpyxl)
def test_to_excel_styleconverter(self):
@@ -820,6 +837,7 @@ def test_to_excel_styleconverter(self):
class XlwtTests(ExcelWriterBase, unittest.TestCase):
ext = 'xls'
+ engine_name = 'xlwt'
check_skip = staticmethod(_skip_if_no_xlwt)
def test_to_excel_styleconverter(self):
@@ -841,6 +859,52 @@ def test_to_excel_styleconverter(self):
self.assertEquals(xlwt.Borders.THIN, xls_style.borders.left)
self.assertEquals(xlwt.Alignment.HORZ_CENTER, xls_style.alignment.horz)
+
+class XlsxWriterTests(ExcelWriterBase, unittest.TestCase):
+    ext = 'xlsx'
+    engine_name = 'xlsxwriter'
+    check_skip = staticmethod(_skip_if_no_xlsxwriter)
+
+    # Overrides the test from the superclass. This version only checks that
+    # the index names survive a round trip through the XlsxWriter output file.
+    def test_roundtrip_indexlabels(self):
+        _skip_if_no_xlrd()
+        ext = self.ext
+        path = '__tmp_to_excel_from_excel_indexlabels__.' + ext
+
+        with ensure_clean(path) as path:
+
+            self.frame['A'][:5] = nan
+
+            self.frame.to_excel(path, 'test1')
+            self.frame.to_excel(path, 'test1', cols=['A', 'B'])
+            self.frame.to_excel(path, 'test1', header=False)
+            self.frame.to_excel(path, 'test1', index=False)
+
+            # test index_label
+            frame = (DataFrame(np.random.randn(10, 2)) >= 0)
+            frame.to_excel(path, 'test1', index_label=['test'])
+            reader = ExcelFile(path)
+            recons = reader.parse('test1', index_col=0).astype(np.int64)
+            frame.index.names = ['test']
+            self.assertEqual(frame.index.names, recons.index.names)
+
+            frame = (DataFrame(np.random.randn(10, 2)) >= 0)
+            frame.to_excel(
+                path, 'test1', index_label=['test', 'dummy', 'dummy2'])
+            reader = ExcelFile(path)
+            recons = reader.parse('test1', index_col=0).astype(np.int64)
+            frame.index.names = ['test']
+            self.assertEqual(frame.index.names, recons.index.names)
+
+            frame = (DataFrame(np.random.randn(10, 2)) >= 0)
+            frame.to_excel(path, 'test1', index_label='test')
+            reader = ExcelFile(path)
+            recons = reader.parse('test1', index_col=0).astype(np.int64)
+            frame.index.names = ['test']
+            # Names are lists of labels, so compare them exactly: on a
+            # mismatch assertAlmostEqual would raise TypeError rather than
+            # report a test failure.
+            self.assertEqual(frame.index.names, recons.index.names)
+
+
class ExcelWriterEngineTests(unittest.TestCase):
def test_ExcelWriter_dispatch(self):
with tm.assertRaisesRegexp(ValueError, 'No engine'):
diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py
index fc86a78ea684b..d725a3ff6f135 100644
--- a/pandas/tests/test_panel.py
+++ b/pandas/tests/test_panel.py
@@ -1429,6 +1429,26 @@ def test_to_excel(self):
recdf = reader.parse(str(item), index_col=0)
assert_frame_equal(df, recdf)
+    def test_to_excel_xlsxwriter(self):
+        """Round-trip each panel item through a file written by xlsxwriter."""
+        # Probe the optional dependencies first; skip when any is missing.
+        try:
+            import xlrd
+            import xlsxwriter
+            from pandas.io.excel import ExcelFile
+        except ImportError:
+            raise nose.SkipTest("Requires xlrd and xlsxwriter. Skipping test.")
+
+        with ensure_clean('__tmp__.xlsx') as tmp_path:
+            self.panel.to_excel(tmp_path, engine='xlsxwriter')
+            try:
+                workbook = ExcelFile(tmp_path)
+            except ImportError:
+                raise nose.SkipTest
+
+            # Each item is read back from the sheet named after it.
+            for item, expected in compat.iteritems(self.panel):
+                actual = workbook.parse(str(item), index_col=0)
+                assert_frame_equal(expected, actual)
+
def test_dropna(self):
p = Panel(np.random.randn(4, 5, 6), major_axis=list('abcde'))
p.ix[:, ['b', 'd'], 0] = np.nan
diff --git a/pandas/util/print_versions.py b/pandas/util/print_versions.py
index b7b4a936a1e90..d9c642372a9bb 100644
--- a/pandas/util/print_versions.py
+++ b/pandas/util/print_versions.py
@@ -104,6 +104,12 @@ def show_versions():
except:
print("xlwt: Not installed")
+ try:
+ import xlsxwriter
+ print("xlsxwriter: %s" % xlsxwriter.__version__)
+ except:
+ print("xlsxwriter: Not installed")
+
try:
import sqlalchemy
print("sqlalchemy: %s" % sqlalchemy.__version__)