diff --git a/.travis.yml b/.travis.yml
index 387dec1ed2658..f46d9c4735ca4 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -41,6 +41,8 @@ before_install:
install:
- echo "Waldo2"
- ci/install.sh
+ # Temp testing measure while waiting for PyPi release.
+ - pip install git+git://github.com/kz26/PyExcelerate.git
before_script:
- mysql -e 'create database pandas_nosetest;'
diff --git a/doc/source/install.rst b/doc/source/install.rst
index 532c90b83ebb0..4beea77c716ed 100644
--- a/doc/source/install.rst
+++ b/doc/source/install.rst
@@ -101,8 +101,8 @@ Optional Dependencies
* `openpyxl `__, `xlrd/xlwt `__
* openpyxl version 1.6.1 or higher
* Needed for Excel I/O
- * `XlsxWriter `__
- * Alternative Excel writer.
+ * `XlsxWriter <https://xlsxwriter.readthedocs.org>`__, `PyExcelerate <https://github.com/kz26/PyExcelerate>`__
+ * Alternative Excel writers.
* `boto `__: necessary for Amazon S3
access.
* One of `PyQt4
diff --git a/doc/source/release.rst b/doc/source/release.rst
index 8488d03f97cbd..a5c55eaea9e2b 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -136,6 +136,8 @@ Improvements to existing features
- Added XlsxWriter as an optional ``ExcelWriter`` engine. This is about 5x
faster than the default openpyxl xlsx writer and is equivalent in speed
to the xlwt xls writer module. (:issue:`4542`)
+ - Added PyExcelerate as an optional ``ExcelWriter`` engine. This is about
+ 14x faster than the default openpyxl xlsx writer.
- allow DataFrame constructor to accept more list-like objects, e.g. list of
``collections.Sequence`` and ``array.Array`` objects (:issue:`3783`,
:issue:`4297`, :issue:`4851`), thanks @lgautier
diff --git a/pandas/io/excel.py b/pandas/io/excel.py
index 6b83fada19001..3beafed094778 100644
--- a/pandas/io/excel.py
+++ b/pandas/io/excel.py
@@ -745,3 +745,74 @@ def _convert_to_style(self, style_dict, num_format_str=None):
return xl_format
register_writer(_XlsxWriter)
+
+
+class _PyExcelerate(ExcelWriter):
+    """
+    ``ExcelWriter`` engine that writes ``.xlsx`` files with pyexcelerate.
+
+    Only cell values are written: cell styles and merged ranges are not
+    applied by this engine.
+    """
+    engine = 'pyexcelerate'
+    supported_extensions = ('.xlsx',)
+
+    def __init__(self, path, **engine_kwargs):
+        # Use the pyexcelerate module as the Excel writer.
+        import pyexcelerate
+
+        super(_PyExcelerate, self).__init__(path, **engine_kwargs)
+
+        # pyexcelerate's Workbook constructor does not take the output
+        # path; the path is only handed over in save().
+        self.book = pyexcelerate.Workbook(**engine_kwargs)
+
+    def save(self):
+        """
+        Save workbook to disk.
+        """
+        return self.book.save(self.path)
+
+    def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0):
+        """
+        Write the frame cells using pyexcelerate.
+
+        Parameters
+        ----------
+        cells : iterable
+            Cell objects exposing ``row``, ``col`` and ``val``.
+        sheet_name : string, default None
+            Target sheet, resolved through ``_get_sheet_name``.
+        startrow : int, default 0
+            Row offset added to every cell row.
+        startcol : int, default 0
+            Column offset added to every cell column.
+        """
+        sheet_name = self._get_sheet_name(sheet_name)
+
+        if sheet_name in self.sheets:
+            wks = self.sheets[sheet_name]
+        else:
+            wks = self.book.new_sheet(sheet_name)
+            self.sheets[sheet_name] = wks
+
+        for cell in cells:
+            val = _conv_value(cell.val)
+
+            if isinstance(val, datetime.datetime):
+                # datetime is a subclass of date, so it is tested first:
+                # keep the time of day instead of truncating to midnight.
+                val = datetime.datetime.combine(val.date(), val.time())
+            elif isinstance(val, datetime.date):
+                val = datetime.datetime.fromordinal(val.toordinal())
+
+            # Merged ranges are not reproduced: a merged cell's value is
+            # still written at its (row, col) position so it is not lost.
+            # pyexcelerate numbers rows and columns from 1, hence "1 +".
+            wks.set_cell_value(1 + startrow + cell.row,
+                               1 + startcol + cell.col,
+                               val)
+
+
+register_writer(_PyExcelerate)
+
diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py
index 38b3ee192ab7a..b04647eb43ae0 100644
--- a/pandas/io/tests/test_excel.py
+++ b/pandas/io/tests/test_excel.py
@@ -52,6 +52,14 @@ def _skip_if_no_xlsxwriter():
raise nose.SkipTest('xlsxwriter not installed, skipping')
+def _skip_if_no_pyexcelerate():
+    """Raise ``nose.SkipTest`` unless pyexcelerate can be imported."""
+    try:
+        __import__('pyexcelerate')
+    except ImportError:
+        raise nose.SkipTest('pyexcelerate not installed, skipping')
+
+
def _skip_if_no_excelsuite():
_skip_if_no_xlrd()
_skip_if_no_xlwt()
@@ -953,6 +960,44 @@ def test_roundtrip_indexlabels(self):
self.assertAlmostEqual(frame.index.names, recons.index.names)
+class PyExcelerateTests(ExcelWriterBase, unittest.TestCase):
+    """Excel writer tests configured for the pyexcelerate engine."""
+    ext = 'xlsx'
+    engine_name = 'pyexcelerate'
+    check_skip = staticmethod(_skip_if_no_pyexcelerate)
+
+    # Override of the superclass test: every sheet read back is cast to
+    # int64 before the index names are compared.
+    def test_roundtrip_indexlabels(self):
+        _skip_if_no_xlrd()
+        ext = self.ext
+        path = '__tmp_to_excel_from_excel_indexlabels__.' + ext
+
+        with ensure_clean(path) as path:
+
+            self.frame['A'][:5] = nan
+
+            self.frame.to_excel(path, 'test1')
+            self.frame.to_excel(path, 'test1', cols=['A', 'B'])
+            self.frame.to_excel(path, 'test1', header=False)
+            self.frame.to_excel(path, 'test1', index=False)
+
+            # test index_label: a one-item list, a three-item list and a
+            # plain string are each expected to read back as ['test']
+            for index_label in (['test'],
+                                ['test', 'dummy', 'dummy2'],
+                                'test'):
+                frame = (DataFrame(np.random.randn(10, 2)) >= 0)
+                frame.to_excel(path, 'test1', index_label=index_label)
+                reader = ExcelFile(path)
+                recons = reader.parse('test1', index_col=0).astype(np.int64)
+                frame.index.names = ['test']
+                # Names are sequences of labels, not floats, so compare
+                # them exactly (assertAlmostEqual would raise TypeError
+                # instead of failing cleanly on a mismatch).
+                self.assertEqual(frame.index.names, recons.index.names)
+
+
class ExcelWriterEngineTests(unittest.TestCase):
def test_ExcelWriter_dispatch(self):
with tm.assertRaisesRegexp(ValueError, 'No engine'):
@@ -966,11 +1018,11 @@ def test_ExcelWriter_dispatch(self):
writer = ExcelWriter('apple.xls')
tm.assert_isinstance(writer, _XlwtWriter)
-
def test_register_writer(self):
# some awkward mocking to test out dispatch and such actually works
called_save = []
called_write_cells = []
+
class DummyClass(ExcelWriter):
called_save = False
called_write_cells = False
@@ -998,7 +1050,6 @@ def check_called(func):
func = lambda: df.to_excel('something.test')
check_called(func)
check_called(lambda: panel.to_excel('something.test'))
- from pandas import set_option, get_option
val = get_option('io.excel.xlsx.writer')
set_option('io.excel.xlsx.writer', 'dummy')
check_called(lambda: df.to_excel('something.xlsx'))
diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py
index 07b33266d88a1..bf0a37aad67cc 100644
--- a/pandas/tests/test_panel.py
+++ b/pandas/tests/test_panel.py
@@ -1595,6 +1595,27 @@ def test_to_excel_xlsxwriter(self):
recdf = reader.parse(str(item), index_col=0)
assert_frame_equal(df, recdf)
+    def test_to_excel_pyexcelerate(self):
+        """Round-trip ``self.panel`` through a pyexcelerate xlsx file."""
+        try:
+            import xlrd  # NOQA
+            import pyexcelerate  # NOQA
+            from pandas.io.excel import ExcelFile
+        except ImportError:
+            raise nose.SkipTest("Requires xlrd and pyexcelerate. Skipping.")
+
+        with ensure_clean('__tmp__.xlsx') as tmp_path:
+            self.panel.to_excel(tmp_path, engine='pyexcelerate')
+            try:
+                workbook = ExcelFile(tmp_path)
+            except ImportError as err:
+                raise nose.SkipTest("cannot write excel file: %s" % err)
+
+            # each panel item is read back from the sheet named after it
+            for item, expected in compat.iteritems(self.panel):
+                actual = workbook.parse(str(item), index_col=0)
+                assert_frame_equal(expected, actual)
+
def test_dropna(self):
p = Panel(np.random.randn(4, 5, 6), major_axis=list('abcde'))
p.ix[:, ['b', 'd'], 0] = np.nan