Skip to content

ENH: Support for PyExcelerate as an Excel writer engine. #5128

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ before_install:
install:
- echo "Waldo2"
- ci/install.sh
- pip install git+git://github.com/jmcnamara/PyExcelerate@pandas
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

put this in one of the requirements files in pandas/ci

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is just a temporary measure while the PyExcelerate devs merge my PR and look at the date issue. Once the patched PyExcelerate is on PyPI, I'll update this and the requirements files.


before_script:
- mysql -e 'create database pandas_nosetest;'
Expand Down
4 changes: 2 additions & 2 deletions doc/source/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,8 @@ Optional Dependencies
* `openpyxl <http://packages.python.org/openpyxl/>`__, `xlrd/xlwt <http://www.python-excel.org/>`__
* openpyxl version 1.6.1 or higher
* Needed for Excel I/O
* `XlsxWriter <https://pypi.python.org/pypi/XlsxWriter>`__
* Alternative Excel writer.
* `XlsxWriter <https://pypi.python.org/pypi/XlsxWriter>`__, `PyExcelerate <https://pypi.python.org/pypi/PyExcelerate>`__
* Alternative Excel writers.
* `boto <https://pypi.python.org/pypi/boto>`__: necessary for Amazon S3
access.
* One of `PyQt4
Expand Down
2 changes: 2 additions & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,8 @@ Improvements to existing features
- Added XlsxWriter as an optional ``ExcelWriter`` engine. This is about 5x
faster than the default openpyxl xlsx writer and is equivalent in speed
to the xlwt xls writer module. (:issue:`4542`)
- Added PyExcelerate as an optional ``ExcelWriter`` engine. This is about
14x faster than the default openpyxl xlsx writer. (:issue:`5128`)
- allow DataFrame constructor to accept more list-like objects, e.g. list of
``collections.Sequence`` and ``array.Array`` objects (:issue:`3783`,
:issue:`4297`, :issue:`4851`), thanks @lgautier
Expand Down
48 changes: 48 additions & 0 deletions pandas/io/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -745,3 +745,51 @@ def _convert_to_style(self, style_dict, num_format_str=None):
return xl_format

register_writer(_XlsxWriter)


class _PyExcelerate(ExcelWriter):
    # ExcelWriter engine that writes ``.xlsx`` files through the optional
    # pyexcelerate package. Only cell values are written: no styles are
    # applied and no merged ranges are created.
    engine = 'pyexcelerate'
    supported_extensions = ('.xlsx',)

    def __init__(self, path, **engine_kwargs):
        """
        Create the writer and its underlying pyexcelerate workbook.

        Parameters
        ----------
        path : output file path, forwarded to the base class and to
            ``pyexcelerate.Workbook``.
        **engine_kwargs : extra keyword arguments, forwarded unchanged to
            both the base class and ``pyexcelerate.Workbook``.
        """
        # Imported here so that pyexcelerate is only required when this
        # engine is actually used.
        import pyexcelerate

        super(_PyExcelerate, self).__init__(path, **engine_kwargs)

        # NOTE(review): this passes ``path`` positionally to Workbook();
        # confirm that the targeted pyexcelerate build accepts it.
        self.book = pyexcelerate.Workbook(path, **engine_kwargs)

    def save(self):
        """
        Save workbook to disk.
        """
        return self.book.save(self.path)

    def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0):
        """
        Write the frame cells using pyexcelerate.

        Parameters
        ----------
        cells : iterable of cell objects exposing ``row``, ``col``, ``val``,
            ``mergestart`` and ``mergeend`` attributes.
        sheet_name : name of the target sheet; resolved through
            ``self._get_sheet_name``. The sheet is created on first use and
            cached in ``self.sheets``.
        startrow, startcol : offsets added to every cell position.
        """
        sheet_name = self._get_sheet_name(sheet_name)

        if sheet_name in self.sheets:
            wks = self.sheets[sheet_name]
        else:
            wks = self.book.new_sheet(sheet_name)
            self.sheets[sheet_name] = wks

        for cell in cells:
            val = _conv_value(cell.val)

            # Cells that carry merge information (``mergestart`` and
            # ``mergeend`` both set) used to be skipped entirely, which
            # silently dropped their values from the output. No merged
            # range is created here, but the value is still written to the
            # anchor cell so that no data is lost.
            #
            # The ``1 +`` offsets shift the incoming positions by one;
            # presumably pyexcelerate worksheets are 1-indexed -- confirm.
            wks[1 + startrow + cell.row][1 + startcol + cell.col] = val


register_writer(_PyExcelerate)

68 changes: 68 additions & 0 deletions pandas/io/tests/test_excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,13 @@ def _skip_if_no_xlsxwriter():
raise nose.SkipTest('xlsxwriter not installed, skipping')


def _skip_if_no_pyexcelerate():
    # Guard for tests that need the optional pyexcelerate package: the
    # import is attempted only to probe availability, and a failed import
    # is turned into a nose skip rather than a test error.
    try:
        import pyexcelerate  # NOQA
    except ImportError:
        reason = 'pyexcelerate not installed, skipping'
        raise nose.SkipTest(reason)


def _skip_if_no_excelsuite():
_skip_if_no_xlrd()
_skip_if_no_xlwt()
Expand Down Expand Up @@ -953,6 +960,67 @@ def test_roundtrip_indexlabels(self):
self.assertAlmostEqual(frame.index.names, recons.index.names)


class PyExcelerateTests(ExcelWriterBase, unittest.TestCase):
    # Runs the shared ExcelWriterBase test-suite against the 'pyexcelerate'
    # engine, writing '.xlsx' files. The suite is skipped when pyexcelerate
    # is not installed (see ``check_skip``).
    ext = 'xlsx'
    engine_name = 'pyexcelerate'
    check_skip = staticmethod(_skip_if_no_pyexcelerate)

    # Override of the superclass test for files written by PyExcelerate.
    # NOTE(review): the original comment said the override exists to use
    # assertAlmostEqual on floating point values read back from the file,
    # but the only "almost equal" assertion compared index *names*. Those
    # are not numbers, so a mismatch raised TypeError instead of a test
    # failure; assertEqual is used for all three checks now.
    def test_roundtrip_indexlabels(self):
        _skip_if_no_xlrd()
        ext = self.ext
        path = '__tmp_to_excel_from_excel_indexlabels__.' + ext

        with ensure_clean(path) as path:

            self.frame['A'][:5] = nan

            # Smoke-test several writer options; output is not read back.
            self.frame.to_excel(path, 'test1')
            self.frame.to_excel(path, 'test1', cols=['A', 'B'])
            self.frame.to_excel(path, 'test1', header=False)
            self.frame.to_excel(path, 'test1', index=False)

            # test index_label given as a one-element list
            frame = (DataFrame(np.random.randn(10, 2)) >= 0)
            frame.to_excel(path, 'test1', index_label=['test'])
            reader = ExcelFile(path)
            recons = reader.parse('test1', index_col=0).astype(np.int64)
            frame.index.names = ['test']
            self.assertEqual(frame.index.names, recons.index.names)

            # test index_label given as a list longer than the index depth
            frame = (DataFrame(np.random.randn(10, 2)) >= 0)
            frame.to_excel(
                path, 'test1', index_label=['test', 'dummy', 'dummy2'])
            reader = ExcelFile(path)
            recons = reader.parse('test1', index_col=0).astype(np.int64)
            frame.index.names = ['test']
            self.assertEqual(frame.index.names, recons.index.names)

            # test index_label given as a plain string
            frame = (DataFrame(np.random.randn(10, 2)) >= 0)
            frame.to_excel(path, 'test1', index_label='test')
            reader = ExcelFile(path)
            recons = reader.parse('test1', index_col=0).astype(np.int64)
            frame.index.names = ['test']
            self.assertEqual(frame.index.names, recons.index.names)

    # TODO: Skip these tests until the pyexcelerate date issue is fixed.
    def test_excel_roundtrip_datetime(self):
        raise nose.SkipTest('pyexcelerate dates not supported')

    def test_sheets(self):
        raise nose.SkipTest('pyexcelerate dates not supported')

    def test_to_excel_multiindex_dates(self):
        raise nose.SkipTest('pyexcelerate dates not supported')

    def test_to_excel_periodindex(self):
        raise nose.SkipTest('pyexcelerate dates not supported')

    def test_tsframe(self):
        raise nose.SkipTest('pyexcelerate dates not supported')


class ExcelWriterEngineTests(unittest.TestCase):
def test_ExcelWriter_dispatch(self):
with tm.assertRaisesRegexp(ValueError, 'No engine'):
Expand Down
22 changes: 22 additions & 0 deletions pandas/tests/test_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1595,6 +1595,28 @@ def test_to_excel_xlsxwriter(self):
recdf = reader.parse(str(item), index_col=0)
assert_frame_equal(df, recdf)

def test_to_excel_pyexcelerate(self):
    # Round-trip every item of ``self.panel`` through an .xlsx file written
    # with the 'pyexcelerate' engine and compare it with the frame read
    # back from the file.
    #
    # TODO: Skip this test until the pyexcelerate date issue is fixed.
    raise nose.SkipTest('pyexcelerate dates not supported')

    # NOTE: everything below is unreachable while the unconditional skip
    # above is in place. It is kept so the test can be re-enabled by
    # deleting the ``raise`` once dates are supported.
    try:
        import xlrd
        import pyexcelerate
        from pandas.io.excel import ExcelFile
    except ImportError:
        raise nose.SkipTest("Requires xlrd and pyexcelerate. Skipping.")

    path = '__tmp__.xlsx'
    with ensure_clean(path) as path:
        self.panel.to_excel(path, engine='pyexcelerate')
        try:
            reader = ExcelFile(path)
        except ImportError as e:
            raise nose.SkipTest("cannot write excel file: %s" % e)

        # One sheet per panel item, named after the item.
        for item, df in compat.iteritems(self.panel):
            recdf = reader.parse(str(item), index_col=0)
            assert_frame_equal(df, recdf)

def test_dropna(self):
p = Panel(np.random.randn(4, 5, 6), major_axis=list('abcde'))
p.ix[:, ['b', 'd'], 0] = np.nan
Expand Down