diff --git a/.travis.yml b/.travis.yml index 387dec1ed2658..f46d9c4735ca4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -41,6 +41,8 @@ before_install: install: - echo "Waldo2" - ci/install.sh + # Temp testing measure while waiting for PyPI release. + - pip install git+git://github.com/kz26/PyExcelerate.git before_script: - mysql -e 'create database pandas_nosetest;' diff --git a/doc/source/install.rst b/doc/source/install.rst index 532c90b83ebb0..4beea77c716ed 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -101,8 +101,8 @@ Optional Dependencies * `openpyxl `__, `xlrd/xlwt `__ * openpyxl version 1.6.1 or higher * Needed for Excel I/O - * `XlsxWriter `__ - * Alternative Excel writer. + * `XlsxWriter `__, `PyExcelerate `__ + * Alternative Excel writers. * `boto `__: necessary for Amazon S3 access. * One of `PyQt4 diff --git a/doc/source/release.rst b/doc/source/release.rst index 8488d03f97cbd..a5c55eaea9e2b 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -136,6 +136,8 @@ Improvements to existing features - Added XlsxWriter as an optional ``ExcelWriter`` engine. This is about 5x faster than the default openpyxl xlsx writer and is equivalent in speed to the xlwt xls writer module. (:issue:`4542`) + - Added PyExcelerate as an optional ``ExcelWriter`` engine. This is about + 14x faster than the default openpyxl xlsx writer. - allow DataFrame constructor to accept more list-like objects, e.g. 
class _PyExcelerate(ExcelWriter):
    """``ExcelWriter`` engine that writes ``.xlsx`` files via pyexcelerate.

    Only cell values are written; cell styles are not applied and merged
    ranges are not merged (the value is placed in the range's first cell).
    """
    engine = 'pyexcelerate'
    supported_extensions = ('.xlsx',)

    def __init__(self, path, **engine_kwargs):
        """Create the writer and its underlying pyexcelerate workbook.

        Parameters
        ----------
        path : str
            Destination file, later passed to ``Workbook.save``.
        **engine_kwargs
            Forwarded to both ``ExcelWriter.__init__`` and
            ``pyexcelerate.Workbook``.
        """
        # Imported here so the module can be loaded without pyexcelerate.
        import pyexcelerate

        super(_PyExcelerate, self).__init__(path, **engine_kwargs)

        # NOTE(review): confirm that ``pyexcelerate.Workbook`` accepts the
        # output path as its first positional argument.
        self.book = pyexcelerate.Workbook(path, **engine_kwargs)

    def save(self):
        """
        Save workbook to disk.
        """
        return self.book.save(self.path)

    def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0):
        """Write the frame cells to a worksheet using pyexcelerate.

        Parameters
        ----------
        cells : iterable
            Objects exposing ``val``, ``row``, ``col``, ``mergestart`` and
            ``mergeend``.
        sheet_name : str, optional
            Resolved through ``self._get_sheet_name``.
        startrow, startcol : int, default 0
            Zero-based offsets added to every cell position.
        """
        sheet_name = self._get_sheet_name(sheet_name)

        # Reuse the worksheet if this sheet has been written to before.
        if sheet_name in self.sheets:
            wks = self.sheets[sheet_name]
        else:
            wks = self.book.new_sheet(sheet_name)
            self.sheets[sheet_name] = wks

        for cell in cells:
            val = _conv_value(cell.val)

            # Promote plain dates to midnight datetimes.  datetime.datetime
            # is a subclass of datetime.date, so it has to be excluded
            # explicitly or every timestamp would lose its time of day.
            if (isinstance(val, datetime.date) and
                    not isinstance(val, datetime.datetime)):
                val = datetime.datetime.fromordinal(val.toordinal())

            # Cells carrying merge information are written to their own
            # (first) position instead of being skipped: merging is not
            # performed by this engine, but the value must not be lost.
            # The ``1 +`` shifts the zero-based position by one.
            wks.set_cell_value(1 + startrow + cell.row,
                               1 + startcol + cell.col,
                               val)


register_writer(_PyExcelerate)
class PyExcelerateTests(ExcelWriterBase, unittest.TestCase):
    """Excel writer tests configured for the ``pyexcelerate`` engine."""
    ext = 'xlsx'
    engine_name = 'pyexcelerate'
    check_skip = staticmethod(_skip_if_no_pyexcelerate)

    def _check_index_label(self, path, index_label):
        """Write a random boolean frame to *path* using *index_label* and
        check that the index name read back is ``['test']``."""
        frame = (DataFrame(np.random.randn(10, 2)) >= 0)
        frame.to_excel(path, 'test1', index_label=index_label)
        reader = ExcelFile(path)
        recons = reader.parse('test1', index_col=0).astype(np.int64)
        frame.index.names = ['test']
        # Index names are not floats: assertAlmostEqual on unequal lists
        # raises TypeError instead of reporting a failure, so compare with
        # assertEqual in every case.
        self.assertEqual(frame.index.names, recons.index.names)

    # Override of the superclass test for the PyExcelerate output file.
    def test_roundtrip_indexlabels(self):
        _skip_if_no_xlrd()
        path = '__tmp_to_excel_from_excel_indexlabels__.' + self.ext

        with ensure_clean(path) as path:

            self.frame['A'][:5] = nan

            # Exercise the main to_excel option combinations first.
            self.frame.to_excel(path, 'test1')
            self.frame.to_excel(path, 'test1', cols=['A', 'B'])
            self.frame.to_excel(path, 'test1', header=False)
            self.frame.to_excel(path, 'test1', index=False)

            # test index_label: list, over-long list and plain string
            for index_label in (['test'],
                                ['test', 'dummy', 'dummy2'],
                                'test'):
                self._check_index_label(path, index_label)
def test_to_excel_pyexcelerate(self):
    """Write ``self.panel`` with the pyexcelerate engine and read it back.

    Each item of the panel is parsed from the sheet named after it and
    compared with the original frame.  The test is skipped when xlrd or
    pyexcelerate cannot be imported.
    """
    try:
        # xlrd and pyexcelerate are imported only to probe availability.
        import xlrd  # NOQA
        import pyexcelerate  # NOQA
        from pandas.io.excel import ExcelFile
    except ImportError:
        raise nose.SkipTest("Requires xlrd and pyexcelerate. Skipping.")

    with ensure_clean('__tmp__.xlsx') as tmp_path:
        self.panel.to_excel(tmp_path, engine='pyexcelerate')

        try:
            workbook = ExcelFile(tmp_path)
        except ImportError as err:
            raise nose.SkipTest("cannot write excel file: %s" % err)

        for name, expected in compat.iteritems(self.panel):
            roundtripped = workbook.parse(str(name), index_col=0)
            assert_frame_equal(expected, roundtripped)