From e8a553806a9ceedd7101c727062721fb8d3a41b3 Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Sun, 15 Sep 2013 08:48:28 -0400 Subject: [PATCH 1/8] TST: Cleanup Excel tests to make it easier to add and test additional writers --- pandas/io/tests/test_excel.py | 527 +++++++++++++++------------------- 1 file changed, 225 insertions(+), 302 deletions(-) diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index a9822ea0b46c9..00536026994c5 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -1,12 +1,7 @@ # pylint: disable=E1101 -from pandas.compat import StringIO, BytesIO, PY3, u, range, map -from datetime import datetime -from os.path import split as psplit -import csv +from pandas.compat import u, range, map import os -import sys -import re import unittest import nose @@ -14,51 +9,36 @@ from numpy import nan import numpy as np -from pandas import DataFrame, Series, Index, MultiIndex, DatetimeIndex -import pandas.io.parsers as parsers -from pandas.io.parsers import (read_csv, read_table, read_fwf, - TextParser, TextFileReader) +from pandas import DataFrame, Index, MultiIndex +from pandas.io.parsers import read_csv from pandas.io.excel import ( ExcelFile, ExcelWriter, read_excel, _XlwtWriter, _OpenpyxlWriter, register_writer ) -from pandas.util.testing import (assert_almost_equal, - assert_series_equal, - network, - ensure_clean) +from pandas.util.testing import ensure_clean import pandas.util.testing as tm import pandas as pd -import pandas.lib as lib -from pandas import compat -from pandas.lib import Timestamp -from pandas.tseries.index import date_range -import pandas.tseries.tools as tools - -from numpy.testing.decorators import slow - -from pandas.parser import OverflowError - def _skip_if_no_xlrd(): try: import xlrd ver = tuple(map(int, xlrd.__VERSION__.split(".")[:2])) if ver < (0, 9): - raise nose.SkipTest('xlrd not installed, skipping') + raise nose.SkipTest('xlrd < 0.9, skipping') except ImportError: 
raise nose.SkipTest('xlrd not installed, skipping') def _skip_if_no_xlwt(): try: - import xlwt + import xlwt # NOQA except ImportError: raise nose.SkipTest('xlwt not installed, skipping') def _skip_if_no_openpyxl(): try: - import openpyxl + import openpyxl # NOQA except ImportError: raise nose.SkipTest('openpyxl not installed, skipping') @@ -78,8 +58,7 @@ def _skip_if_no_excelsuite(): _mixed_frame['foo'] = 'bar' -class ExcelTests(unittest.TestCase): - +class SharedItems(object): def setUp(self): self.dirpath = tm.get_data_path() self.csv1 = os.path.join(self.dirpath, 'test1.csv') @@ -91,6 +70,13 @@ def setUp(self): self.tsframe = _tsframe.copy() self.mixed_frame = _mixed_frame.copy() + def read_csv(self, *args, **kwds): + kwds = kwds.copy() + kwds['engine'] = 'python' + return read_csv(*args, **kwds) + + +class ExcelReaderTests(SharedItems, unittest.TestCase): def test_parse_cols_int(self): _skip_if_no_openpyxl() _skip_if_no_xlrd() @@ -226,24 +212,6 @@ def test_excel_table_sheet_by_index(self): (self.xlsx1, self.csv1)]: self.check_excel_table_sheet_by_index(filename, csvfile) - def check_excel_sheet_by_name_raise(self, ext): - import xlrd - pth = os.path.join(self.dirpath, 'testit.{0}'.format(ext)) - - with ensure_clean(pth) as pth: - gt = DataFrame(np.random.randn(10, 2)) - gt.to_excel(pth) - xl = ExcelFile(pth) - df = xl.parse(0) - tm.assert_frame_equal(gt, df) - - self.assertRaises(xlrd.XLRDError, xl.parse, '0') - - def test_excel_sheet_by_name_raise(self): - _skip_if_no_xlrd() - _skip_if_no_xlwt() - for ext in ('xls', 'xlsx'): - self.check_excel_sheet_by_name_raise(ext) def test_excel_table(self): _skip_if_no_xlrd() @@ -276,7 +244,7 @@ def test_excel_read_buffer(self): pth = os.path.join(self.dirpath, 'test.xlsx') f = open(pth, 'rb') xl = ExcelFile(f) - df = xl.parse('Sheet1', index_col=0, parse_dates=True) + xl.parse('Sheet1', index_col=0, parse_dates=True) def test_xlsx_table(self): _skip_if_no_xlrd() @@ -298,32 +266,37 @@ def test_xlsx_table(self): 
tm.assert_frame_equal(df4, df.ix[:-1]) tm.assert_frame_equal(df4, df5) - def test_specify_kind_xls(self): - _skip_if_no_xlrd() - xlsx_file = os.path.join(self.dirpath, 'test.xlsx') - xls_file = os.path.join(self.dirpath, 'test.xls') - # succeeds with xlrd 0.8.0, weird - # self.assertRaises(Exception, ExcelFile, xlsx_file, kind='xls') +class ExcelWriterBase(SharedItems): + # test cases to run with different extensions + # for each writer + # to add a writer test, define two things: + # 1. a check_skip function that skips your tests if your writer isn't + # installed + # 2. add a property ext, which is the file extension that your writer writes to + def setUp(self): + self.check_skip() + super(ExcelWriterBase, self).setUp() - # ExcelFile(open(xls_file, 'rb'), kind='xls') - # self.assertRaises(Exception, ExcelFile, open(xlsx_file, 'rb'), - # kind='xls') + def test_excel_sheet_by_name_raise(self): + _skip_if_no_xlrd() + import xlrd - def read_csv(self, *args, **kwds): - kwds = kwds.copy() - kwds['engine'] = 'python' - return read_csv(*args, **kwds) + ext = self.ext + pth = os.path.join(self.dirpath, 'testit.{0}'.format(ext)) - def test_excel_roundtrip_xls(self): - _skip_if_no_excelsuite() - self._check_extension('xls') + with ensure_clean(pth) as pth: + gt = DataFrame(np.random.randn(10, 2)) + gt.to_excel(pth) + xl = ExcelFile(pth) + df = xl.parse(0) + tm.assert_frame_equal(gt, df) - def test_excel_roundtrip_xlsx(self): - _skip_if_no_excelsuite() - self._check_extension('xlsx') + self.assertRaises(xlrd.XLRDError, xl.parse, '0') - def _check_extension(self, ext): + def test_roundtrip(self): + _skip_if_no_xlrd() + ext = self.ext path = '__tmp_to_excel_from_excel__.' 
+ ext with ensure_clean(path) as path: @@ -357,19 +330,9 @@ def _check_extension(self, ext): recons = read_excel(path, 'test1', index_col=0, na_values=[88,88.0]) tm.assert_frame_equal(self.frame, recons) - def test_excel_roundtrip_xls_mixed(self): + def test_mixed(self): _skip_if_no_xlrd() - _skip_if_no_xlwt() - - self._check_extension_mixed('xls') - - def test_excel_roundtrip_xlsx_mixed(self): - _skip_if_no_openpyxl() - _skip_if_no_xlrd() - - self._check_extension_mixed('xlsx') - - def _check_extension_mixed(self, ext): + ext = self.ext path = '__tmp_to_excel_from_excel_mixed__.' + ext with ensure_clean(path) as path: @@ -378,18 +341,10 @@ def _check_extension_mixed(self, ext): recons = reader.parse('test1', index_col=0) tm.assert_frame_equal(self.mixed_frame, recons) - def test_excel_roundtrip_xls_tsframe(self): - _skip_if_no_xlrd() - _skip_if_no_xlwt() - - self._check_extension_tsframe('xls') - def test_excel_roundtrip_xlsx_tsframe(self): - _skip_if_no_openpyxl() + def test_tsframe(self): _skip_if_no_xlrd() - self._check_extension_tsframe('xlsx') - - def _check_extension_tsframe(self, ext): + ext = self.ext path = '__tmp_to_excel_from_excel_tsframe__.' + ext df = tm.makeTimeDataFrame()[:5] @@ -400,15 +355,9 @@ def _check_extension_tsframe(self, ext): recons = reader.parse('test1') tm.assert_frame_equal(df, recons) - def test_excel_roundtrip_xls_int64(self): - _skip_if_no_excelsuite() - self._check_extension_int64('xls') - - def test_excel_roundtrip_xlsx_int64(self): - _skip_if_no_excelsuite() - self._check_extension_int64('xlsx') - - def _check_extension_int64(self, ext): + def test_int64(self): + _skip_if_no_xlrd() + ext = self.ext path = '__tmp_to_excel_from_excel_int64__.' 
+ ext with ensure_clean(path) as path: @@ -426,15 +375,9 @@ def _check_extension_int64(self, ext): recons = reader.parse('test1').astype(np.int64) tm.assert_frame_equal(frame, recons, check_dtype=False) - def test_excel_roundtrip_xls_bool(self): - _skip_if_no_excelsuite() - self._check_extension_bool('xls') - - def test_excel_roundtrip_xlsx_bool(self): - _skip_if_no_excelsuite() - self._check_extension_bool('xlsx') - - def _check_extension_bool(self, ext): + def test_bool(self): + _skip_if_no_xlrd() + ext = self.ext path = '__tmp_to_excel_from_excel_bool__.' + ext with ensure_clean(path) as path: @@ -452,15 +395,9 @@ def _check_extension_bool(self, ext): recons = reader.parse('test1').astype(np.bool8) tm.assert_frame_equal(frame, recons) - def test_excel_roundtrip_xls_sheets(self): - _skip_if_no_excelsuite() - self._check_extension_sheets('xls') - - def test_excel_roundtrip_xlsx_sheets(self): - _skip_if_no_excelsuite() - self._check_extension_sheets('xlsx') - - def _check_extension_sheets(self, ext): + def test_sheets(self): + _skip_if_no_xlrd() + ext = self.ext path = '__tmp_to_excel_from_excel_sheets__.' + ext with ensure_clean(path) as path: @@ -485,15 +422,9 @@ def _check_extension_sheets(self, ext): np.testing.assert_equal('test1', reader.sheet_names[0]) np.testing.assert_equal('test2', reader.sheet_names[1]) - def test_excel_roundtrip_xls_colaliases(self): - _skip_if_no_excelsuite() - self._check_extension_colaliases('xls') - - def test_excel_roundtrip_xlsx_colaliases(self): - _skip_if_no_excelsuite() - self._check_extension_colaliases('xlsx') - - def _check_extension_colaliases(self, ext): + def test_colaliases(self): + _skip_if_no_xlrd() + ext = self.ext path = '__tmp_to_excel_from_excel_aliases__.' 
+ ext with ensure_clean(path) as path: @@ -513,15 +444,9 @@ def _check_extension_colaliases(self, ext): xp.columns = col_aliases tm.assert_frame_equal(xp, rs) - def test_excel_roundtrip_xls_indexlabels(self): - _skip_if_no_excelsuite() - self._check_extension_indexlabels('xls') - - def test_excel_roundtrip_xlsx_indexlabels(self): - _skip_if_no_excelsuite() - self._check_extension_indexlabels('xlsx') - - def _check_extension_indexlabels(self, ext): + def test_roundtrip_indexlabels(self): + _skip_if_no_xlrd() + ext = self.ext path = '__tmp_to_excel_from_excel_indexlabels__.' + ext with ensure_clean(path) as path: @@ -557,7 +482,7 @@ def _check_extension_indexlabels(self, ext): self.assertEqual(frame.index.names, recons.index.names) # test index_labels in same row as column names - path = '%s.xls' % tm.rands(10) + path = '%s.%s' % (tm.rands(10), ext) with ensure_clean(path) as path: @@ -574,9 +499,8 @@ def _check_extension_indexlabels(self, ext): def test_excel_roundtrip_indexname(self): _skip_if_no_xlrd() - _skip_if_no_xlwt() - path = '%s.xls' % tm.rands(10) + path = '%s.%s' % (tm.rands(10), self.ext) df = DataFrame(np.random.randn(10, 4)) df.index.name = 'foo' @@ -592,10 +516,9 @@ def test_excel_roundtrip_indexname(self): def test_excel_roundtrip_datetime(self): _skip_if_no_xlrd() - _skip_if_no_xlwt() # datetime.date, not sure what to test here exactly - path = '__tmp_excel_roundtrip_datetime__.xls' + path = '__tmp_excel_roundtrip_datetime__.' 
+ self.ext tsf = self.tsframe.copy() with ensure_clean(path) as path: @@ -605,86 +528,22 @@ def test_excel_roundtrip_datetime(self): recons = reader.parse('test1') tm.assert_frame_equal(self.tsframe, recons) - def test_ExcelWriter_dispatch(self): - with tm.assertRaisesRegexp(ValueError, 'No engine'): - writer = ExcelWriter('nothing') - - _skip_if_no_openpyxl() - writer = ExcelWriter('apple.xlsx') - tm.assert_isinstance(writer, _OpenpyxlWriter) - - _skip_if_no_xlwt() - writer = ExcelWriter('apple.xls') - tm.assert_isinstance(writer, _XlwtWriter) - - - def test_register_writer(self): - # some awkward mocking to test out dispatch and such actually works - called_save = [] - called_write_cells = [] - class DummyClass(ExcelWriter): - called_save = False - called_write_cells = False - supported_extensions = ['test', 'xlsx', 'xls'] - engine = 'dummy' - - def save(self): - called_save.append(True) - - def write_cells(self, *args, **kwargs): - called_write_cells.append(True) - - def check_called(func): - func() - self.assert_(len(called_save) >= 1) - self.assert_(len(called_write_cells) >= 1) - del called_save[:] - del called_write_cells[:] - - register_writer(DummyClass) - writer = ExcelWriter('something.test') - tm.assert_isinstance(writer, DummyClass) - df = tm.makeCustomDataframe(1, 1) - panel = tm.makePanel() - func = lambda: df.to_excel('something.test') - check_called(func) - check_called(lambda: panel.to_excel('something.test')) - from pandas import set_option, get_option - val = get_option('io.excel.xlsx.writer') - set_option('io.excel.xlsx.writer', 'dummy') - check_called(lambda: df.to_excel('something.xlsx')) - check_called(lambda: df.to_excel('something.xls', engine='dummy')) - set_option('io.excel.xlsx.writer', val) - - - def test_to_excel_periodindex(self): - _skip_if_no_excelsuite() - - for ext in ['xls', 'xlsx']: - path = '__tmp_to_excel_periodindex__.' 
+ ext - frame = self.tsframe - xp = frame.resample('M', kind='period') + _skip_if_no_xlrd() + path = '__tmp_to_excel_periodindex__.' + self.ext + frame = self.tsframe + xp = frame.resample('M', kind='period') - with ensure_clean(path) as path: - xp.to_excel(path, 'sht1') + with ensure_clean(path) as path: + xp.to_excel(path, 'sht1') - reader = ExcelFile(path) - rs = reader.parse('sht1', index_col=0, parse_dates=True) - tm.assert_frame_equal(xp, rs.to_period('M')) + reader = ExcelFile(path) + rs = reader.parse('sht1', index_col=0, parse_dates=True) + tm.assert_frame_equal(xp, rs.to_period('M')) def test_to_excel_multiindex(self): _skip_if_no_xlrd() - _skip_if_no_xlwt() - - self._check_excel_multiindex('xls') - - def test_to_excel_multiindex_xlsx(self): - _skip_if_no_xlrd() - _skip_if_no_openpyxl() - self._check_excel_multiindex('xlsx') - - def _check_excel_multiindex(self, ext): + ext = self.ext path = '__tmp_to_excel_multiindex__' + ext + '__.' + ext frame = self.frame @@ -708,15 +567,7 @@ def _check_excel_multiindex(self, ext): def test_to_excel_multiindex_dates(self): _skip_if_no_xlrd() - _skip_if_no_xlwt() - self._check_excel_multiindex_dates('xls') - - def test_to_excel_multiindex_xlsx_dates(self): - _skip_if_no_openpyxl() - _skip_if_no_xlrd() - self._check_excel_multiindex_dates('xlsx') - - def _check_excel_multiindex_dates(self, ext): + ext = self.ext path = '__tmp_to_excel_multiindex_dates__' + ext + '__.' + ext # try multiindex with dates @@ -742,83 +593,48 @@ def _check_excel_multiindex_dates(self, ext): self.tsframe.index = old_index # needed if setUP becomes classmethod def test_to_excel_float_format(self): - _skip_if_no_excelsuite() - for ext in ['xls', 'xlsx']: - filename = '__tmp_to_excel_float_format__.' 
+ ext - df = DataFrame([[0.123456, 0.234567, 0.567567], - [12.32112, 123123.2, 321321.2]], - index=['A', 'B'], columns=['X', 'Y', 'Z']) - - with ensure_clean(filename) as filename: - df.to_excel(filename, 'test1', float_format='%.2f') - - reader = ExcelFile(filename) - rs = reader.parse('test1', index_col=None) - xp = DataFrame([[0.12, 0.23, 0.57], - [12.32, 123123.20, 321321.20]], - index=['A', 'B'], columns=['X', 'Y', 'Z']) - tm.assert_frame_equal(rs, xp) + _skip_if_no_xlrd() + ext = self.ext + filename = '__tmp_to_excel_float_format__.' + ext + df = DataFrame([[0.123456, 0.234567, 0.567567], + [12.32112, 123123.2, 321321.2]], + index=['A', 'B'], columns=['X', 'Y', 'Z']) + + with ensure_clean(filename) as filename: + df.to_excel(filename, 'test1', float_format='%.2f') + + reader = ExcelFile(filename) + rs = reader.parse('test1', index_col=None) + xp = DataFrame([[0.12, 0.23, 0.57], + [12.32, 123123.20, 321321.20]], + index=['A', 'B'], columns=['X', 'Y', 'Z']) + tm.assert_frame_equal(rs, xp) def test_to_excel_unicode_filename(self): - _skip_if_no_excelsuite() - - for ext in ['xls', 'xlsx']: - filename = u('\u0192u.') + ext - - try: - f = open(filename, 'wb') - except UnicodeEncodeError: - raise nose.SkipTest('no unicode file names on this system') - else: - f.close() - - df = DataFrame([[0.123456, 0.234567, 0.567567], - [12.32112, 123123.2, 321321.2]], - index=['A', 'B'], columns=['X', 'Y', 'Z']) - - with ensure_clean(filename) as filename: - df.to_excel(filename, 'test1', float_format='%.2f') - - reader = ExcelFile(filename) - rs = reader.parse('test1', index_col=None) - xp = DataFrame([[0.12, 0.23, 0.57], - [12.32, 123123.20, 321321.20]], - index=['A', 'B'], columns=['X', 'Y', 'Z']) - tm.assert_frame_equal(rs, xp) - - def test_to_excel_styleconverter(self): - _skip_if_no_xlwt() - _skip_if_no_openpyxl() - - import xlwt - import openpyxl - - hstyle = {"font": {"bold": True}, - "borders": {"top": "thin", - "right": "thin", - "bottom": "thin", - "left": "thin"}, - 
"alignment": {"horizontal": "center"}} - xls_style = _XlwtWriter._convert_to_style(hstyle) - self.assertTrue(xls_style.font.bold) - self.assertEquals(xlwt.Borders.THIN, xls_style.borders.top) - self.assertEquals(xlwt.Borders.THIN, xls_style.borders.right) - self.assertEquals(xlwt.Borders.THIN, xls_style.borders.bottom) - self.assertEquals(xlwt.Borders.THIN, xls_style.borders.left) - self.assertEquals(xlwt.Alignment.HORZ_CENTER, xls_style.alignment.horz) - - xlsx_style = _OpenpyxlWriter._convert_to_style(hstyle) - self.assertTrue(xlsx_style.font.bold) - self.assertEquals(openpyxl.style.Border.BORDER_THIN, - xlsx_style.borders.top.border_style) - self.assertEquals(openpyxl.style.Border.BORDER_THIN, - xlsx_style.borders.right.border_style) - self.assertEquals(openpyxl.style.Border.BORDER_THIN, - xlsx_style.borders.bottom.border_style) - self.assertEquals(openpyxl.style.Border.BORDER_THIN, - xlsx_style.borders.left.border_style) - self.assertEquals(openpyxl.style.Alignment.HORIZONTAL_CENTER, - xlsx_style.alignment.horizontal) + _skip_if_no_xlrd() + ext = self.ext + filename = u('\u0192u.') + ext + + try: + f = open(filename, 'wb') + except UnicodeEncodeError: + raise nose.SkipTest('no unicode file names on this system') + else: + f.close() + + df = DataFrame([[0.123456, 0.234567, 0.567567], + [12.32112, 123123.2, 321321.2]], + index=['A', 'B'], columns=['X', 'Y', 'Z']) + + with ensure_clean(filename) as filename: + df.to_excel(filename, 'test1', float_format='%.2f') + + reader = ExcelFile(filename) + rs = reader.parse('test1', index_col=None) + xp = DataFrame([[0.12, 0.23, 0.57], + [12.32, 123123.20, 321321.20]], + index=['A', 'B'], columns=['X', 'Y', 'Z']) + tm.assert_frame_equal(rs, xp) # def test_to_excel_header_styling_xls(self): @@ -921,14 +737,13 @@ def test_to_excel_styleconverter(self): # self.assertTrue(ws.cell(maddr).merged) # os.remove(filename) def test_excel_010_hemstring(self): - _skip_if_no_excelsuite() - + _skip_if_no_xlrd() from pandas.util.testing 
import makeCustomDataframe as mkdf # ensure limited functionality in 0.10 # override of #2370 until sorted out in 0.11 def roundtrip(df, header=True, parser_hdr=0): - path = '__tmp__test_xl_010_%s__.xls' % np.random.randint(1, 10000) + path = '__tmp__test_xl_010_%s__.%s' % (np.random.randint(1, 10000), self.ext) df.to_excel(path, header=header) with ensure_clean(path) as path: @@ -972,12 +787,120 @@ def roundtrip(df, header=True, parser_hdr=0): self.assertEqual(res.shape, (1, 2)) self.assertTrue(res.ix[0, 0] is not np.nan) + +class OpenpyxlTests(ExcelWriterBase, unittest.TestCase): + ext = 'xlsx' + check_skip = staticmethod(_skip_if_no_openpyxl) + + def test_to_excel_styleconverter(self): + _skip_if_no_openpyxl() + + import openpyxl + + hstyle = {"font": {"bold": True}, + "borders": {"top": "thin", + "right": "thin", + "bottom": "thin", + "left": "thin"}, + "alignment": {"horizontal": "center"}} + + xlsx_style = _OpenpyxlWriter._convert_to_style(hstyle) + self.assertTrue(xlsx_style.font.bold) + self.assertEquals(openpyxl.style.Border.BORDER_THIN, + xlsx_style.borders.top.border_style) + self.assertEquals(openpyxl.style.Border.BORDER_THIN, + xlsx_style.borders.right.border_style) + self.assertEquals(openpyxl.style.Border.BORDER_THIN, + xlsx_style.borders.bottom.border_style) + self.assertEquals(openpyxl.style.Border.BORDER_THIN, + xlsx_style.borders.left.border_style) + self.assertEquals(openpyxl.style.Alignment.HORIZONTAL_CENTER, + xlsx_style.alignment.horizontal) + + +class XlwtTests(ExcelWriterBase, unittest.TestCase): + ext = 'xls' + check_skip = staticmethod(_skip_if_no_xlwt) + + def test_to_excel_styleconverter(self): + _skip_if_no_xlwt() + + import xlwt + + hstyle = {"font": {"bold": True}, + "borders": {"top": "thin", + "right": "thin", + "bottom": "thin", + "left": "thin"}, + "alignment": {"horizontal": "center"}} + xls_style = _XlwtWriter._convert_to_style(hstyle) + self.assertTrue(xls_style.font.bold) + self.assertEquals(xlwt.Borders.THIN, 
xls_style.borders.top) + self.assertEquals(xlwt.Borders.THIN, xls_style.borders.right) + self.assertEquals(xlwt.Borders.THIN, xls_style.borders.bottom) + self.assertEquals(xlwt.Borders.THIN, xls_style.borders.left) + self.assertEquals(xlwt.Alignment.HORZ_CENTER, xls_style.alignment.horz) + +class ExcelWriterEngineTests(unittest.TestCase): + def test_ExcelWriter_dispatch(self): + with tm.assertRaisesRegexp(ValueError, 'No engine'): + writer = ExcelWriter('nothing') + + _skip_if_no_openpyxl() + writer = ExcelWriter('apple.xlsx') + tm.assert_isinstance(writer, _OpenpyxlWriter) + + _skip_if_no_xlwt() + writer = ExcelWriter('apple.xls') + tm.assert_isinstance(writer, _XlwtWriter) + + + def test_register_writer(self): + # some awkward mocking to test out dispatch and such actually works + called_save = [] + called_write_cells = [] + class DummyClass(ExcelWriter): + called_save = False + called_write_cells = False + supported_extensions = ['test', 'xlsx', 'xls'] + engine = 'dummy' + + def save(self): + called_save.append(True) + + def write_cells(self, *args, **kwargs): + called_write_cells.append(True) + + def check_called(func): + func() + self.assert_(len(called_save) >= 1) + self.assert_(len(called_write_cells) >= 1) + del called_save[:] + del called_write_cells[:] + + register_writer(DummyClass) + writer = ExcelWriter('something.test') + tm.assert_isinstance(writer, DummyClass) + df = tm.makeCustomDataframe(1, 1) + panel = tm.makePanel() + func = lambda: df.to_excel('something.test') + check_called(func) + check_called(lambda: panel.to_excel('something.test')) + from pandas import set_option, get_option + val = get_option('io.excel.xlsx.writer') + set_option('io.excel.xlsx.writer', 'dummy') + check_called(lambda: df.to_excel('something.xlsx')) + check_called(lambda: df.to_excel('something.xls', engine='dummy')) + set_option('io.excel.xlsx.writer', val) + + +class ExcelLegacyTests(SharedItems, unittest.TestCase): def test_deprecated_from_parsers(self): # since 0.12 
changed the import path import warnings - with warnings.catch_warnings() as w: + with warnings.catch_warnings(): warnings.filterwarnings(action='ignore', category=FutureWarning) _skip_if_no_xlrd() From 857a578a4d93e1da2aa7a92f842b91928d1ef1ca Mon Sep 17 00:00:00 2001 From: John McNamara Date: Sun, 15 Sep 2013 21:34:30 +0100 Subject: [PATCH 2/8] ENH: Added xlsxwriter as an ExcelWriter option. Added xlsxwriter as an optional writer engine. Issue #4542. --- ci/requirements-2.6.txt | 1 + ci/requirements-2.7.txt | 1 + ci/requirements-2.7_LOCALE.txt | 1 + ci/requirements-3.2.txt | 1 + ci/requirements-3.3.txt | 1 + doc/source/10min.rst | 4 +- doc/source/install.rst | 2 + doc/source/io.rst | 34 ++++++++++--- pandas/core/frame.py | 8 +-- pandas/io/excel.py | 93 ++++++++++++++++++++++++++++++++++ pandas/tests/test_panel.py | 20 ++++++++ pandas/util/print_versions.py | 6 +++ 12 files changed, 158 insertions(+), 14 deletions(-) diff --git a/ci/requirements-2.6.txt b/ci/requirements-2.6.txt index 5038b9e2b6552..8bdace67c66e1 100644 --- a/ci/requirements-2.6.txt +++ b/ci/requirements-2.6.txt @@ -4,3 +4,4 @@ python-dateutil==1.5 pytz==2013b http://www.crummy.com/software/BeautifulSoup/bs4/download/4.2/beautifulsoup4-4.2.0.tar.gz html5lib==1.0b2 +xlsxwriter==0.4.3 diff --git a/ci/requirements-2.7.txt b/ci/requirements-2.7.txt index 6a94d48ad7a5f..2e903102de7b1 100644 --- a/ci/requirements-2.7.txt +++ b/ci/requirements-2.7.txt @@ -8,6 +8,7 @@ numexpr==2.1 tables==2.3.1 matplotlib==1.1.1 openpyxl==1.6.2 +xlsxwriter==0.4.3 xlrd==0.9.2 patsy==0.1.0 html5lib==1.0b2 diff --git a/ci/requirements-2.7_LOCALE.txt b/ci/requirements-2.7_LOCALE.txt index a7e9d62e3549b..056b63bbb8591 100644 --- a/ci/requirements-2.7_LOCALE.txt +++ b/ci/requirements-2.7_LOCALE.txt @@ -2,6 +2,7 @@ python-dateutil pytz==2013b xlwt==0.7.5 openpyxl==1.6.2 +xlsxwriter==0.4.3 xlrd==0.9.2 numpy==1.6.1 cython==0.19.1 diff --git a/ci/requirements-3.2.txt b/ci/requirements-3.2.txt index e907a2fa828f1..b689047019ed7 100644 
--- a/ci/requirements-3.2.txt +++ b/ci/requirements-3.2.txt @@ -1,6 +1,7 @@ python-dateutil==2.1 pytz==2013b openpyxl==1.6.2 +xlsxwriter==0.4.3 xlrd==0.9.2 numpy==1.6.2 cython==0.19.1 diff --git a/ci/requirements-3.3.txt b/ci/requirements-3.3.txt index eb1e725d98040..326098be5f7f4 100644 --- a/ci/requirements-3.3.txt +++ b/ci/requirements-3.3.txt @@ -1,6 +1,7 @@ python-dateutil==2.1 pytz==2013b openpyxl==1.6.2 +xlsxwriter==0.4.3 xlrd==0.9.2 html5lib==1.0b2 numpy==1.7.1 diff --git a/doc/source/10min.rst b/doc/source/10min.rst index 58c5b54968614..705514ac0c364 100644 --- a/doc/source/10min.rst +++ b/doc/source/10min.rst @@ -695,13 +695,13 @@ Writing to an excel file .. ipython:: python - df.to_excel('foo.xlsx', sheet_name='sheet1') + df.to_excel('foo.xlsx', sheet_name='Sheet1') Reading from an excel file .. ipython:: python - pd.read_excel('foo.xlsx', 'sheet1', index_col=None, na_values=['NA']) + pd.read_excel('foo.xlsx', 'Sheet1', index_col=None, na_values=['NA']) .. ipython:: python :suppress: diff --git a/doc/source/install.rst b/doc/source/install.rst index 4472d844c1871..b1dcad9448cfd 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -100,6 +100,8 @@ Optional Dependencies * `openpyxl `__, `xlrd/xlwt `__ * openpyxl version 1.6.1 or higher * Needed for Excel I/O + * `XlsxWriter `__ + * Alternative Excel writer. * `boto `__: necessary for Amazon S3 access. * One of `PyQt4 diff --git a/doc/source/io.rst b/doc/source/io.rst index 9fd2c167fa605..244c5ccdeba22 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -1654,7 +1654,7 @@ indices to be parsed. .. code-block:: python - read_excel('path_to_file.xls', Sheet1', parse_cols=[0, 2, 3], index_col=None, na_values=['NA']) + read_excel('path_to_file.xls', 'Sheet1', parse_cols=[0, 2, 3], index_col=None, na_values=['NA']) To write a DataFrame object to a sheet of an Excel file, you can use the ``to_excel`` instance method. 
The arguments are largely the same as ``to_csv`` @@ -1664,7 +1664,7 @@ written. For example: .. code-block:: python - df.to_excel('path_to_file.xlsx', sheet_name='sheet1') + df.to_excel('path_to_file.xlsx', sheet_name='Sheet1') Files with a ``.xls`` extension will be written using ``xlwt`` and those with a ``.xlsx`` extension will be written using ``openpyxl``. @@ -1677,8 +1677,8 @@ one can use the ExcelWriter class, as in the following example: .. code-block:: python writer = ExcelWriter('path_to_file.xlsx') - df1.to_excel(writer, sheet_name='sheet1') - df2.to_excel(writer, sheet_name='sheet2') + df1.to_excel(writer, sheet_name='Sheet1') + df2.to_excel(writer, sheet_name='Sheet2') writer.save() .. _io.excel.writers: @@ -1693,11 +1693,29 @@ Excel writer engines 1. the ``engine`` keyword argument 2. the filename extension (via the default specified in config options) -``pandas`` only supports ``openpyxl`` for ``.xlsx`` and ``.xlsm`` files and -``xlwt`` for ``.xls`` files. If you have multiple engines installed, you can choose the -engine to use by default via the options ``io.excel.xlsx.writer`` and -``io.excel.xls.writer``. +By default ``pandas`` only supports +`openpyxl `__ as a writer for ``.xlsx`` +and ``.xlsm`` files and `xlwt `__ as a writer for +``.xls`` files. If you have multiple engines installed, you can change the +default engine via the ``io.excel.xlsx.writer`` and ``io.excel.xls.writer`` +options. +For example if the optional `XlsxWriter `__ +module is installed you can use it as a xlsx writer engine as follows: + +.. code-block:: python + + # By setting the 'engine' in the DataFrame and Panel 'to_excel()' methods. + df.to_excel('path_to_file.xlsx', sheet_name='Sheet1', engine='xlsxwriter') + + # By setting the 'engine' in the ExcelWriter constructor. + writer = ExcelWriter('path_to_file.xlsx', engine='xlsxwriter') + + # Or via pandas configuration. 
+ from pandas import set_option + set_option('io.excel.xlsx.writer', 'xlsxwriter') + + df.to_excel('path_to_file.xlsx', sheet_name='Sheet1') .. _io.hdf5: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f56b6bc00cf15..f8a4aa1eae68f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1357,7 +1357,7 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None, tupleize_cols=tupleize_cols) formatter.save() - def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='', + def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', float_format=None, cols=None, header=True, index=True, index_label=None, startrow=0, startcol=0, engine=None): """ @@ -1367,7 +1367,7 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='', ---------- excel_writer : string or ExcelWriter object File path or existing ExcelWriter - sheet_name : string, default 'sheet1' + sheet_name : string, default 'Sheet1' Name of sheet which will contain DataFrame na_rep : string, default '' Missing data representation @@ -1398,8 +1398,8 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='', to the existing workbook. 
This can be used to save different DataFrames to one workbook >>> writer = ExcelWriter('output.xlsx') - >>> df1.to_excel(writer,'sheet1') - >>> df2.to_excel(writer,'sheet2') + >>> df1.to_excel(writer,'Sheet1') + >>> df2.to_excel(writer,'Sheet2') >>> writer.save() """ from pandas.io.excel import ExcelWriter diff --git a/pandas/io/excel.py b/pandas/io/excel.py index f34c4f99a856d..6ce8eb697268b 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -596,6 +596,7 @@ def _convert_to_style(cls, style_dict, num_format_str=None): Parameters ---------- style_dict: style dictionary to convert + num_format_str: optional number format string """ import xlwt @@ -611,3 +612,95 @@ def _convert_to_style(cls, style_dict, num_format_str=None): register_writer(_XlwtWriter) + +class _XlsxWriter(ExcelWriter): + engine = 'xlsxwriter' + supported_extensions = ('.xlsx',) + + def __init__(self, path, **engine_kwargs): + # Use the xlsxwriter module as the Excel writer. + import xlsxwriter + + super(_XlsxWriter, self).__init__(path, **engine_kwargs) + + self.book = xlsxwriter.Workbook(path, **engine_kwargs) + + def save(self): + """ + Save workbook to disk. + """ + return self.book.close() + + def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): + # Write the frame cells using xlsxwriter. 
+ + sheet_name = self._get_sheet_name(sheet_name) + + if sheet_name in self.sheets: + wks = self.sheets[sheet_name] + else: + wks = self.book.add_worksheet(sheet_name) + self.sheets[sheet_name] = wks + + style_dict = {} + + for cell in cells: + val = _conv_value(cell.val) + + num_format_str = None + if isinstance(cell.val, datetime.datetime): + num_format_str = "YYYY-MM-DD HH:MM:SS" + if isinstance(cell.val, datetime.date): + num_format_str = "YYYY-MM-DD" + + stylekey = json.dumps(cell.style) + if num_format_str: + stylekey += num_format_str + + if stylekey in style_dict: + style = style_dict[stylekey] + else: + style = self._convert_to_style(cell.style, num_format_str) + style_dict[stylekey] = style + + if cell.mergestart is not None and cell.mergeend is not None: + wks.merge_range(startrow + cell.row, + startrow + cell.mergestart, + startcol + cell.col, + startcol + cell.mergeend, + val, style) + else: + wks.write(startrow + cell.row, + startcol + cell.col, + val, style) + + def _convert_to_style(self, style_dict, num_format_str=None): + """ + converts a style_dict to an xlsxwriter format object + Parameters + ---------- + style_dict: style dictionary to convert + num_format_str: optional number format string + """ + if style_dict is None: + return None + + # Create a XlsxWriter format object. + xl_format = self.book.add_format() + + # Map the cell font to XlsxWriter font properties. + if style_dict.get('font'): + font = style_dict['font'] + if font.get('bold'): + xl_format.set_bold() + + # Map the cell borders to XlsxWriter border properties. 
+ if style_dict.get('borders'): + xl_format.set_border() + + if num_format_str is not None: + xl_format.set_num_format(num_format_str) + + return xl_format + +register_writer(_XlsxWriter) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index fc86a78ea684b..3fad8124143ec 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1429,6 +1429,26 @@ def test_to_excel(self): recdf = reader.parse(str(item), index_col=0) assert_frame_equal(df, recdf) + def test_to_excel_xlsxwriter(self): + try: + import xlrd + import xlsxwriter + from pandas.io.excel import ExcelFile + except ImportError: + raise nose.SkipTest + + path = '__tmp__.xlsx' + with ensure_clean(path) as path: + self.panel.to_excel(path, engine='xlsxwriter') + try: + reader = ExcelFile(path) + except ImportError: + raise nose.SkipTest + + for item, df in compat.iteritems(self.panel): + recdf = reader.parse(str(item), index_col=0) + assert_frame_equal(df, recdf) + def test_dropna(self): p = Panel(np.random.randn(4, 5, 6), major_axis=list('abcde')) p.ix[:, ['b', 'd'], 0] = np.nan diff --git a/pandas/util/print_versions.py b/pandas/util/print_versions.py index b7b4a936a1e90..d9c642372a9bb 100644 --- a/pandas/util/print_versions.py +++ b/pandas/util/print_versions.py @@ -104,6 +104,12 @@ def show_versions(): except: print("xlwt: Not installed") + try: + import xlsxwriter + print("xlsxwriter: %s" % xlsxwriter.__version__) + except: + print("xlsxwriter: Not installed") + try: import sqlalchemy print("sqlalchemy: %s" % sqlalchemy.__version__) From 547f211f1699b8775905097aa0d602095dbf7790 Mon Sep 17 00:00:00 2001 From: John McNamara Date: Sun, 15 Sep 2013 21:34:30 +0100 Subject: [PATCH 3/8] ENH: Added xlsxwriter as an ExcelWriter option. Added xlsxwriter as an optional writer engine. Issue #4542. 
--- ci/requirements-2.6.txt | 1 + ci/requirements-2.7.txt | 1 + ci/requirements-2.7_LOCALE.txt | 1 + ci/requirements-3.2.txt | 1 + ci/requirements-3.3.txt | 1 + doc/source/10min.rst | 4 +- doc/source/install.rst | 2 + doc/source/io.rst | 34 ++++++++++--- pandas/core/frame.py | 8 +-- pandas/io/excel.py | 93 ++++++++++++++++++++++++++++++++++ pandas/tests/test_panel.py | 20 ++++++++ pandas/util/print_versions.py | 6 +++ 12 files changed, 158 insertions(+), 14 deletions(-) diff --git a/ci/requirements-2.6.txt b/ci/requirements-2.6.txt index 5038b9e2b6552..8bdace67c66e1 100644 --- a/ci/requirements-2.6.txt +++ b/ci/requirements-2.6.txt @@ -4,3 +4,4 @@ python-dateutil==1.5 pytz==2013b http://www.crummy.com/software/BeautifulSoup/bs4/download/4.2/beautifulsoup4-4.2.0.tar.gz html5lib==1.0b2 +xlsxwriter==0.4.3 diff --git a/ci/requirements-2.7.txt b/ci/requirements-2.7.txt index 6a94d48ad7a5f..2e903102de7b1 100644 --- a/ci/requirements-2.7.txt +++ b/ci/requirements-2.7.txt @@ -8,6 +8,7 @@ numexpr==2.1 tables==2.3.1 matplotlib==1.1.1 openpyxl==1.6.2 +xlsxwriter==0.4.3 xlrd==0.9.2 patsy==0.1.0 html5lib==1.0b2 diff --git a/ci/requirements-2.7_LOCALE.txt b/ci/requirements-2.7_LOCALE.txt index a7e9d62e3549b..056b63bbb8591 100644 --- a/ci/requirements-2.7_LOCALE.txt +++ b/ci/requirements-2.7_LOCALE.txt @@ -2,6 +2,7 @@ python-dateutil pytz==2013b xlwt==0.7.5 openpyxl==1.6.2 +xlsxwriter==0.4.3 xlrd==0.9.2 numpy==1.6.1 cython==0.19.1 diff --git a/ci/requirements-3.2.txt b/ci/requirements-3.2.txt index e907a2fa828f1..b689047019ed7 100644 --- a/ci/requirements-3.2.txt +++ b/ci/requirements-3.2.txt @@ -1,6 +1,7 @@ python-dateutil==2.1 pytz==2013b openpyxl==1.6.2 +xlsxwriter==0.4.3 xlrd==0.9.2 numpy==1.6.2 cython==0.19.1 diff --git a/ci/requirements-3.3.txt b/ci/requirements-3.3.txt index eb1e725d98040..326098be5f7f4 100644 --- a/ci/requirements-3.3.txt +++ b/ci/requirements-3.3.txt @@ -1,6 +1,7 @@ python-dateutil==2.1 pytz==2013b openpyxl==1.6.2 +xlsxwriter==0.4.3 xlrd==0.9.2 
html5lib==1.0b2 numpy==1.7.1 diff --git a/doc/source/10min.rst b/doc/source/10min.rst index 58c5b54968614..705514ac0c364 100644 --- a/doc/source/10min.rst +++ b/doc/source/10min.rst @@ -695,13 +695,13 @@ Writing to an excel file .. ipython:: python - df.to_excel('foo.xlsx', sheet_name='sheet1') + df.to_excel('foo.xlsx', sheet_name='Sheet1') Reading from an excel file .. ipython:: python - pd.read_excel('foo.xlsx', 'sheet1', index_col=None, na_values=['NA']) + pd.read_excel('foo.xlsx', 'Sheet1', index_col=None, na_values=['NA']) .. ipython:: python :suppress: diff --git a/doc/source/install.rst b/doc/source/install.rst index 4472d844c1871..b1dcad9448cfd 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -100,6 +100,8 @@ Optional Dependencies * `openpyxl `__, `xlrd/xlwt `__ * openpyxl version 1.6.1 or higher * Needed for Excel I/O + * `XlsxWriter `__ + * Alternative Excel writer. * `boto `__: necessary for Amazon S3 access. * One of `PyQt4 diff --git a/doc/source/io.rst b/doc/source/io.rst index c29af29d2e63f..50d2323fcc8b0 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -1654,7 +1654,7 @@ indices to be parsed. .. code-block:: python - read_excel('path_to_file.xls', Sheet1', parse_cols=[0, 2, 3], index_col=None, na_values=['NA']) + read_excel('path_to_file.xls', 'Sheet1', parse_cols=[0, 2, 3], index_col=None, na_values=['NA']) To write a DataFrame object to a sheet of an Excel file, you can use the ``to_excel`` instance method. The arguments are largely the same as ``to_csv`` @@ -1664,7 +1664,7 @@ written. For example: .. code-block:: python - df.to_excel('path_to_file.xlsx', sheet_name='sheet1') + df.to_excel('path_to_file.xlsx', sheet_name='Sheet1') Files with a ``.xls`` extension will be written using ``xlwt`` and those with a ``.xlsx`` extension will be written using ``openpyxl``. @@ -1677,8 +1677,8 @@ one can use the ExcelWriter class, as in the following example: .. 
code-block:: python writer = ExcelWriter('path_to_file.xlsx') - df1.to_excel(writer, sheet_name='sheet1') - df2.to_excel(writer, sheet_name='sheet2') + df1.to_excel(writer, sheet_name='Sheet1') + df2.to_excel(writer, sheet_name='Sheet2') writer.save() .. _io.excel.writers: @@ -1693,11 +1693,29 @@ Excel writer engines 1. the ``engine`` keyword argument 2. the filename extension (via the default specified in config options) -``pandas`` only supports ``openpyxl`` for ``.xlsx`` and ``.xlsm`` files and -``xlwt`` for ``.xls`` files. If you have multiple engines installed, you can choose the -engine to use by default via the options ``io.excel.xlsx.writer`` and -``io.excel.xls.writer``. +By default ``pandas`` uses +`openpyxl `__ as a writer for ``.xlsx`` +and ``.xlsm`` files and `xlwt `__ as a writer for +``.xls`` files. If you have multiple engines installed, you can change the +default engine via the ``io.excel.xlsx.writer`` and ``io.excel.xls.writer`` +options. +For example, if the optional `XlsxWriter `__ +module is installed you can use it as an xlsx writer engine as follows: + +.. code-block:: python + + # By setting the 'engine' in the DataFrame and Panel 'to_excel()' methods. + df.to_excel('path_to_file.xlsx', sheet_name='Sheet1', engine='xlsxwriter') + + # By setting the 'engine' in the ExcelWriter constructor. + writer = ExcelWriter('path_to_file.xlsx', engine='xlsxwriter') + + # Or via pandas configuration. + from pandas import set_option + set_option('io.excel.xlsx.writer', 'xlsxwriter') + + df.to_excel('path_to_file.xlsx', sheet_name='Sheet1') ..
_io.hdf5: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f56b6bc00cf15..f8a4aa1eae68f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1357,7 +1357,7 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None, tupleize_cols=tupleize_cols) formatter.save() - def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='', + def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', float_format=None, cols=None, header=True, index=True, index_label=None, startrow=0, startcol=0, engine=None): """ @@ -1367,7 +1367,7 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='', ---------- excel_writer : string or ExcelWriter object File path or existing ExcelWriter - sheet_name : string, default 'sheet1' + sheet_name : string, default 'Sheet1' Name of sheet which will contain DataFrame na_rep : string, default '' Missing data representation @@ -1398,8 +1398,8 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='', to the existing workbook. This can be used to save different DataFrames to one workbook >>> writer = ExcelWriter('output.xlsx') - >>> df1.to_excel(writer,'sheet1') - >>> df2.to_excel(writer,'sheet2') + >>> df1.to_excel(writer,'Sheet1') + >>> df2.to_excel(writer,'Sheet2') >>> writer.save() """ from pandas.io.excel import ExcelWriter diff --git a/pandas/io/excel.py b/pandas/io/excel.py index f34c4f99a856d..6ce8eb697268b 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -596,6 +596,7 @@ def _convert_to_style(cls, style_dict, num_format_str=None): Parameters ---------- style_dict: style dictionary to convert + num_format_str: optional number format string """ import xlwt @@ -611,3 +612,95 @@ def _convert_to_style(cls, style_dict, num_format_str=None): register_writer(_XlwtWriter) + +class _XlsxWriter(ExcelWriter): + engine = 'xlsxwriter' + supported_extensions = ('.xlsx',) + + def __init__(self, path, **engine_kwargs): + # Use the xlsxwriter module as the Excel writer. 
+ import xlsxwriter + + super(_XlsxWriter, self).__init__(path, **engine_kwargs) + + self.book = xlsxwriter.Workbook(path, **engine_kwargs) + + def save(self): + """ + Save workbook to disk. + """ + return self.book.close() + + def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): + # Write the frame cells using xlsxwriter. + + sheet_name = self._get_sheet_name(sheet_name) + + if sheet_name in self.sheets: + wks = self.sheets[sheet_name] + else: + wks = self.book.add_worksheet(sheet_name) + self.sheets[sheet_name] = wks + + style_dict = {} + + for cell in cells: + val = _conv_value(cell.val) + + num_format_str = None + if isinstance(cell.val, datetime.datetime): + num_format_str = "YYYY-MM-DD HH:MM:SS" + if isinstance(cell.val, datetime.date): + num_format_str = "YYYY-MM-DD" + + stylekey = json.dumps(cell.style) + if num_format_str: + stylekey += num_format_str + + if stylekey in style_dict: + style = style_dict[stylekey] + else: + style = self._convert_to_style(cell.style, num_format_str) + style_dict[stylekey] = style + + if cell.mergestart is not None and cell.mergeend is not None: + wks.merge_range(startrow + cell.row, + startrow + cell.mergestart, + startcol + cell.col, + startcol + cell.mergeend, + val, style) + else: + wks.write(startrow + cell.row, + startcol + cell.col, + val, style) + + def _convert_to_style(self, style_dict, num_format_str=None): + """ + converts a style_dict to an xlsxwriter format object + Parameters + ---------- + style_dict: style dictionary to convert + num_format_str: optional number format string + """ + if style_dict is None: + return None + + # Create a XlsxWriter format object. + xl_format = self.book.add_format() + + # Map the cell font to XlsxWriter font properties. + if style_dict.get('font'): + font = style_dict['font'] + if font.get('bold'): + xl_format.set_bold() + + # Map the cell borders to XlsxWriter border properties. 
+ if style_dict.get('borders'): + xl_format.set_border() + + if num_format_str is not None: + xl_format.set_num_format(num_format_str) + + return xl_format + +register_writer(_XlsxWriter) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index fc86a78ea684b..3fad8124143ec 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1429,6 +1429,26 @@ def test_to_excel(self): recdf = reader.parse(str(item), index_col=0) assert_frame_equal(df, recdf) + def test_to_excel_xlsxwriter(self): + try: + import xlrd + import xlsxwriter + from pandas.io.excel import ExcelFile + except ImportError: + raise nose.SkipTest + + path = '__tmp__.xlsx' + with ensure_clean(path) as path: + self.panel.to_excel(path, engine='xlsxwriter') + try: + reader = ExcelFile(path) + except ImportError: + raise nose.SkipTest + + for item, df in compat.iteritems(self.panel): + recdf = reader.parse(str(item), index_col=0) + assert_frame_equal(df, recdf) + def test_dropna(self): p = Panel(np.random.randn(4, 5, 6), major_axis=list('abcde')) p.ix[:, ['b', 'd'], 0] = np.nan diff --git a/pandas/util/print_versions.py b/pandas/util/print_versions.py index b7b4a936a1e90..d9c642372a9bb 100644 --- a/pandas/util/print_versions.py +++ b/pandas/util/print_versions.py @@ -104,6 +104,12 @@ def show_versions(): except: print("xlwt: Not installed") + try: + import xlsxwriter + print("xlsxwriter: %s" % xlsxwriter.__version__) + except: + print("xlsxwriter: Not installed") + try: import sqlalchemy print("sqlalchemy: %s" % sqlalchemy.__version__) From d8ef328045a7904279abdcd110dc5859766d772c Mon Sep 17 00:00:00 2001 From: John McNamara Date: Mon, 16 Sep 2013 08:19:34 +0100 Subject: [PATCH 4/8] CLN: Using new excel test class for test cases. 
--- ci/requirements-2.6.txt | 1 - pandas/io/tests/test_excel.py | 78 +++++++++++++++++++++++++++++++---- 2 files changed, 71 insertions(+), 8 deletions(-) diff --git a/ci/requirements-2.6.txt b/ci/requirements-2.6.txt index 8bdace67c66e1..5038b9e2b6552 100644 --- a/ci/requirements-2.6.txt +++ b/ci/requirements-2.6.txt @@ -4,4 +4,3 @@ python-dateutil==1.5 pytz==2013b http://www.crummy.com/software/BeautifulSoup/bs4/download/4.2/beautifulsoup4-4.2.0.tar.gz html5lib==1.0b2 -xlsxwriter==0.4.3 diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index 00536026994c5..0f4668d871b88 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -16,9 +16,11 @@ register_writer ) from pandas.util.testing import ensure_clean +from pandas.core.config import set_option, get_option import pandas.util.testing as tm import pandas as pd + def _skip_if_no_xlrd(): try: import xlrd @@ -31,18 +33,25 @@ def _skip_if_no_xlrd(): def _skip_if_no_xlwt(): try: - import xlwt # NOQA + import xlwt # NOQA except ImportError: raise nose.SkipTest('xlwt not installed, skipping') def _skip_if_no_openpyxl(): try: - import openpyxl # NOQA + import openpyxl # NOQA except ImportError: raise nose.SkipTest('openpyxl not installed, skipping') +def _skip_if_no_xlsxwriter(): + try: + import xlsxwriter # NOQA + except ImportError: + raise nose.SkipTest('xlsxwriter not installed, skipping') + + def _skip_if_no_excelsuite(): _skip_if_no_xlrd() _skip_if_no_xlwt() @@ -268,15 +277,22 @@ def test_xlsx_table(self): class ExcelWriterBase(SharedItems): - # test cases to run with different extensions - # for each writer - # to add a writer test, define two things: + # Base class for test cases to run with different writers + # To add a writer test, define two things: # 1. a check_skip function that skips your tests if your writer isn't - # installed - # 2. add a property ext, which is the file extension that your writer writes to + # installed + # 2. 
add a property ext, which is the file extension that your writer + # writes to + # 3. add a property engine_name, which is the name of the writer def setUp(self): self.check_skip() super(ExcelWriterBase, self).setUp() + self.option_name = 'io.excel.%s.writer' % self.ext + self.prev_engine = get_option(self.option_name) + set_option(self.option_name, self.engine_name) + + def tearDown(self): + set_option(self.option_name, self.prev_engine) def test_excel_sheet_by_name_raise(self): _skip_if_no_xlrd() @@ -790,6 +806,7 @@ def roundtrip(df, header=True, parser_hdr=0): class OpenpyxlTests(ExcelWriterBase, unittest.TestCase): ext = 'xlsx' + engine_name = 'openpyxl' check_skip = staticmethod(_skip_if_no_openpyxl) def test_to_excel_styleconverter(self): @@ -820,6 +837,7 @@ def test_to_excel_styleconverter(self): class XlwtTests(ExcelWriterBase, unittest.TestCase): ext = 'xls' + engine_name = 'xlwt' check_skip = staticmethod(_skip_if_no_xlwt) def test_to_excel_styleconverter(self): @@ -841,6 +859,52 @@ def test_to_excel_styleconverter(self): self.assertEquals(xlwt.Borders.THIN, xls_style.borders.left) self.assertEquals(xlwt.Alignment.HORZ_CENTER, xls_style.alignment.horz) + +class XlsxWriterTests(ExcelWriterBase, unittest.TestCase): + ext = 'xlsx' + engine_name = 'xlsxwriter' + check_skip = staticmethod(_skip_if_no_xlsxwriter) + + # Override test from the Superclass to use assertAlmostEqual on the + # floating point values read back in from the output XlsxWriter file. + def test_roundtrip_indexlabels(self): + _skip_if_no_xlrd() + ext = self.ext + path = '__tmp_to_excel_from_excel_indexlabels__.' 
+ ext + + with ensure_clean(path) as path: + + self.frame['A'][:5] = nan + + self.frame.to_excel(path, 'test1') + self.frame.to_excel(path, 'test1', cols=['A', 'B']) + self.frame.to_excel(path, 'test1', header=False) + self.frame.to_excel(path, 'test1', index=False) + + # test index_label + frame = (DataFrame(np.random.randn(10, 2)) >= 0) + frame.to_excel(path, 'test1', index_label=['test']) + reader = ExcelFile(path) + recons = reader.parse('test1', index_col=0).astype(np.int64) + frame.index.names = ['test'] + self.assertEqual(frame.index.names, recons.index.names) + + frame = (DataFrame(np.random.randn(10, 2)) >= 0) + frame.to_excel( + path, 'test1', index_label=['test', 'dummy', 'dummy2']) + reader = ExcelFile(path) + recons = reader.parse('test1', index_col=0).astype(np.int64) + frame.index.names = ['test'] + self.assertEqual(frame.index.names, recons.index.names) + + frame = (DataFrame(np.random.randn(10, 2)) >= 0) + frame.to_excel(path, 'test1', index_label='test') + reader = ExcelFile(path) + recons = reader.parse('test1', index_col=0).astype(np.int64) + frame.index.names = ['test'] + self.assertAlmostEqual(frame.index.names, recons.index.names) + + class ExcelWriterEngineTests(unittest.TestCase): def test_ExcelWriter_dispatch(self): with tm.assertRaisesRegexp(ValueError, 'No engine'): From 1f4beee48ee2d6a04c91a24e4f4d694b51cae633 Mon Sep 17 00:00:00 2001 From: John McNamara Date: Mon, 16 Sep 2013 08:22:51 +0100 Subject: [PATCH 5/8] Removed dependency from Python 2.6 requirements. 
--- ci/requirements-2.6.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/requirements-2.6.txt b/ci/requirements-2.6.txt index 8bdace67c66e1..5038b9e2b6552 100644 --- a/ci/requirements-2.6.txt +++ b/ci/requirements-2.6.txt @@ -4,4 +4,3 @@ python-dateutil==1.5 pytz==2013b http://www.crummy.com/software/BeautifulSoup/bs4/download/4.2/beautifulsoup4-4.2.0.tar.gz html5lib==1.0b2 -xlsxwriter==0.4.3 From ca62471b5bef76cea28f9ab5f49e73588880720d Mon Sep 17 00:00:00 2001 From: John McNamara Date: Mon, 16 Sep 2013 20:19:15 +0100 Subject: [PATCH 6/8] Restored test_excel.py from master. --- pandas/io/tests/test_excel.py | 78 ++++------------------------------- 1 file changed, 7 insertions(+), 71 deletions(-) diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index 0f4668d871b88..00536026994c5 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -16,11 +16,9 @@ register_writer ) from pandas.util.testing import ensure_clean -from pandas.core.config import set_option, get_option import pandas.util.testing as tm import pandas as pd - def _skip_if_no_xlrd(): try: import xlrd @@ -33,25 +31,18 @@ def _skip_if_no_xlrd(): def _skip_if_no_xlwt(): try: - import xlwt # NOQA + import xlwt # NOQA except ImportError: raise nose.SkipTest('xlwt not installed, skipping') def _skip_if_no_openpyxl(): try: - import openpyxl # NOQA + import openpyxl # NOQA except ImportError: raise nose.SkipTest('openpyxl not installed, skipping') -def _skip_if_no_xlsxwriter(): - try: - import xlsxwriter # NOQA - except ImportError: - raise nose.SkipTest('xlsxwriter not installed, skipping') - - def _skip_if_no_excelsuite(): _skip_if_no_xlrd() _skip_if_no_xlwt() @@ -277,22 +268,15 @@ def test_xlsx_table(self): class ExcelWriterBase(SharedItems): - # Base class for test cases to run with different writers - # To add a writer test, define two things: + # test cases to run with different extensions + # for each writer + # to add a writer test, define two 
things: # 1. a check_skip function that skips your tests if your writer isn't - # installed - # 2. add a property ext, which is the file extension that your writer - # writes to - # 3. add a property engine_name, which is the name of the writer + # installed + # 2. add a property ext, which is the file extension that your writer writes to def setUp(self): self.check_skip() super(ExcelWriterBase, self).setUp() - self.option_name = 'io.excel.%s.writer' % self.ext - self.prev_engine = get_option(self.option_name) - set_option(self.option_name, self.engine_name) - - def tearDown(self): - set_option(self.option_name, self.prev_engine) def test_excel_sheet_by_name_raise(self): _skip_if_no_xlrd() @@ -806,7 +790,6 @@ def roundtrip(df, header=True, parser_hdr=0): class OpenpyxlTests(ExcelWriterBase, unittest.TestCase): ext = 'xlsx' - engine_name = 'openpyxl' check_skip = staticmethod(_skip_if_no_openpyxl) def test_to_excel_styleconverter(self): @@ -837,7 +820,6 @@ def test_to_excel_styleconverter(self): class XlwtTests(ExcelWriterBase, unittest.TestCase): ext = 'xls' - engine_name = 'xlwt' check_skip = staticmethod(_skip_if_no_xlwt) def test_to_excel_styleconverter(self): @@ -859,52 +841,6 @@ def test_to_excel_styleconverter(self): self.assertEquals(xlwt.Borders.THIN, xls_style.borders.left) self.assertEquals(xlwt.Alignment.HORZ_CENTER, xls_style.alignment.horz) - -class XlsxWriterTests(ExcelWriterBase, unittest.TestCase): - ext = 'xlsx' - engine_name = 'xlsxwriter' - check_skip = staticmethod(_skip_if_no_xlsxwriter) - - # Override test from the Superclass to use assertAlmostEqual on the - # floating point values read back in from the output XlsxWriter file. - def test_roundtrip_indexlabels(self): - _skip_if_no_xlrd() - ext = self.ext - path = '__tmp_to_excel_from_excel_indexlabels__.' 
+ ext - - with ensure_clean(path) as path: - - self.frame['A'][:5] = nan - - self.frame.to_excel(path, 'test1') - self.frame.to_excel(path, 'test1', cols=['A', 'B']) - self.frame.to_excel(path, 'test1', header=False) - self.frame.to_excel(path, 'test1', index=False) - - # test index_label - frame = (DataFrame(np.random.randn(10, 2)) >= 0) - frame.to_excel(path, 'test1', index_label=['test']) - reader = ExcelFile(path) - recons = reader.parse('test1', index_col=0).astype(np.int64) - frame.index.names = ['test'] - self.assertEqual(frame.index.names, recons.index.names) - - frame = (DataFrame(np.random.randn(10, 2)) >= 0) - frame.to_excel( - path, 'test1', index_label=['test', 'dummy', 'dummy2']) - reader = ExcelFile(path) - recons = reader.parse('test1', index_col=0).astype(np.int64) - frame.index.names = ['test'] - self.assertEqual(frame.index.names, recons.index.names) - - frame = (DataFrame(np.random.randn(10, 2)) >= 0) - frame.to_excel(path, 'test1', index_label='test') - reader = ExcelFile(path) - recons = reader.parse('test1', index_col=0).astype(np.int64) - frame.index.names = ['test'] - self.assertAlmostEqual(frame.index.names, recons.index.names) - - class ExcelWriterEngineTests(unittest.TestCase): def test_ExcelWriter_dispatch(self): with tm.assertRaisesRegexp(ValueError, 'No engine'): From 89b36e27446308f37f5dede7f80a0736cdc6ce77 Mon Sep 17 00:00:00 2001 From: John McNamara Date: Mon, 16 Sep 2013 20:19:38 +0100 Subject: [PATCH 7/8] Added tests for xlsxwriter. 
--- pandas/io/tests/test_excel.py | 65 +++++++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 2 deletions(-) diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index 00536026994c5..d43b213b2644a 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -16,9 +16,11 @@ register_writer ) from pandas.util.testing import ensure_clean +from pandas.core.config import set_option, get_option import pandas.util.testing as tm import pandas as pd + def _skip_if_no_xlrd(): try: import xlrd @@ -31,17 +33,22 @@ def _skip_if_no_xlrd(): def _skip_if_no_xlwt(): try: - import xlwt # NOQA + import xlwt # NOQA except ImportError: raise nose.SkipTest('xlwt not installed, skipping') def _skip_if_no_openpyxl(): try: - import openpyxl # NOQA + import openpyxl # NOQA except ImportError: raise nose.SkipTest('openpyxl not installed, skipping') +def _skip_if_no_xlsxwriter(): + try: + import xlsxwriter # NOQA + except ImportError: + raise nose.SkipTest('xlsxwriter not installed, skipping') def _skip_if_no_excelsuite(): _skip_if_no_xlrd() @@ -277,6 +284,12 @@ class ExcelWriterBase(SharedItems): def setUp(self): self.check_skip() super(ExcelWriterBase, self).setUp() + self.option_name = 'io.excel.%s.writer' % self.ext + self.prev_engine = get_option(self.option_name) + set_option(self.option_name, self.engine_name) + + def tearDown(self): + set_option(self.option_name, self.prev_engine) def test_excel_sheet_by_name_raise(self): _skip_if_no_xlrd() @@ -790,6 +803,7 @@ def roundtrip(df, header=True, parser_hdr=0): class OpenpyxlTests(ExcelWriterBase, unittest.TestCase): ext = 'xlsx' + engine_name = 'openpyxl' check_skip = staticmethod(_skip_if_no_openpyxl) def test_to_excel_styleconverter(self): @@ -820,6 +834,7 @@ def test_to_excel_styleconverter(self): class XlwtTests(ExcelWriterBase, unittest.TestCase): ext = 'xls' + engine_name = 'xlwt' check_skip = staticmethod(_skip_if_no_xlwt) def test_to_excel_styleconverter(self): @@ -841,6 
+856,52 @@ def test_to_excel_styleconverter(self): self.assertEquals(xlwt.Borders.THIN, xls_style.borders.left) self.assertEquals(xlwt.Alignment.HORZ_CENTER, xls_style.alignment.horz) + +class XlsxWriterTests(ExcelWriterBase, unittest.TestCase): + ext = 'xlsx' + engine_name = 'xlsxwriter' + check_skip = staticmethod(_skip_if_no_xlsxwriter) + + # Override test from the Superclass to use assertAlmostEqual on the + # floating point values read back in from the output XlsxWriter file. + def test_roundtrip_indexlabels(self): + _skip_if_no_xlrd() + ext = self.ext + path = '__tmp_to_excel_from_excel_indexlabels__.' + ext + + with ensure_clean(path) as path: + + self.frame['A'][:5] = nan + + self.frame.to_excel(path, 'test1') + self.frame.to_excel(path, 'test1', cols=['A', 'B']) + self.frame.to_excel(path, 'test1', header=False) + self.frame.to_excel(path, 'test1', index=False) + + # test index_label + frame = (DataFrame(np.random.randn(10, 2)) >= 0) + frame.to_excel(path, 'test1', index_label=['test']) + reader = ExcelFile(path) + recons = reader.parse('test1', index_col=0).astype(np.int64) + frame.index.names = ['test'] + self.assertEqual(frame.index.names, recons.index.names) + + frame = (DataFrame(np.random.randn(10, 2)) >= 0) + frame.to_excel( + path, 'test1', index_label=['test', 'dummy', 'dummy2']) + reader = ExcelFile(path) + recons = reader.parse('test1', index_col=0).astype(np.int64) + frame.index.names = ['test'] + self.assertEqual(frame.index.names, recons.index.names) + + frame = (DataFrame(np.random.randn(10, 2)) >= 0) + frame.to_excel(path, 'test1', index_label='test') + reader = ExcelFile(path) + recons = reader.parse('test1', index_col=0).astype(np.int64) + frame.index.names = ['test'] + self.assertAlmostEqual(frame.index.names, recons.index.names) + + class ExcelWriterEngineTests(unittest.TestCase): def test_ExcelWriter_dispatch(self): with tm.assertRaisesRegexp(ValueError, 'No engine'): From 20519475391a81377bc1012785251bbcb2e952d9 Mon Sep 17 00:00:00 
2001 From: John McNamara Date: Mon, 16 Sep 2013 20:40:14 +0100 Subject: [PATCH 8/8] Minor doc fix. --- pandas/io/tests/test_excel.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index d43b213b2644a..e55868f9bcd63 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -44,12 +44,14 @@ def _skip_if_no_openpyxl(): except ImportError: raise nose.SkipTest('openpyxl not installed, skipping') + def _skip_if_no_xlsxwriter(): try: import xlsxwriter # NOQA except ImportError: raise nose.SkipTest('xlsxwriter not installed, skipping') + def _skip_if_no_excelsuite(): _skip_if_no_xlrd() _skip_if_no_xlwt() @@ -275,12 +277,13 @@ def test_xlsx_table(self): class ExcelWriterBase(SharedItems): - # test cases to run with different extensions - # for each writer - # to add a writer test, define two things: - # 1. a check_skip function that skips your tests if your writer isn't - # installed - # 2. add a property ext, which is the file extension that your writer writes to + # Base class for test cases to run with different Excel writers. + # To add a writer test, define three things: + # 1. A check_skip function that skips your tests if your writer isn't + # installed. + # 2. Add a property ext, which is the file extension that your writer + # writes to. + # 3. Add a property engine_name, which is the name of the writer engine. def setUp(self): self.check_skip() super(ExcelWriterBase, self).setUp()