From 99d8835020202dd5d2af8a97627f03dc0a3b9f67 Mon Sep 17 00:00:00 2001 From: "John W. O'Brien" Date: Tue, 24 Jun 2014 23:57:35 -0400 Subject: [PATCH] ENH/TST/DOC: Implement experimental io.excel._Openpyxl2Writer --- doc/source/io.rst | 7 +- doc/source/v0.15.0.txt | 6 + pandas/compat/openpyxl_compat.py | 21 +- pandas/io/excel.py | 446 ++++++++++++++++++++++++++++++- pandas/io/tests/test_excel.py | 115 ++++++-- 5 files changed, 558 insertions(+), 37 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index 273cbd5daae7d..06600208758bd 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2009,7 +2009,12 @@ files if `Xlsxwriter`_ is not available. .. _xlwt: http://www.python-excel.org To specify which writer you want to use, you can pass an engine keyword -argument to ``to_excel`` and to ``ExcelWriter``. +argument to ``to_excel`` and to ``ExcelWriter``. The built-in engines are: + +- `'openpyxl'`: This includes stable support for OpenPyxl 1.6.1 up to but + not including 2.0.0, and experimental support for OpenPyxl 2.0.0 and later. +- `'xlsxwriter'` +- `'xlwt'` .. code-block:: python diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index 6db3fcaa832c0..6ae767f90208d 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -673,6 +673,12 @@ Enhancements +- Added experimental compatibility with openpyxl v2. The ``DataFrame.to_excel`` + method ``engine`` keyword now recognizes ``openpyxl1`` and ``openpyxl2`` + which will explicitly require openpyxl v1 and v2 respectively, failing if + the requested version is not available. The ``openpyxl`` engine is now a + meta-engine that automatically uses whichever version of openpyxl is + installed.
(:issue:`7177`) diff --git a/pandas/compat/openpyxl_compat.py b/pandas/compat/openpyxl_compat.py index d0c2a807e14db..266aded2071b6 100644 --- a/pandas/compat/openpyxl_compat.py +++ b/pandas/compat/openpyxl_compat.py @@ -10,15 +10,26 @@ stop_ver = '2.0.0' -def is_compat(): - """Detect whether the installed version of openpyxl is supported. +def is_compat(major_ver=1): + """Detect whether the installed version of openpyxl is supported + Parameters + ---------- + major_ver : int + 1 requests compatibility status among the 1.x.y series + 2 requests compatibility status of 2.0.0 and later Returns ------- compat : bool - ``True`` if openpyxl is installed and is between versions 1.6.1 and - 2.0.0, ``False`` otherwise. + ``True`` if openpyxl is installed and is a compatible version. + ``False`` otherwise. """ import openpyxl ver = LooseVersion(openpyxl.__version__) - return LooseVersion(start_ver) <= ver < LooseVersion(stop_ver) + if major_ver == 1: + return LooseVersion(start_ver) <= ver < LooseVersion(stop_ver) + elif major_ver == 2: + return LooseVersion(stop_ver) <= ver + else: + raise ValueError('cannot test for openpyxl compatibility with ver {0}' + .format(major_ver)) diff --git a/pandas/io/excel.py b/pandas/io/excel.py index f81cf6502a0e6..84f04188b7906 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -46,6 +46,20 @@ def register_writer(klass): def get_writer(engine_name): + if engine_name == 'openpyxl': + try: + import openpyxl + + # with version-less openpyxl engine + # make sure we make the intelligent choice for the user + if LooseVersion(openpyxl.__version__) < '2.0.0': + return _writers['openpyxl1'] + else: + return _writers['openpyxl2'] + except ImportError: + # fall through to normal exception handling below + pass + try: return _writers[engine_name] except KeyError: @@ -527,20 +541,20 @@ def close(self): return self.save() -class _OpenpyxlWriter(ExcelWriter): - engine = 'openpyxl' +class _Openpyxl1Writer(ExcelWriter): + engine = 'openpyxl1'
supported_extensions = ('.xlsx', '.xlsm') + openpyxl_majorver = 1 def __init__(self, path, engine=None, **engine_kwargs): - if not openpyxl_compat.is_compat(): + if not openpyxl_compat.is_compat(major_ver=self.openpyxl_majorver): raise ValueError('Installed openpyxl is not supported at this ' - 'time. Use >={0} and ' - '<{1}.'.format(openpyxl_compat.start_ver, - openpyxl_compat.stop_ver)) + 'time. Use {0}.x.y.' + .format(self.openpyxl_majorver)) # Use the openpyxl module as the Excel writer. from openpyxl.workbook import Workbook - super(_OpenpyxlWriter, self).__init__(path, **engine_kwargs) + super(_Openpyxl1Writer, self).__init__(path, **engine_kwargs) # Create workbook object with default optimized_write=True. self.book = Workbook() @@ -632,9 +646,427 @@ def _convert_to_style(cls, style_dict): return xls_style +register_writer(_Openpyxl1Writer) + + +class _OpenpyxlWriter(_Openpyxl1Writer): + engine = 'openpyxl' + register_writer(_OpenpyxlWriter) +class _Openpyxl2Writer(_Openpyxl1Writer): + """ + Note: Support for OpenPyxl v2 is currently EXPERIMENTAL (GH7565). + """ + engine = 'openpyxl2' + openpyxl_majorver = 2 + + def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0): + # Write the frame cells using openpyxl. 
+ from openpyxl.cell import get_column_letter + + sheet_name = self._get_sheet_name(sheet_name) + + if sheet_name in self.sheets: + wks = self.sheets[sheet_name] + else: + wks = self.book.create_sheet() + wks.title = sheet_name + self.sheets[sheet_name] = wks + + for cell in cells: + colletter = get_column_letter(startcol + cell.col + 1) + xcell = wks.cell("%s%s" % (colletter, startrow + cell.row + 1)) + xcell.value = _conv_value(cell.val) + style_kwargs = {} + + # Apply format codes before cell.style to allow override + if isinstance(cell.val, datetime.datetime): + style_kwargs.update(self._convert_to_style_kwargs({ + 'number_format':{'format_code': self.datetime_format}})) + elif isinstance(cell.val, datetime.date): + style_kwargs.update(self._convert_to_style_kwargs({ + 'number_format':{'format_code': self.date_format}})) + + if cell.style: + style_kwargs.update(self._convert_to_style_kwargs(cell.style)) + + if style_kwargs: + xcell.style = xcell.style.copy(**style_kwargs) + + if cell.mergestart is not None and cell.mergeend is not None: + cletterstart = get_column_letter(startcol + cell.col + 1) + cletterend = get_column_letter(startcol + cell.mergeend + 1) + + wks.merge_cells('%s%s:%s%s' % (cletterstart, + startrow + cell.row + 1, + cletterend, + startrow + cell.mergestart + 1)) + + # Excel requires that the format of the first cell in a merged + # range is repeated in the rest of the merged range. + if style_kwargs: + first_row = startrow + cell.row + 1 + last_row = startrow + cell.mergestart + 1 + first_col = startcol + cell.col + 1 + last_col = startcol + cell.mergeend + 1 + + for row in range(first_row, last_row + 1): + for col in range(first_col, last_col + 1): + if row == first_row and col == first_col: + # Ignore first cell. It is already handled.
+ continue + colletter = get_column_letter(col) + xcell = wks.cell("%s%s" % (colletter, row)) + xcell.style = xcell.style.copy(**style_kwargs) + + @classmethod + def _convert_to_style_kwargs(cls, style_dict): + """ + Convert a style_dict to a set of kwargs suitable for initializing + or updating-on-copy an openpyxl v2 style object + Parameters + ---------- + style_dict : dict + A dict with zero or more of the following keys (or their synonyms). + 'font' + 'fill' + 'border' ('borders') + 'alignment' + 'number_format' + 'protection' + Returns + ------- + style_kwargs : dict + A dict with the same, normalized keys as ``style_dict`` but each + value has been replaced with a native openpyxl style object of the + appropriate class. + """ + + _style_key_map = { + 'borders': 'border', + } + + style_kwargs = {} + for k, v in style_dict.items(): + if k in _style_key_map: + k = _style_key_map[k] + _conv_to_x = getattr(cls, '_convert_to_{0}'.format(k), + lambda x: None) + new_v = _conv_to_x(v) + if new_v: + style_kwargs[k] = new_v + + return style_kwargs + + + @classmethod + def _convert_to_color(cls, color_spec): + """ + Convert ``color_spec`` to an openpyxl v2 Color object + Parameters + ---------- + color_spec : str, dict + A 32-bit ARGB hex string, or a dict with zero or more of the + following keys. + 'rgb' + 'indexed' + 'auto' + 'theme' + 'tint' + 'index' + 'type' + Returns + ------- + color : openpyxl.styles.Color + """ + + from openpyxl.styles import Color + + if isinstance(color_spec, str): + return Color(color_spec) + else: + return Color(**color_spec) + + + @classmethod + def _convert_to_font(cls, font_dict): + """ + Convert ``font_dict`` to an openpyxl v2 Font object + Parameters + ---------- + font_dict : dict + A dict with zero or more of the following keys (or their synonyms). 
+ 'name' + 'size' ('sz') + 'bold' ('b') + 'italic' ('i') + 'underline' ('u') + 'strikethrough' ('strike') + 'color' + 'vertAlign' ('vertalign') + 'charset' + 'scheme' + 'family' + 'outline' + 'shadow' + 'condense' + Returns + ------- + font : openpyxl.styles.Font + """ + + from openpyxl.styles import Font + + _font_key_map = { + 'sz': 'size', + 'b': 'bold', + 'i': 'italic', + 'u': 'underline', + 'strike': 'strikethrough', + 'vertalign': 'vertAlign', + } + + font_kwargs = {} + for k, v in font_dict.items(): + if k in _font_key_map: + k = _font_key_map[k] + if k == 'color': + v = cls._convert_to_color(v) + font_kwargs[k] = v + + return Font(**font_kwargs) + + + @classmethod + def _convert_to_stop(cls, stop_seq): + """ + Convert ``stop_seq`` to a list of openpyxl v2 Color objects, + suitable for initializing the ``GradientFill`` ``stop`` parameter. + Parameters + ---------- + stop_seq : iterable + An iterable that yields objects suitable for consumption by + ``_convert_to_color``. + Returns + ------- + stop : list of openpyxl.styles.Color + """ + + return list(map(cls._convert_to_color, stop_seq)) + + + @classmethod + def _convert_to_fill(cls, fill_dict): + """ + Convert ``fill_dict`` to an openpyxl v2 Fill object + Parameters + ---------- + fill_dict : dict + A dict with one or more of the following keys (or their synonyms), + 'fill_type' ('patternType', 'patterntype') + 'start_color' ('fgColor', 'fgcolor') + 'end_color' ('bgColor', 'bgcolor') + or one or more of the following keys (or their synonyms).
+ 'type' ('fill_type') + 'degree' + 'left' + 'right' + 'top' + 'bottom' + 'stop' + Returns + ------- + fill : openpyxl.styles.Fill + """ + + from openpyxl.styles import PatternFill, GradientFill + + _pattern_fill_key_map = { + 'patternType': 'fill_type', + 'patterntype': 'fill_type', + 'fgColor': 'start_color', + 'fgcolor': 'start_color', + 'bgColor': 'end_color', + 'bgcolor': 'end_color', + } + + _gradient_fill_key_map = { + 'fill_type': 'type', + } + + pfill_kwargs = {} + gfill_kwargs = {} + for k, v in fill_dict.items(): + pk = gk = None + if k in _pattern_fill_key_map: + pk = _pattern_fill_key_map[k] + if k in _gradient_fill_key_map: + gk = _gradient_fill_key_map[k] + if pk in ['start_color', 'end_color']: + v = cls._convert_to_color(v) + if gk == 'stop': + v = cls._convert_to_stop(v) + if pk: + pfill_kwargs[pk] = v + elif gk: + gfill_kwargs[gk] = v + else: + pfill_kwargs[k] = v + gfill_kwargs[k] = v + + try: + return PatternFill(**pfill_kwargs) + except TypeError: + return GradientFill(**gfill_kwargs) + + + @classmethod + def _convert_to_side(cls, side_spec): + """ + Convert ``side_spec`` to an openpyxl v2 Side object + Parameters + ---------- + side_spec : str, dict + A string specifying the border style, or a dict with zero or more + of the following keys (or their synonyms). 
+ 'style' ('border_style') + 'color' + Returns + ------- + side : openpyxl.styles.Side + """ + + from openpyxl.styles import Side + + _side_key_map = { + 'border_style': 'style', + } + + if isinstance(side_spec, str): + return Side(style=side_spec) + + side_kwargs = {} + for k, v in side_spec.items(): + if k in _side_key_map: + k = _side_key_map[k] + if k == 'color': + v = cls._convert_to_color(v) + side_kwargs[k] = v + + return Side(**side_kwargs) + + + @classmethod + def _convert_to_border(cls, border_dict): + """ + Convert ``border_dict`` to an openpyxl v2 Border object + Parameters + ---------- + border_dict : dict + A dict with zero or more of the following keys (or their synonyms). + 'left' + 'right' + 'top' + 'bottom' + 'diagonal' + 'diagonal_direction' + 'vertical' + 'horizontal' + 'diagonalUp' ('diagonalup') + 'diagonalDown' ('diagonaldown') + 'outline' + Returns + ------- + border : openpyxl.styles.Border + """ + + from openpyxl.styles import Border + + _border_key_map = { + 'diagonalup': 'diagonalUp', + 'diagonaldown': 'diagonalDown', + } + + border_kwargs = {} + for k, v in border_dict.items(): + if k in _border_key_map: + k = _border_key_map[k] + if k == 'color': + v = cls._convert_to_color(v) + if k in ['left', 'right', 'top', 'bottom', 'diagonal']: + v = cls._convert_to_side(v) + border_kwargs[k] = v + + return Border(**border_kwargs) + + + @classmethod + def _convert_to_alignment(cls, alignment_dict): + """ + Convert ``alignment_dict`` to an openpyxl v2 Alignment object + Parameters + ---------- + alignment_dict : dict + A dict with zero or more of the following keys (or their synonyms). 
+ 'horizontal' + 'vertical' + 'text_rotation' + 'wrap_text' + 'shrink_to_fit' + 'indent' + Returns + ------- + alignment : openpyxl.styles.Alignment + """ + + from openpyxl.styles import Alignment + + return Alignment(**alignment_dict) + + + @classmethod + def _convert_to_number_format(cls, number_format_dict): + """ + Convert ``number_format_dict`` to an openpyxl v2 NumberFormat object. + Parameters + ---------- + number_format_dict : dict + A dict with zero or more of the following keys. + 'format_code' + Returns + ------- + number_format : openpyxl.styles.NumberFormat + """ + + from openpyxl.styles import NumberFormat + + return NumberFormat(**number_format_dict) + + + @classmethod + def _convert_to_protection(cls, protection_dict): + """ + Convert ``protection_dict`` to an openpyxl v2 Protection object. + Parameters + ---------- + protection_dict : dict + A dict with zero or more of the following keys. + 'locked' + 'hidden' + Returns + ------- + """ + + from openpyxl.styles import Protection + + return Protection(**protection_dict) + + +register_writer(_Openpyxl2Writer) + + class _XlwtWriter(ExcelWriter): engine = 'xlwt' supported_extensions = ('.xls',) diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index 96db535347921..17407e3a864e2 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -17,8 +17,8 @@ from pandas import DataFrame, Index, MultiIndex from pandas.io.parsers import read_csv from pandas.io.excel import ( - ExcelFile, ExcelWriter, read_excel, _XlwtWriter, _OpenpyxlWriter, - register_writer, _XlsxWriter + ExcelFile, ExcelWriter, read_excel, _XlwtWriter, _Openpyxl1Writer, + _Openpyxl2Writer, register_writer, _XlsxWriter ) from pandas.io.common import URLError from pandas.util.testing import ensure_clean @@ -1127,35 +1127,39 @@ def test_swapped_columns(self): tm.assert_series_equal(write_frame['B'], read_frame['B']) -def raise_wrapper(orig_method): - @functools.wraps(orig_method) - def 
wrapped(self, *args, **kwargs): - _skip_if_no_openpyxl() - if openpyxl_compat.is_compat(): - orig_method(self, *args, **kwargs) - else: - msg = 'Installed openpyxl is not supported at this time\. Use.+' - with tm.assertRaisesRegexp(ValueError, msg): +def raise_wrapper(major_ver): + def versioned_raise_wrapper(orig_method): + @functools.wraps(orig_method) + def wrapped(self, *args, **kwargs): + _skip_if_no_openpyxl() + if openpyxl_compat.is_compat(major_ver=major_ver): orig_method(self, *args, **kwargs) - return wrapped + else: + msg = 'Installed openpyxl is not supported at this time\. Use.+' + with tm.assertRaisesRegexp(ValueError, msg): + orig_method(self, *args, **kwargs) + return wrapped + return versioned_raise_wrapper -def raise_on_incompat_version(cls): - methods = filter(operator.methodcaller('startswith', 'test_'), dir(cls)) - for method in methods: - setattr(cls, method, raise_wrapper(getattr(cls, method))) - return cls +def raise_on_incompat_version(major_ver): + def versioned_raise_on_incompat_version(cls): + methods = filter(operator.methodcaller('startswith', 'test_'), dir(cls)) + for method in methods: + setattr(cls, method, raise_wrapper(major_ver)(getattr(cls, method))) + return cls + return versioned_raise_on_incompat_version -@raise_on_incompat_version +@raise_on_incompat_version(1) class OpenpyxlTests(ExcelWriterBase, tm.TestCase): ext = '.xlsx' - engine_name = 'openpyxl' + engine_name = 'openpyxl1' check_skip = staticmethod(lambda *args, **kwargs: None) def test_to_excel_styleconverter(self): _skip_if_no_openpyxl() - if not openpyxl_compat.is_compat(): + if not openpyxl_compat.is_compat(major_ver=1): raise nose.SkipTest('incompatiable openpyxl version') import openpyxl @@ -1167,7 +1171,7 @@ def test_to_excel_styleconverter(self): "left": "thin"}, "alignment": {"horizontal": "center", "vertical": "top"}} - xlsx_style = _OpenpyxlWriter._convert_to_style(hstyle) + xlsx_style = _Openpyxl1Writer._convert_to_style(hstyle) 
self.assertTrue(xlsx_style.font.bold) self.assertEqual(openpyxl.style.Border.BORDER_THIN, xlsx_style.borders.top.border_style) @@ -1183,6 +1187,70 @@ def test_to_excel_styleconverter(self): xlsx_style.alignment.vertical) +@raise_on_incompat_version(2) +class Openpyxl2Tests(ExcelWriterBase, tm.TestCase): + ext = '.xlsx' + engine_name = 'openpyxl2' + check_skip = staticmethod(lambda *args, **kwargs: None) + + def test_to_excel_styleconverter(self): + _skip_if_no_openpyxl() + if not openpyxl_compat.is_compat(major_ver=2): + raise nose.SkipTest('incompatiable openpyxl version') + + from openpyxl import styles + + hstyle = { + "font": { + "color": '00FF0000', + "bold": True, + }, + "borders": { + "top": "thin", + "right": "thin", + "bottom": "thin", + "left": "thin", + }, + "alignment": { + "horizontal": "center", + "vertical": "top", + }, + "fill": { + "patternType": 'solid', + 'fgColor': { + 'rgb': '006666FF', + 'tint': 0.3, + }, + }, + "number_format": { + "format_code": "0.00" + }, + "protection": { + "locked": True, + "hidden": False, + }, + } + + font_color = styles.Color('00FF0000') + font = styles.Font(bold=True, color=font_color) + side = styles.Side(style=styles.borders.BORDER_THIN) + border = styles.Border(top=side, right=side, bottom=side, left=side) + alignment = styles.Alignment(horizontal='center', vertical='top') + fill_color = styles.Color(rgb='006666FF', tint=0.3) + fill = styles.PatternFill(patternType='solid', fgColor=fill_color) + number_format = styles.NumberFormat(format_code='0.00') + protection = styles.Protection(locked=True, hidden=False) + + kw = _Openpyxl2Writer._convert_to_style_kwargs(hstyle) + self.assertEqual(kw['font'], font) + self.assertEqual(kw['border'], border) + self.assertEqual(kw['alignment'], alignment) + self.assertEqual(kw['fill'], fill) + self.assertEqual(kw['number_format'], number_format) + self.assertEqual(kw['protection'], protection) + + + class XlwtTests(ExcelWriterBase, tm.TestCase): ext = '.xls' engine_name = 'xlwt' 
@@ -1216,7 +1284,6 @@ class XlsxWriterTests(ExcelWriterBase, tm.TestCase): check_skip = staticmethod(_skip_if_no_xlsxwriter) -@raise_on_incompat_version class OpenpyxlTests_NoMerge(ExcelWriterBase, tm.TestCase): ext = '.xlsx' engine_name = 'openpyxl' @@ -1254,9 +1321,9 @@ def test_ExcelWriter_dispatch(self): writer_klass = _XlsxWriter except ImportError: _skip_if_no_openpyxl() - if not openpyxl_compat.is_compat(): + if not openpyxl_compat.is_compat(major_ver=1): raise nose.SkipTest('incompatible openpyxl version') - writer_klass = _OpenpyxlWriter + writer_klass = _Openpyxl1Writer with ensure_clean('.xlsx') as path: writer = ExcelWriter(path)