pandas-dev · jtratner · Sep 22, 2013 · Sep 16, 2013 · jreback · Sep 16, 2013
diff --git a/ci/requirements-2.7.txt b/ci/requirements-2.7.txt
@@ -8,6 +8,7 @@ numexpr==2.1
 tables==2.3.1
 matplotlib==1.1.1
 openpyxl==1.6.2
+xlsxwriter==0.4.3
 xlrd==0.9.2
 patsy==0.1.0
 html5lib==1.0b2

diff --git a/ci/requirements-2.7_LOCALE.txt b/ci/requirements-2.7_LOCALE.txt
@@ -2,6 +2,7 @@ python-dateutil
 pytz==2013b
 xlwt==0.7.5
 openpyxl==1.6.2
+xlsxwriter==0.4.3
 xlrd==0.9.2
 numpy==1.6.1
 cython==0.19.1

diff --git a/ci/requirements-3.2.txt b/ci/requirements-3.2.txt
@@ -1,6 +1,7 @@
 python-dateutil==2.1
 pytz==2013b
 openpyxl==1.6.2
+xlsxwriter==0.4.3
 xlrd==0.9.2
 numpy==1.6.2
 cython==0.19.1

diff --git a/ci/requirements-3.3.txt b/ci/requirements-3.3.txt
@@ -1,6 +1,7 @@
 python-dateutil==2.1
 pytz==2013b
 openpyxl==1.6.2
+xlsxwriter==0.4.3
 xlrd==0.9.2
 html5lib==1.0b2
 numpy==1.7.1

diff --git a/doc/source/10min.rst b/doc/source/10min.rst
@@ -695,13 +695,13 @@ Writing to an excel file
 
 .. ipython:: python
 
-   df.to_excel('foo.xlsx', sheet_name='sheet1')
+   df.to_excel('foo.xlsx', sheet_name='Sheet1')
 
 Reading from an excel file
 
 .. ipython:: python
 
-   pd.read_excel('foo.xlsx', 'sheet1', index_col=None, na_values=['NA'])
+   pd.read_excel('foo.xlsx', 'Sheet1', index_col=None, na_values=['NA'])
 
 .. ipython:: python
    :suppress:

diff --git a/doc/source/install.rst b/doc/source/install.rst
@@ -100,6 +100,8 @@ Optional Dependencies
   * `openpyxl <http://packages.python.org/openpyxl/>`__, `xlrd/xlwt <http://www.python-excel.org/>`__
      * openpyxl version 1.6.1 or higher
      * Needed for Excel I/O
+  * `XlsxWriter <https://pypi.python.org/pypi/XlsxWriter>`__
+     * Alternative Excel writer.
   * `boto <https://pypi.python.org/pypi/boto>`__: necessary for Amazon S3
     access.
   * One of `PyQt4

diff --git a/doc/source/io.rst b/doc/source/io.rst
@@ -1654,7 +1654,7 @@ indices to be parsed.
 
 .. code-block:: python
 
-   read_excel('path_to_file.xls', Sheet1', parse_cols=[0, 2, 3], index_col=None, na_values=['NA'])
+   read_excel('path_to_file.xls', 'Sheet1', parse_cols=[0, 2, 3], index_col=None, na_values=['NA'])
 
 To write a DataFrame object to a sheet of an Excel file, you can use the
 ``to_excel`` instance method.  The arguments are largely the same as ``to_csv``
@@ -1664,7 +1664,7 @@ written.  For example:
 
 .. code-block:: python
 
-   df.to_excel('path_to_file.xlsx', sheet_name='sheet1')
+   df.to_excel('path_to_file.xlsx', sheet_name='Sheet1')
 
 Files with a ``.xls`` extension will be written using ``xlwt`` and those with
 a ``.xlsx`` extension will be written using ``openpyxl``.
@@ -1677,8 +1677,8 @@ one can use the ExcelWriter class, as in the following example:
 .. code-block:: python
 
    writer = ExcelWriter('path_to_file.xlsx')
-   df1.to_excel(writer, sheet_name='sheet1')
-   df2.to_excel(writer, sheet_name='sheet2')
+   df1.to_excel(writer, sheet_name='Sheet1')
+   df2.to_excel(writer, sheet_name='Sheet2')
    writer.save()
 
 .. _io.excel.writers:
@@ -1693,11 +1693,29 @@ Excel writer engines
 1. the ``engine`` keyword argument
 2. the filename extension (via the default specified in config options)
 
-``pandas`` only supports ``openpyxl`` for ``.xlsx`` and ``.xlsm`` files and
-``xlwt`` for ``.xls`` files.  If you have multiple engines installed, you can choose the
-engine to use by default via the options ``io.excel.xlsx.writer`` and
-``io.excel.xls.writer``.
+By default, ``pandas`` uses
+`openpyxl <http://packages.python.org/openpyxl/>`__ as the writer for ``.xlsx``
+and ``.xlsm`` files and `xlwt <http://www.python-excel.org/>`__ as the writer for
+``.xls`` files.  If you have multiple engines installed, you can change the
+default engine via the ``io.excel.xlsx.writer`` and ``io.excel.xls.writer``
+options.
 
+For example, if the optional `XlsxWriter <http://xlsxwriter.readthedocs.org>`__
+module is installed, you can use it as an xlsx writer engine as follows:
+
+.. code-block:: python
+
+   # By setting the 'engine' in the DataFrame and Panel 'to_excel()' methods.
+   df.to_excel('path_to_file.xlsx', sheet_name='Sheet1', engine='xlsxwriter')
+
+   # By setting the 'engine' in the ExcelWriter constructor.
+   writer = ExcelWriter('path_to_file.xlsx', engine='xlsxwriter')
+
+   # Or via pandas configuration.
+   from pandas import set_option
+   set_option('io.excel.xlsx.writer', 'xlsxwriter')
+
+   df.to_excel('path_to_file.xlsx', sheet_name='Sheet1')
 
 .. _io.hdf5:
 

diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -113,6 +113,9 @@ Improvements to existing features
     ``io.excel.xls.writer``. (:issue:`4745`, :issue:`4750`)
   - ``Panel.to_excel()`` now accepts keyword arguments that will be passed to
     its ``DataFrame``'s ``to_excel()`` methods. (:issue:`4750`)
+  - Added XlsxWriter as an optional ``ExcelWriter`` engine. This is about 5x
+    faster than the default openpyxl xlsx writer and is equivalent in speed
+    to the xlwt xls writer module. (:issue:`4542`)
   - allow DataFrame constructor to accept more list-like objects, e.g. list of
     ``collections.Sequence`` and ``array.Array`` objects (:issue:`3783`,:issue:`4297`, :issue:`4851`),
     thanks @lgautier

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1356,7 +1356,7 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,
                                      tupleize_cols=tupleize_cols)
         formatter.save()
 
-    def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
+    def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',
                  float_format=None, cols=None, header=True, index=True,
                  index_label=None, startrow=0, startcol=0, engine=None):
         """
@@ -1366,7 +1366,7 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
         ----------
         excel_writer : string or ExcelWriter object
             File path or existing ExcelWriter
-        sheet_name : string, default 'sheet1'
+        sheet_name : string, default 'Sheet1'
             Name of sheet which will contain DataFrame
         na_rep : string, default ''
             Missing data representation
@@ -1397,8 +1397,8 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
         to the existing workbook.  This can be used to save different
         DataFrames to one workbook
         >>> writer = ExcelWriter('output.xlsx')
-        >>> df1.to_excel(writer,'sheet1')
-        >>> df2.to_excel(writer,'sheet2')
+        >>> df1.to_excel(writer,'Sheet1')
+        >>> df2.to_excel(writer,'Sheet2')
         >>> writer.save()
         """
         from pandas.io.excel import ExcelWriter

diff --git a/pandas/io/excel.py b/pandas/io/excel.py
@@ -596,6 +596,7 @@ def _convert_to_style(cls, style_dict, num_format_str=None):
         Parameters
         ----------
         style_dict: style dictionary to convert
+        num_format_str: optional number format string
         """
         import xlwt
 
@@ -611,3 +612,118 @@ def _convert_to_style(cls, style_dict, num_format_str=None):
 
 register_writer(_XlwtWriter)
 
+
+class _XlsxWriter(ExcelWriter):
+    """
+    Excel writer engine that produces ``.xlsx`` files using xlsxwriter.
+    """
+    engine = 'xlsxwriter'
+    supported_extensions = ('.xlsx',)
+
+    def __init__(self, path, **engine_kwargs):
+        # Imported here rather than at module level because xlsxwriter is
+        # an optional dependency.
+        import xlsxwriter
+
+        super(_XlsxWriter, self).__init__(path, **engine_kwargs)
+
+        self.book = xlsxwriter.Workbook(path, **engine_kwargs)
+
+    def save(self):
+        """
+        Save workbook to disk.
+        """
+        return self.book.close()
+
+    def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0):
+        """
+        Write the frame cells to a worksheet using xlsxwriter.
+
+        Parameters
+        ----------
+        cells: iterable of cells exposing ``row``, ``col``, ``val``,
+            ``style``, ``mergestart`` and ``mergeend`` attributes
+        sheet_name: name of the target sheet, resolved through
+            ``_get_sheet_name``; the sheet is created on first use
+        startrow: row offset added to every cell row
+        startcol: column offset added to every cell column
+        """
+        sheet_name = self._get_sheet_name(sheet_name)
+
+        if sheet_name in self.sheets:
+            wks = self.sheets[sheet_name]
+        else:
+            wks = self.book.add_worksheet(sheet_name)
+            self.sheets[sheet_name] = wks
+
+        # Cache of format objects, keyed by the serialized cell style plus
+        # number format, so identical styles share one format object.
+        style_dict = {}
+
+        for cell in cells:
+            val = _conv_value(cell.val)
+
+            # datetime.datetime is a subclass of datetime.date, so the two
+            # checks must be exclusive or datetimes lose their time format.
+            num_format_str = None
+            if isinstance(cell.val, datetime.datetime):
+                num_format_str = "YYYY-MM-DD HH:MM:SS"
+            elif isinstance(cell.val, datetime.date):
+                num_format_str = "YYYY-MM-DD"
+
+            stylekey = json.dumps(cell.style)
+            if num_format_str:
+                stylekey += num_format_str
+
+            if stylekey in style_dict:
+                style = style_dict[stylekey]
+            else:
+                style = self._convert_to_style(cell.style, num_format_str)
+                style_dict[stylekey] = style
+
+            if cell.mergestart is not None and cell.mergeend is not None:
+                # merge_range() takes (first_row, first_col, last_row,
+                # last_col, data, cell_format).
+                wks.merge_range(startrow + cell.row,
+                                startcol + cell.col,
+                                startrow + cell.mergestart,
+                                startcol + cell.mergeend,
+                                val, style)
+            else:
+                wks.write(startrow + cell.row,
+                          startcol + cell.col,
+                          val, style)
+
+    def _convert_to_style(self, style_dict, num_format_str=None):
+        """
+        converts a style_dict to an xlsxwriter format object
+
+        Returns None when style_dict is None.
+
+        Parameters
+        ----------
+        style_dict: style dictionary to convert
+        num_format_str: optional number format string
+        """
+        if style_dict is None:
+            return None
+
+        # Create a XlsxWriter format object.
+        xl_format = self.book.add_format()
+
+        # Map the cell font to XlsxWriter font properties.
+        if style_dict.get('font'):
+            font = style_dict['font']
+            if font.get('bold'):
+                xl_format.set_bold()
+
+        # Map the cell borders to XlsxWriter border properties.
+        if style_dict.get('borders'):
+            xl_format.set_border()
+
+        if num_format_str is not None:
+            xl_format.set_num_format(num_format_str)
+
+        return xl_format
+
+register_writer(_XlsxWriter)