Skip to content

Pass through Engine kwargs in ExcelWriter #43445

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Nov 25, 2021
Merged
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,7 @@ I/O
- Bug in :func:`json_normalize` where multi-character ``sep`` parameter is incorrectly prefixed to every key (:issue:`43831`)
- Bug in :func:`read_csv` with :code:`float_precision="round_trip"` which did not skip initial/trailing whitespace (:issue:`43713`)
- Bug in dumping/loading a :class:`DataFrame` with ``yaml.dump(frame)`` (:issue:`42748`)
-
- Bug in :class:`ExcelWriter`, where ``engine_kwargs`` were not passed through to all engines (:issue:`43442`)

Period
^^^^^^
Expand Down
28 changes: 27 additions & 1 deletion pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -700,7 +700,13 @@ class ExcelWriter(metaclass=abc.ABCMeta):

.. versionadded:: 1.3.0
engine_kwargs : dict, optional
Keyword arguments to be passed into the engine.
Keyword arguments to be passed into the engine. These will be passed to
the following functions of the respective engines:

* xlsxwriter: ``xlsxwriter.Workbook(file, **engine_kwargs)``
* openpyxl (write mode): ``openpyxl.Workbook(**engine_kwargs)``
* openpyxl (append mode): ``openpyxl.load_workbook(file, **engine_kwargs)``
* odswriter: ``odf.opendocument.OpenDocumentSpreadsheet(**engine_kwargs)``

.. versionadded:: 1.3.0
**kwargs : dict, optional
Expand Down Expand Up @@ -780,6 +786,26 @@ class ExcelWriter(metaclass=abc.ABCMeta):
... with zf.open("filename.xlsx", "w") as buffer:
... with pd.ExcelWriter(buffer) as writer:
... df.to_excel(writer)

You can specify additional arguments to the underlying engine:

>>> with pd.ExcelWriter(
... "path_to_file.xlsx",
... engine="xlsxwriter",
... engine_kwargs={"options": {"nan_inf_to_errors": True}}
... ) as writer:
... df.to_excel(writer)

In append mode, ``engine_kwargs`` are passed through to
openpyxl's ``load_workbook``:

>>> with pd.ExcelWriter(
... "path_to_file.xlsx",
... engine="openpyxl",
... mode="a",
... engine_kwargs={"keep_vba": True}
... ) as writer:
... df.to_excel(writer, sheet_name="Sheet2")
"""

# Defining an ExcelWriter implementation (see abstract methods for more...)
Expand Down
9 changes: 7 additions & 2 deletions pandas/io/excel/_odswriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@
from pandas._typing import StorageOptions

from pandas.io.excel._base import ExcelWriter
from pandas.io.excel._util import validate_freeze_panes
from pandas.io.excel._util import (
combine_kwargs,
validate_freeze_panes,
)
from pandas.io.formats.excel import ExcelCell


Expand Down Expand Up @@ -44,7 +47,9 @@ def __init__(
engine_kwargs=engine_kwargs,
)

self.book = OpenDocumentSpreadsheet()
engine_kwargs = combine_kwargs(engine_kwargs, kwargs)

self.book = OpenDocumentSpreadsheet(**engine_kwargs)
self._style_dict: dict[str, str] = {}

def save(self) -> None:
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/excel/_openpyxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,13 @@ def __init__(
if "r+" in self.mode: # Load from existing workbook
from openpyxl import load_workbook

self.book = load_workbook(self.handles.handle)
self.book = load_workbook(self.handles.handle, **engine_kwargs)
self.handles.handle.seek(0)
self.sheets = {name: self.book[name] for name in self.book.sheetnames}

else:
# Create workbook object with default optimized_write=True.
self.book = Workbook()
self.book = Workbook(**engine_kwargs)

if self.book.worksheets:
self.book.remove(self.book.worksheets[0])
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/excel/_xlwt.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def __init__(

if encoding is None:
encoding = "ascii"
self.book = xlwt.Workbook(encoding=encoding)
self.book = xlwt.Workbook(encoding=encoding, **engine_kwargs)
self.fm_datetime = xlwt.easyxf(num_format_str=self.datetime_format)
self.fm_date = xlwt.easyxf(num_format_str=self.date_format)

Expand Down
47 changes: 32 additions & 15 deletions pandas/tests/io/excel/test_odswriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,40 @@ def test_write_append_mode_raises(ext):
ExcelWriter(f, engine="odf", mode="a")


@pytest.mark.parametrize("nan_inf_to_errors", [True, False])
def test_kwargs(ext, nan_inf_to_errors):
def test_kwargs(ext):
# GH 42286
# odswriter doesn't utilize kwargs, nothing to check except that it works
kwargs = {"options": {"nan_inf_to_errors": nan_inf_to_errors}}
# GH 43445
# test for error: OpenDocumentSpreadsheet does not accept any arguments
kwargs = {"kwarg": 1}
with tm.ensure_clean(ext) as f:
msg = re.escape("Use of **kwargs is deprecated")
with tm.assert_produces_warning(FutureWarning, match=msg):
with ExcelWriter(f, engine="odf", **kwargs) as _:
pass


@pytest.mark.parametrize("nan_inf_to_errors", [True, False])
def test_engine_kwargs(ext, nan_inf_to_errors):
error = re.escape(
"OpenDocumentSpreadsheet() got an unexpected keyword argument 'kwarg'"
)
with pytest.raises(
TypeError,
match=error,
):
with tm.assert_produces_warning(FutureWarning, match=msg):
with ExcelWriter(f, engine="odf", **kwargs) as _:
pass


@pytest.mark.parametrize("engine_kwargs", [None, {"kwarg": 1}])
def test_engine_kwargs(ext, engine_kwargs):
# GH 42286
# odswriter doesn't utilize engine_kwargs, nothing to check except that it works
engine_kwargs = {"options": {"nan_inf_to_errors": nan_inf_to_errors}}
# GH 43445
# test for error: OpenDocumentSpreadsheet does not accept any arguments
with tm.ensure_clean(ext) as f:
with ExcelWriter(f, engine="odf", engine_kwargs=engine_kwargs) as _:
pass
if engine_kwargs is not None:
error = re.escape(
"OpenDocumentSpreadsheet() got an unexpected keyword argument 'kwarg'"
)
with pytest.raises(
TypeError,
match=error,
):
ExcelWriter(f, engine="odf", engine_kwargs=engine_kwargs)
else:
with ExcelWriter(f, engine="odf", engine_kwargs=engine_kwargs) as _:
pass
53 changes: 43 additions & 10 deletions pandas/tests/io/excel/test_openpyxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,30 +85,63 @@ def test_write_cells_merge_styled(ext):
assert xcell_a2.font == openpyxl_sty_merged


@pytest.mark.parametrize("write_only", [True, False])
def test_kwargs(ext, write_only):
# GH 42286
# openpyxl doesn't utilize kwargs, only test that supplying a kwarg works
kwargs = {"write_only": write_only}
@pytest.mark.parametrize("iso_dates", [True, False])
def test_kwargs(ext, iso_dates):
# GH 42286 GH 43445
kwargs = {"iso_dates": iso_dates}
with tm.ensure_clean(ext) as f:
msg = re.escape("Use of **kwargs is deprecated")
with tm.assert_produces_warning(FutureWarning, match=msg):
with ExcelWriter(f, engine="openpyxl", **kwargs) as writer:
assert writer.book.iso_dates == iso_dates
# ExcelWriter won't allow us to close without writing something
DataFrame().to_excel(writer)


@pytest.mark.parametrize("write_only", [True, False])
def test_engine_kwargs(ext, write_only):
# GH 42286
# openpyxl doesn't utilize kwargs, only test that supplying a engine_kwarg works
engine_kwargs = {"write_only": write_only}
@pytest.mark.parametrize("iso_dates", [True, False])
def test_engine_kwargs_write(ext, iso_dates):
# GH 42286 GH 43445
engine_kwargs = {"iso_dates": iso_dates}
with tm.ensure_clean(ext) as f:
with ExcelWriter(f, engine="openpyxl", engine_kwargs=engine_kwargs) as writer:
assert writer.book.iso_dates == iso_dates
# ExcelWriter won't allow us to close without writing something
DataFrame().to_excel(writer)


def test_engine_kwargs_append_invalid(ext):
# GH 43445
# test whether an invalid engine kwargs actually raises
with tm.ensure_clean(ext) as f:
DataFrame(["hello", "world"]).to_excel(f)
with pytest.raises(
TypeError,
match=re.escape(
"load_workbook() got an unexpected keyword argument 'apple_banana'"
),
):
with ExcelWriter(
f, engine="openpyxl", mode="a", engine_kwargs={"apple_banana": "fruit"}
) as writer:
# ExcelWriter needs us to write something to close properly
DataFrame(["good"]).to_excel(writer, sheet_name="Sheet2")


@pytest.mark.parametrize("data_only, expected", [(True, 0), (False, "=1+1")])
def test_engine_kwargs_append_data_only(ext, data_only, expected):
# GH 43445
# tests whether the data_only engine_kwarg actually works well for
# openpyxl's load_workbook
with tm.ensure_clean(ext) as f:
DataFrame(["=1+1"]).to_excel(f)
with ExcelWriter(
f, engine="openpyxl", mode="a", engine_kwargs={"data_only": data_only}
) as writer:
assert writer.sheets["Sheet1"]["B2"].value == expected
# ExcelWriter needs us to writer something to close properly?
DataFrame().to_excel(writer, sheet_name="Sheet2")


@pytest.mark.parametrize(
"mode,expected", [("w", ["baz"]), ("a", ["foo", "bar", "baz"])]
)
Expand Down
22 changes: 12 additions & 10 deletions pandas/tests/io/excel/test_xlwt.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,25 +101,27 @@ def test_option_xls_writer_deprecated(ext):
options.io.excel.xls.writer = "xlwt"


@pytest.mark.parametrize("write_only", [True, False])
def test_kwargs(ext, write_only):
@pytest.mark.parametrize("style_compression", [0, 2])
def test_kwargs(ext, style_compression):
# GH 42286
# xlwt doesn't utilize kwargs, only test that supplying a kwarg works
kwargs = {"write_only": write_only}
kwargs = {"style_compression": style_compression}
with tm.ensure_clean(ext) as f:
msg = re.escape("Use of **kwargs is deprecated")
with tm.assert_produces_warning(FutureWarning, match=msg):
with ExcelWriter(f, engine="openpyxl", **kwargs) as writer:
with ExcelWriter(f, engine="xlwt", **kwargs) as writer:
assert (
writer.book._Workbook__styles.style_compression == style_compression
)
# xlwt won't allow us to close without writing something
DataFrame().to_excel(writer)


@pytest.mark.parametrize("write_only", [True, False])
def test_engine_kwargs(ext, write_only):
@pytest.mark.parametrize("style_compression", [0, 2])
def test_engine_kwargs(ext, style_compression):
# GH 42286
# xlwt doesn't utilize kwargs, only test that supplying a engine_kwarg works
engine_kwargs = {"write_only": write_only}
engine_kwargs = {"style_compression": style_compression}
with tm.ensure_clean(ext) as f:
with ExcelWriter(f, engine="openpyxl", engine_kwargs=engine_kwargs) as writer:
with ExcelWriter(f, engine="xlwt", engine_kwargs=engine_kwargs) as writer:
assert writer.book._Workbook__styles.style_compression == style_compression
# xlwt won't allow us to close without writing something
DataFrame().to_excel(writer)