Skip to content

Pass through Engine kwargs in ExcelWriter #43445

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Nov 25, 2021
29 changes: 28 additions & 1 deletion pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -700,7 +700,14 @@ class ExcelWriter(metaclass=abc.ABCMeta):

.. versionadded:: 1.3.0
engine_kwargs : dict, optional
Keyword arguments to be passed into the engine.
Keyword arguments to be passed into the engine. These will be passed to
the following functions of the respective engines:

* odswriter: ``odf.opendocument.OpenDocumentSpreadsheet(**engine_kwargs)``
* xlsxwriter: ``xlsxwriter.Workbook(file, **engine_kwargs)``
* xlwt: ``xlwt.Workbook(encoding, **engine_kwargs)``
* openpyxl (write mode): ``openpyxl.Workbook(**engine_kwargs)``
* openpyxl (append mode): ``openpyxl.load_workbook(file, **engine_kwargs)``

.. versionadded:: 1.3.0
**kwargs : dict, optional
Expand Down Expand Up @@ -780,6 +787,26 @@ class ExcelWriter(metaclass=abc.ABCMeta):
... with zf.open("filename.xlsx", "w") as buffer:
... with pd.ExcelWriter(buffer) as writer:
... df.to_excel(writer)

You can specify additional arguments to the underlying engine:

>>> with pd.ExcelWriter(
... "path_to_file.xlsx",
... engine="xlsxwriter",
... engine_kwargs={"options": {"nan_inf_to_errors": True}}
... ) as writer:
... df.to_excel(writer)

In append mode, ``engine_kwargs`` are passed through to
openpyxl's ``load_workbook``:

>>> with pd.ExcelWriter(
... "path_to_file.xlsx",
... engine="openpyxl",
... mode="a",
... engine_kwargs={"keep_vba": True}
... ) as writer:
... df.to_excel(writer, sheet_name="Sheet2")
"""

# Defining an ExcelWriter implementation (see abstract methods for more...)
Expand Down
9 changes: 7 additions & 2 deletions pandas/io/excel/_odswriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@
from pandas._typing import StorageOptions

from pandas.io.excel._base import ExcelWriter
from pandas.io.excel._util import validate_freeze_panes
from pandas.io.excel._util import (
combine_kwargs,
validate_freeze_panes,
)
from pandas.io.formats.excel import ExcelCell


Expand Down Expand Up @@ -44,7 +47,9 @@ def __init__(
engine_kwargs=engine_kwargs,
)

self.book = OpenDocumentSpreadsheet()
engine_kwargs = combine_kwargs(engine_kwargs, kwargs)

self.book = OpenDocumentSpreadsheet(**engine_kwargs)
self._style_dict: dict[str, str] = {}

def save(self) -> None:
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/excel/_openpyxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,13 @@ def __init__(
if "r+" in self.mode: # Load from existing workbook
from openpyxl import load_workbook

self.book = load_workbook(self.handles.handle)
self.book = load_workbook(self.handles.handle, **engine_kwargs)
self.handles.handle.seek(0)
self.sheets = {name: self.book[name] for name in self.book.sheetnames}

else:
# Create workbook object with default optimized_write=True.
self.book = Workbook()
self.book = Workbook(**engine_kwargs)

if self.book.worksheets:
self.book.remove(self.book.worksheets[0])
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/excel/_xlwt.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def __init__(

if encoding is None:
encoding = "ascii"
self.book = xlwt.Workbook(encoding=encoding)
self.book = xlwt.Workbook(encoding=encoding, **engine_kwargs)
self.fm_datetime = xlwt.easyxf(num_format_str=self.datetime_format)
self.fm_date = xlwt.easyxf(num_format_str=self.date_format)

Expand Down
47 changes: 32 additions & 15 deletions pandas/tests/io/excel/test_odswriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,40 @@ def test_write_append_mode_raises(ext):
ExcelWriter(f, engine="odf", mode="a")


@pytest.mark.parametrize("nan_inf_to_errors", [True, False])
def test_kwargs(ext, nan_inf_to_errors):
def test_kwargs(ext):
# GH 42286
# odswriter doesn't utilize kwargs, nothing to check except that it works
kwargs = {"options": {"nan_inf_to_errors": nan_inf_to_errors}}
# GH 43445
# test for error: OpenDocumentSpreadsheet does not accept any arguments
kwargs = {"kwarg": 1}
with tm.ensure_clean(ext) as f:
msg = re.escape("Use of **kwargs is deprecated")
with tm.assert_produces_warning(FutureWarning, match=msg):
with ExcelWriter(f, engine="odf", **kwargs) as _:
pass


@pytest.mark.parametrize("nan_inf_to_errors", [True, False])
def test_engine_kwargs(ext, nan_inf_to_errors):
error = re.escape(
"OpenDocumentSpreadsheet() got an unexpected keyword argument 'kwarg'"
)
with pytest.raises(
TypeError,
match=error,
):
with tm.assert_produces_warning(FutureWarning, match=msg):
with ExcelWriter(f, engine="odf", **kwargs) as _:
pass


@pytest.mark.parametrize("engine_kwargs", [None, {"kwarg": 1}])
def test_engine_kwargs(ext, engine_kwargs):
# GH 42286
# odswriter doesn't utilize engine_kwargs, nothing to check except that it works
engine_kwargs = {"options": {"nan_inf_to_errors": nan_inf_to_errors}}
# GH 43445
# test for error: OpenDocumentSpreadsheet does not accept any arguments
with tm.ensure_clean(ext) as f:
with ExcelWriter(f, engine="odf", engine_kwargs=engine_kwargs) as _:
pass
if engine_kwargs is not None:
error = re.escape(
"OpenDocumentSpreadsheet() got an unexpected keyword argument 'kwarg'"
)
with pytest.raises(
TypeError,
match=error,
):
ExcelWriter(f, engine="odf", engine_kwargs=engine_kwargs)
else:
with ExcelWriter(f, engine="odf", engine_kwargs=engine_kwargs) as _:
pass
71 changes: 61 additions & 10 deletions pandas/tests/io/excel/test_openpyxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,30 +85,81 @@ def test_write_cells_merge_styled(ext):
assert xcell_a2.font == openpyxl_sty_merged


@pytest.mark.parametrize("write_only", [True, False])
def test_kwargs(ext, write_only):
# GH 42286
# openpyxl doesn't utilize kwargs, only test that supplying a kwarg works
kwargs = {"write_only": write_only}
@pytest.mark.parametrize("iso_dates", [True, False])
def test_kwargs(ext, iso_dates):
# GH 42286 GH 43445
kwargs = {"iso_dates": iso_dates}
with tm.ensure_clean(ext) as f:
msg = re.escape("Use of **kwargs is deprecated")
with tm.assert_produces_warning(FutureWarning, match=msg):
with ExcelWriter(f, engine="openpyxl", **kwargs) as writer:
assert writer.book.iso_dates == iso_dates
# ExcelWriter won't allow us to close without writing something
DataFrame().to_excel(writer)


@pytest.mark.parametrize("write_only", [True, False])
def test_engine_kwargs(ext, write_only):
# GH 42286
# openpyxl doesn't utilize kwargs, only test that supplying a engine_kwarg works
engine_kwargs = {"write_only": write_only}
@pytest.mark.parametrize("iso_dates", [True, False])
def test_engine_kwargs_write(ext, iso_dates):
# GH 42286 GH 43445
engine_kwargs = {"iso_dates": iso_dates}
with tm.ensure_clean(ext) as f:
with ExcelWriter(f, engine="openpyxl", engine_kwargs=engine_kwargs) as writer:
assert writer.book.iso_dates == iso_dates
# ExcelWriter won't allow us to close without writing something
DataFrame().to_excel(writer)


@pytest.mark.parametrize(
"engine_kwargs", [{"data_only": True}, {"keep_vba": True}, {"keep_links": False}]
)
def test_engine_kwargs_append(ext, engine_kwargs):
# GH 43445
# only read_only=True will give something easy that can be verified (maybe
# keep_links or data_only could also work, but that'd be more complicated)
# arguments are passed through to load_workbook()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think read_only would be sufficient. Need to verify the kwarg gets through.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought testing for all available parameters would be nice, since then we will also be warned when API changes on openpyxl's side...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, read-only will not work here, and give different errors on windows than linux, this is why the previous test failed

Copy link
Member

@rhshadrach rhshadrach Nov 11, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I'm understanding the test correctly, the passing of kwargs through to openpyxl could be entirely removed from pandas and this test would pass. Let me know if you're not sure of a way to improve that (or if my assessment is wrong).

Copy link
Contributor Author

@feefladder feefladder Nov 12, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm, that's true. That's why I have the other test: test_engine_kwargs_append_invalid that tests for an invalid keyword argument in load_workbook. Also the other test test_engine_kwargs_append_data_only tests for correct use of openpyxl's data_only being passed through. That would make this test redundant indeed? It just felt logical to first test if passing an engine kwarg does not raise an error normally, then test if it does raise with an invalid argument and then test if the passing of engine kwargs works well together with openpyxl. Then in that order, people will know more precisely where the error lies.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tell me if you want it removed!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the summary, very helpful. Yes, I think this test can be removed as it's redundant with test_engine_kwargs_append_data_only. In general, we'd rather not have to change tests if the API of a third party engine changes and it doesn't negatively impact pandas. It is okay to do so when necessary, as is in the data_only test.

with tm.ensure_clean(ext) as f:
DataFrame(["hello", "world"]).to_excel(f)
with ExcelWriter(
f, engine="openpyxl", mode="a", engine_kwargs=engine_kwargs
) as writer:
DataFrame(["goodbye", "world"]).to_excel(writer, sheet_name="Sheet2")


@pytest.mark.parametrize("data_only,B2", [(True, 0), (False, "=1+1")])
def test_engine_kwargs_append_keep_links(ext, data_only, B2):
# GH 43445
# tests whether the keep_links engine_kwarg actually works well for
# openpyxl's load_workbook
# not sure if we have to test for this though, since it also relies a bit on
# functionality of openpyxl
with tm.ensure_clean(ext) as f:
DataFrame(["=1+1"]).to_excel(f)
with ExcelWriter(
f, engine="openpyxl", mode="a", engine_kwargs={"data_only": data_only}
) as writer:
assert writer.sheets["Sheet1"]["B2"].value == B2
# ExcelWriter needs us to writer something to close properly?
DataFrame().to_excel(writer, sheet_name="Sheet2")


def test_engine_kwargs_append_invalid(ext):
# GH 43445
# test whether an invalid engine kwargs actually raises
with tm.ensure_clean(ext) as f:
DataFrame(["hello", "world"]).to_excel(f)
with pytest.raises(
TypeError,
match=re.escape(
"load_workbook() got an unexpected keyword argument 'apple_banana'"
),
):
with ExcelWriter(
f, engine="openpyxl", mode="a", engine_kwargs={"apple_banana": "fruit"}
) as writer:
# not sure if we have to do something here?
DataFrame(["good"]).to_excel(writer, sheet_name="Sheet2")


@pytest.mark.parametrize(
"mode,expected", [("w", ["baz"]), ("a", ["foo", "bar", "baz"])]
)
Expand Down
22 changes: 12 additions & 10 deletions pandas/tests/io/excel/test_xlwt.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,25 +101,27 @@ def test_option_xls_writer_deprecated(ext):
options.io.excel.xls.writer = "xlwt"


@pytest.mark.parametrize("write_only", [True, False])
def test_kwargs(ext, write_only):
@pytest.mark.parametrize("style_compression", [0, 2])
def test_kwargs(ext, style_compression):
# GH 42286
# xlwt doesn't utilize kwargs, only test that supplying a kwarg works
kwargs = {"write_only": write_only}
kwargs = {"style_compression": style_compression}
with tm.ensure_clean(ext) as f:
msg = re.escape("Use of **kwargs is deprecated")
with tm.assert_produces_warning(FutureWarning, match=msg):
with ExcelWriter(f, engine="openpyxl", **kwargs) as writer:
with ExcelWriter(f, engine="xlwt", **kwargs) as writer:
assert (
writer.book._Workbook__styles.style_compression == style_compression
)
# xlwt won't allow us to close without writing something
DataFrame().to_excel(writer)


@pytest.mark.parametrize("write_only", [True, False])
def test_engine_kwargs(ext, write_only):
@pytest.mark.parametrize("style_compression", [0, 2])
def test_engine_kwargs(ext, style_compression):
# GH 42286
# xlwt doesn't utilize kwargs, only test that supplying a engine_kwarg works
engine_kwargs = {"write_only": write_only}
engine_kwargs = {"style_compression": style_compression}
with tm.ensure_clean(ext) as f:
with ExcelWriter(f, engine="openpyxl", engine_kwargs=engine_kwargs) as writer:
with ExcelWriter(f, engine="xlwt", engine_kwargs=engine_kwargs) as writer:
assert writer.book._Workbook__styles.style_compression == style_compression
# xlwt won't allow us to close without writing something
DataFrame().to_excel(writer)