diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 1f656f267783f..f87b43d8ed202 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -649,6 +649,7 @@ I/O - Bug in :func:`json_normalize` where reading data with missing multi-level metadata would not respect errors="ignore" (:issue:`44312`) - Bug in :func:`read_csv` with :code:`float_precision="round_trip"` which did not skip initial/trailing whitespace (:issue:`43713`) - Bug in dumping/loading a :class:`DataFrame` with ``yaml.dump(frame)`` (:issue:`42748`) +- Bug in :class:`ExcelWriter`, where ``engine_kwargs`` were not passed through to all engines (:issue:`43442`) - Bug in :func:`read_csv` raising ``ValueError`` when ``parse_dates`` was used with ``MultiIndex`` columns (:issue:`8991`) - Bug in :func:`read_csv` raising ``AttributeError`` when attempting to read a .csv file and infer index column dtype from an nullable integer type (:issue:`44079`) - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` with ``compression`` set to ``'zip'`` no longer create a zip file containing a file ending with ".zip". Instead, they try to infer the inner file name more smartly. (:issue:`39465`) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 62e8f648028d8..9eb98195d9a88 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -721,7 +721,13 @@ class ExcelWriter(metaclass=abc.ABCMeta): Added ``overlay`` option engine_kwargs : dict, optional - Keyword arguments to be passed into the engine. + Keyword arguments to be passed into the engine. These will be passed to + the following functions of the respective engines: + + * xlsxwriter: ``xlsxwriter.Workbook(file, **engine_kwargs)`` + * openpyxl (write mode): ``openpyxl.Workbook(**engine_kwargs)`` + * openpyxl (append mode): ``openpyxl.load_workbook(file, **engine_kwargs)`` + * odswriter: ``odf.opendocument.OpenDocumentSpreadsheet(**engine_kwargs)`` .. 
versionadded:: 1.3.0 **kwargs : dict, optional @@ -823,6 +829,26 @@ class ExcelWriter(metaclass=abc.ABCMeta): ... with zf.open("filename.xlsx", "w") as buffer: ... with pd.ExcelWriter(buffer) as writer: ... df.to_excel(writer) + + You can specify additional arguments to the underlying engine: + + >>> with pd.ExcelWriter( + ... "path_to_file.xlsx", + ... engine="xlsxwriter", + ... engine_kwargs={"options": {"nan_inf_to_errors": True}} + ... ) as writer: + ... df.to_excel(writer) + + In append mode, ``engine_kwargs`` are passed through to + openpyxl's ``load_workbook``: + + >>> with pd.ExcelWriter( + ... "path_to_file.xlsx", + ... engine="openpyxl", + ... mode="a", + ... engine_kwargs={"keep_vba": True} + ... ) as writer: + ... df.to_excel(writer, sheet_name="Sheet2") """ # Defining an ExcelWriter implementation (see abstract methods for more...) diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index fa2779b01d681..add95c58cd809 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -11,7 +11,10 @@ from pandas._typing import StorageOptions from pandas.io.excel._base import ExcelWriter -from pandas.io.excel._util import validate_freeze_panes +from pandas.io.excel._util import ( + combine_kwargs, + validate_freeze_panes, +) from pandas.io.formats.excel import ExcelCell @@ -44,7 +47,9 @@ def __init__( engine_kwargs=engine_kwargs, ) - self.book = OpenDocumentSpreadsheet() + engine_kwargs = combine_kwargs(engine_kwargs, kwargs) + + self.book = OpenDocumentSpreadsheet(**engine_kwargs) self._style_dict: dict[str, str] = {} def save(self) -> None: diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index f561fb91d4ea3..27c03d4a74bc1 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -63,13 +63,13 @@ def __init__( if "r+" in self.mode: # Load from existing workbook from openpyxl import load_workbook - self.book = load_workbook(self.handles.handle) + self.book = 
load_workbook(self.handles.handle, **engine_kwargs) self.handles.handle.seek(0) self.sheets = {name: self.book[name] for name in self.book.sheetnames} else: # Create workbook object with default optimized_write=True. - self.book = Workbook() + self.book = Workbook(**engine_kwargs) if self.book.worksheets: self.book.remove(self.book.worksheets[0]) diff --git a/pandas/io/excel/_xlwt.py b/pandas/io/excel/_xlwt.py index 4dadf64b44515..a74c03f330cd9 100644 --- a/pandas/io/excel/_xlwt.py +++ b/pandas/io/excel/_xlwt.py @@ -53,7 +53,7 @@ def __init__( if encoding is None: encoding = "ascii" - self.book = xlwt.Workbook(encoding=encoding) + self.book = xlwt.Workbook(encoding=encoding, **engine_kwargs) self.fm_datetime = xlwt.easyxf(num_format_str=self.datetime_format) self.fm_date = xlwt.easyxf(num_format_str=self.date_format) diff --git a/pandas/tests/io/excel/test_odswriter.py b/pandas/tests/io/excel/test_odswriter.py index 4bf6051fd36ef..0e6d1dac55506 100644 --- a/pandas/tests/io/excel/test_odswriter.py +++ b/pandas/tests/io/excel/test_odswriter.py @@ -19,23 +19,40 @@ def test_write_append_mode_raises(ext): ExcelWriter(f, engine="odf", mode="a") -@pytest.mark.parametrize("nan_inf_to_errors", [True, False]) -def test_kwargs(ext, nan_inf_to_errors): +def test_kwargs(ext): # GH 42286 - # odswriter doesn't utilize kwargs, nothing to check except that it works - kwargs = {"options": {"nan_inf_to_errors": nan_inf_to_errors}} + # GH 43445 + # test for error: OpenDocumentSpreadsheet does not accept any arguments + kwargs = {"kwarg": 1} with tm.ensure_clean(ext) as f: msg = re.escape("Use of **kwargs is deprecated") - with tm.assert_produces_warning(FutureWarning, match=msg): - with ExcelWriter(f, engine="odf", **kwargs) as _: - pass - - -@pytest.mark.parametrize("nan_inf_to_errors", [True, False]) -def test_engine_kwargs(ext, nan_inf_to_errors): + error = re.escape( + "OpenDocumentSpreadsheet() got an unexpected keyword argument 'kwarg'" + ) + with pytest.raises( + TypeError, + 
match=error, + ): + with tm.assert_produces_warning(FutureWarning, match=msg): + with ExcelWriter(f, engine="odf", **kwargs) as _: + pass + + +@pytest.mark.parametrize("engine_kwargs", [None, {"kwarg": 1}]) +def test_engine_kwargs(ext, engine_kwargs): # GH 42286 - # odswriter doesn't utilize engine_kwargs, nothing to check except that it works - engine_kwargs = {"options": {"nan_inf_to_errors": nan_inf_to_errors}} + # GH 43445 + # test for error: OpenDocumentSpreadsheet does not accept any arguments with tm.ensure_clean(ext) as f: - with ExcelWriter(f, engine="odf", engine_kwargs=engine_kwargs) as _: - pass + if engine_kwargs is not None: + error = re.escape( + "OpenDocumentSpreadsheet() got an unexpected keyword argument 'kwarg'" + ) + with pytest.raises( + TypeError, + match=error, + ): + ExcelWriter(f, engine="odf", engine_kwargs=engine_kwargs) + else: + with ExcelWriter(f, engine="odf", engine_kwargs=engine_kwargs) as _: + pass diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py index 8535dae5f4b24..e0d4a0c12ecdf 100644 --- a/pandas/tests/io/excel/test_openpyxl.py +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -85,30 +85,63 @@ def test_write_cells_merge_styled(ext): assert xcell_a2.font == openpyxl_sty_merged -@pytest.mark.parametrize("write_only", [True, False]) -def test_kwargs(ext, write_only): - # GH 42286 - # openpyxl doesn't utilize kwargs, only test that supplying a kwarg works - kwargs = {"write_only": write_only} +@pytest.mark.parametrize("iso_dates", [True, False]) +def test_kwargs(ext, iso_dates): + # GH 42286 GH 43445 + kwargs = {"iso_dates": iso_dates} with tm.ensure_clean(ext) as f: msg = re.escape("Use of **kwargs is deprecated") with tm.assert_produces_warning(FutureWarning, match=msg): with ExcelWriter(f, engine="openpyxl", **kwargs) as writer: + assert writer.book.iso_dates == iso_dates # ExcelWriter won't allow us to close without writing something DataFrame().to_excel(writer) 
-@pytest.mark.parametrize("write_only", [True, False]) -def test_engine_kwargs(ext, write_only): - # GH 42286 - # openpyxl doesn't utilize kwargs, only test that supplying a engine_kwarg works - engine_kwargs = {"write_only": write_only} +@pytest.mark.parametrize("iso_dates", [True, False]) +def test_engine_kwargs_write(ext, iso_dates): + # GH 42286 GH 43445 + engine_kwargs = {"iso_dates": iso_dates} with tm.ensure_clean(ext) as f: with ExcelWriter(f, engine="openpyxl", engine_kwargs=engine_kwargs) as writer: + assert writer.book.iso_dates == iso_dates # ExcelWriter won't allow us to close without writing something DataFrame().to_excel(writer) +def test_engine_kwargs_append_invalid(ext): + # GH 43445 + # test whether an invalid engine kwargs actually raises + with tm.ensure_clean(ext) as f: + DataFrame(["hello", "world"]).to_excel(f) + with pytest.raises( + TypeError, + match=re.escape( + "load_workbook() got an unexpected keyword argument 'apple_banana'" + ), + ): + with ExcelWriter( + f, engine="openpyxl", mode="a", engine_kwargs={"apple_banana": "fruit"} + ) as writer: + # ExcelWriter needs us to write something to close properly + DataFrame(["good"]).to_excel(writer, sheet_name="Sheet2") + + +@pytest.mark.parametrize("data_only, expected", [(True, 0), (False, "=1+1")]) +def test_engine_kwargs_append_data_only(ext, data_only, expected): + # GH 43445 + # tests whether the data_only engine_kwarg actually works well for + # openpyxl's load_workbook + with tm.ensure_clean(ext) as f: + DataFrame(["=1+1"]).to_excel(f) + with ExcelWriter( + f, engine="openpyxl", mode="a", engine_kwargs={"data_only": data_only} + ) as writer: + assert writer.sheets["Sheet1"]["B2"].value == expected + # ExcelWriter needs us to write something to close properly 
+ DataFrame().to_excel(writer, sheet_name="Sheet2") + + @pytest.mark.parametrize( "mode,expected", [("w", ["baz"]), ("a", ["foo", "bar", "baz"])] ) diff --git a/pandas/tests/io/excel/test_xlwt.py b/pandas/tests/io/excel/test_xlwt.py index c58b9763f9618..ec333defd85ac 100644 --- a/pandas/tests/io/excel/test_xlwt.py +++ b/pandas/tests/io/excel/test_xlwt.py @@ -101,25 +101,27 @@ def test_option_xls_writer_deprecated(ext): options.io.excel.xls.writer = "xlwt" -@pytest.mark.parametrize("write_only", [True, False]) -def test_kwargs(ext, write_only): +@pytest.mark.parametrize("style_compression", [0, 2]) +def test_kwargs(ext, style_compression): # GH 42286 - # xlwt doesn't utilize kwargs, only test that supplying a kwarg works - kwargs = {"write_only": write_only} + kwargs = {"style_compression": style_compression} with tm.ensure_clean(ext) as f: msg = re.escape("Use of **kwargs is deprecated") with tm.assert_produces_warning(FutureWarning, match=msg): - with ExcelWriter(f, engine="openpyxl", **kwargs) as writer: + with ExcelWriter(f, engine="xlwt", **kwargs) as writer: + assert ( + writer.book._Workbook__styles.style_compression == style_compression + ) # xlwt won't allow us to close without writing something DataFrame().to_excel(writer) -@pytest.mark.parametrize("write_only", [True, False]) -def test_engine_kwargs(ext, write_only): +@pytest.mark.parametrize("style_compression", [0, 2]) +def test_engine_kwargs(ext, style_compression): # GH 42286 - # xlwt doesn't utilize kwargs, only test that supplying a engine_kwarg works - engine_kwargs = {"write_only": write_only} + engine_kwargs = {"style_compression": style_compression} with tm.ensure_clean(ext) as f: - with ExcelWriter(f, engine="openpyxl", engine_kwargs=engine_kwargs) as writer: + with ExcelWriter(f, engine="xlwt", engine_kwargs=engine_kwargs) as writer: + assert writer.book._Workbook__styles.style_compression == style_compression # xlwt won't allow us to close without writing something DataFrame().to_excel(writer)