From 88f2d2ca02dbbbbcee32d70720fcea79c7d25343 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Fri, 3 Nov 2023 07:57:12 +0100 Subject: [PATCH 1/3] ENH: Allow passing `read_only`, `data_only` and `keep_links` arguments to openpyxl using `engine_kwargs` Previously it was not possible to override the default values for `openpyxl.reader.excel.load_workbook`'s `read_only`, `data_only` and `keep_links` arguments (see #55027). Now these options can be changed via `engine_kwargs`. Closes #55027 --- doc/source/whatsnew/v2.2.0.rst | 1 + pandas/io/excel/_openpyxl.py | 10 +++++++--- pandas/tests/io/excel/test_openpyxl.py | 14 ++++++++++++++ 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 26b5705a1f3db..3893f4653ac1b 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -81,6 +81,7 @@ Other enhancements - :func:`tseries.api.guess_datetime_format` is now part of the public API (:issue:`54727`) - :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`) - :meth:`ExtensionArray.duplicated` added to allow extension type implementations of the ``duplicated`` method (:issue:`55255`) +- Allow passing ``read_only``, ``data_only`` and ``keep_links`` arguments to openpyxl using ``engine_kwargs`` of :func:`read_excel` (:issue:`55027`) - DataFrame.apply now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`) - Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`) diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index ca7e84f7d6476..81eec2f06cd9a 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -567,11 +567,15 @@ def load_workbook( ) -> Workbook: from openpyxl import load_workbook + if engine_kwargs is None: + engine_kwargs = {} + + engine_kwargs.setdefault("read_only", True) + engine_kwargs.setdefault("data_only", True) + engine_kwargs.setdefault("keep_links", False) + return load_workbook( filepath_or_buffer, - read_only=True, - data_only=True, - keep_links=False, **engine_kwargs, ) diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py index da94c74f2303e..5e78244edb2e7 100644 --- a/pandas/tests/io/excel/test_openpyxl.py +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -13,6 +13,7 @@ ExcelWriter, _OpenpyxlWriter, ) +from pandas.io.excel._openpyxl import OpenpyxlReader openpyxl = pytest.importorskip("openpyxl") @@ -130,6 +131,19 @@ def test_engine_kwargs_append_data_only(ext, data_only, expected): DataFrame().to_excel(writer, sheet_name="Sheet2") +@pytest.mark.parametrize("kwarg_name", ["read_only", "data_only"]) +@pytest.mark.parametrize("kwarg_value", [True, False]) +def test_engine_kwargs_append_reader(datapath, ext, kwarg_name, kwarg_value): + # GH 55027 + # test that `read_only` and `data_only` can be passed to + # `openpyxl.reader.excel.load_workbook` via `engine_kwargs` + filename = datapath("io", "data", "excel", "test1" + ext) + with contextlib.closing( + OpenpyxlReader(filename, engine_kwargs={kwarg_name: kwarg_value}) + ) as reader: + assert getattr(reader.book, kwarg_name) == kwarg_value + + @pytest.mark.parametrize( "mode,expected", [("w", ["baz"]), ("a", ["foo", "bar", "baz"])] ) From 000503aff9fc359134a737c760ce0862d311481e Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Sat, 4 Nov 2023 08:43:17 +0100 Subject: [PATCH 2/3] test data_only roundtrip --- pandas/tests/io/excel/test_openpyxl.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py index 5e78244edb2e7..53cbd1ce3cceb 100644 --- a/pandas/tests/io/excel/test_openpyxl.py +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -130,6 +130,18 @@ def test_engine_kwargs_append_data_only(ext, data_only, expected): # ExcelWriter needs us to writer something to close properly? DataFrame().to_excel(writer, sheet_name="Sheet2") + # ensure that data_only also works for reading + # and that formulas/values roundtrip + assert ( + pd.read_excel( + f, + sheet_name="Sheet1", + engine="openpyxl", + engine_kwargs={"data_only": data_only}, + ).iloc[0, 1] + == expected + ) + @pytest.mark.parametrize("kwarg_name", ["read_only", "data_only"]) @pytest.mark.parametrize("kwarg_value", [True, False]) From 02045850e27284ffaa21cf1aebbc4a23306b1f60 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 6 Nov 2023 07:20:30 +0100 Subject: [PATCH 3/3] don't modify mutable parameter --- pandas/io/excel/_openpyxl.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index 81eec2f06cd9a..c546443868a62 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -567,16 +567,11 @@ def load_workbook( ) -> Workbook: from openpyxl import load_workbook - if engine_kwargs is None: - engine_kwargs = {} - - engine_kwargs.setdefault("read_only", True) - engine_kwargs.setdefault("data_only", True) - engine_kwargs.setdefault("keep_links", False) + default_kwargs = {"read_only": True, "data_only": True, "keep_links": False} return load_workbook( filepath_or_buffer, - **engine_kwargs, + **(default_kwargs | engine_kwargs), ) @property