diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 6ca3cc7dd7a96..e1fa934679100 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -101,6 +101,7 @@ Other enhancements - :func:`tseries.api.guess_datetime_format` is now part of the public API (:issue:`54727`) - :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`) - :meth:`ExtensionArray.duplicated` added to allow extension type implementations of the ``duplicated`` method (:issue:`55255`) +- Allow passing ``read_only``, ``data_only`` and ``keep_links`` arguments to openpyxl using ``engine_kwargs`` of :func:`read_excel` (:issue:`55027`) - DataFrame.apply now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`) - Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`) - Improved error message when constructing :class:`Period` with invalid offsets such as "QS" (:issue:`55785`) diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index ca7e84f7d6476..c546443868a62 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -567,12 +567,11 @@ def load_workbook( ) -> Workbook: from openpyxl import load_workbook + default_kwargs = {"read_only": True, "data_only": True, "keep_links": False} + return load_workbook( filepath_or_buffer, - read_only=True, - data_only=True, - keep_links=False, - **engine_kwargs, + **(default_kwargs | engine_kwargs), ) @property diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py index da94c74f2303e..53cbd1ce3cceb 100644 --- a/pandas/tests/io/excel/test_openpyxl.py +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -13,6 +13,7 @@ ExcelWriter, _OpenpyxlWriter, ) +from pandas.io.excel._openpyxl import OpenpyxlReader openpyxl = pytest.importorskip("openpyxl") @@ -129,6 +130,31 @@ def test_engine_kwargs_append_data_only(ext, data_only, expected): # ExcelWriter needs us to writer something to close properly? DataFrame().to_excel(writer, sheet_name="Sheet2") + # ensure that data_only also works for reading + # and that formulas/values roundtrip + assert ( + pd.read_excel( + f, + sheet_name="Sheet1", + engine="openpyxl", + engine_kwargs={"data_only": data_only}, + ).iloc[0, 1] + == expected + ) + + +@pytest.mark.parametrize("kwarg_name", ["read_only", "data_only"]) +@pytest.mark.parametrize("kwarg_value", [True, False]) +def test_engine_kwargs_append_reader(datapath, ext, kwarg_name, kwarg_value): + # GH 55027 + # test that `read_only` and `data_only` can be passed to + # `openpyxl.reader.excel.load_workbook` via `engine_kwargs` + filename = datapath("io", "data", "excel", "test1" + ext) + with contextlib.closing( + OpenpyxlReader(filename, engine_kwargs={kwarg_name: kwarg_value}) + ) as reader: + assert getattr(reader.book, kwarg_name) == kwarg_value + @pytest.mark.parametrize( "mode,expected", [("w", ["baz"]), ("a", ["foo", "bar", "baz"])]