Skip to content

Commit 0d786bf

Browse files
authored
ENH: Allow passing read_only, data_only and keep_links arguments to openpyxl using engine_kwargs (#55807)
* ENH: Allow passing `read_only`, `data_only` and `keep_links` arguments to openpyxl using `engine_kwargs`

  Previously it was not possible to override the default values for `openpyxl.reader.excel.load_workbook`'s `read_only`, `data_only` and `keep_links` arguments (see #55027). Now these options can be changed via `engine_kwargs`.

  Closes #55027

* test data_only roundtrip

* don't modify mutable parameter
1 parent d734496 commit 0d786bf

File tree

3 files changed

+30
-4
lines changed

3 files changed

+30
-4
lines changed

doc/source/whatsnew/v2.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ Other enhancements
101101
- :func:`tseries.api.guess_datetime_format` is now part of the public API (:issue:`54727`)
102102
- :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`)
103103
- :meth:`ExtensionArray.duplicated` added to allow extension type implementations of the ``duplicated`` method (:issue:`55255`)
104+
- Allow passing ``read_only``, ``data_only`` and ``keep_links`` arguments to openpyxl using ``engine_kwargs`` of :func:`read_excel` (:issue:`55027`)
104105
- :meth:`DataFrame.apply` now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`)
105106
- Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`)
106107
- Improved error message when constructing :class:`Period` with invalid offsets such as "QS" (:issue:`55785`)

pandas/io/excel/_openpyxl.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -567,12 +567,11 @@ def load_workbook(
567567
) -> Workbook:
568568
from openpyxl import load_workbook
569569

570+
default_kwargs = {"read_only": True, "data_only": True, "keep_links": False}
571+
570572
return load_workbook(
571573
filepath_or_buffer,
572-
read_only=True,
573-
data_only=True,
574-
keep_links=False,
575-
**engine_kwargs,
574+
**(default_kwargs | engine_kwargs),
576575
)
577576

578577
@property

pandas/tests/io/excel/test_openpyxl.py

+26
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
ExcelWriter,
1414
_OpenpyxlWriter,
1515
)
16+
from pandas.io.excel._openpyxl import OpenpyxlReader
1617

1718
openpyxl = pytest.importorskip("openpyxl")
1819

@@ -129,6 +130,31 @@ def test_engine_kwargs_append_data_only(ext, data_only, expected):
129130
# ExcelWriter needs us to write something to close properly?
130131
DataFrame().to_excel(writer, sheet_name="Sheet2")
131132

133+
# ensure that data_only also works for reading
134+
# and that formulas/values roundtrip
135+
assert (
136+
pd.read_excel(
137+
f,
138+
sheet_name="Sheet1",
139+
engine="openpyxl",
140+
engine_kwargs={"data_only": data_only},
141+
).iloc[0, 1]
142+
== expected
143+
)
144+
145+
146+
@pytest.mark.parametrize("kwarg_name", ["read_only", "data_only"])
147+
@pytest.mark.parametrize("kwarg_value", [True, False])
148+
def test_engine_kwargs_append_reader(datapath, ext, kwarg_name, kwarg_value):
149+
# GH 55027
150+
# test that `read_only` and `data_only` can be passed to
151+
# `openpyxl.reader.excel.load_workbook` via `engine_kwargs`
152+
filename = datapath("io", "data", "excel", "test1" + ext)
153+
with contextlib.closing(
154+
OpenpyxlReader(filename, engine_kwargs={kwarg_name: kwarg_value})
155+
) as reader:
156+
assert getattr(reader.book, kwarg_name) == kwarg_value
157+
132158

133159
@pytest.mark.parametrize(
134160
"mode,expected", [("w", ["baz"]), ("a", ["foo", "bar", "baz"])]

0 commit comments

Comments
 (0)