Skip to content

Commit ac8977f

Browse files
authored
ENH: Add if_sheet_exists parameter to ExcelWriter (#40231)
Thanks @mrob95!
1 parent 03e1019 commit ac8977f

File tree

8 files changed

+125
-3
lines changed

8 files changed

+125
-3
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,7 @@ Other enhancements
196196
- :func:`to_numeric` now supports downcasting of nullable ``ExtensionDtype`` objects (:issue:`33013`)
197197
- Add support for dict-like names in :class:`MultiIndex.set_names` and :class:`MultiIndex.rename` (:issue:`20421`)
198198
- :func:`pandas.read_excel` can now auto detect .xlsb files (:issue:`35416`)
199+
- :class:`pandas.ExcelWriter` now accepts an ``if_sheet_exists`` parameter to control the behaviour of append mode when writing to existing sheets (:issue:`40230`)
199200
- :meth:`.Rolling.sum`, :meth:`.Expanding.sum`, :meth:`.Rolling.mean`, :meth:`.Expanding.mean`, :meth:`.Rolling.median`, :meth:`.Expanding.median`, :meth:`.Rolling.max`, :meth:`.Expanding.max`, :meth:`.Rolling.min`, and :meth:`.Expanding.min` now support ``Numba`` execution with the ``engine`` keyword (:issue:`38895`)
200201
- :meth:`DataFrame.apply` can now accept NumPy unary operators as strings, e.g. ``df.apply("sqrt")``, which was already the case for :meth:`Series.apply` (:issue:`39116`)
201202
- :meth:`DataFrame.apply` can now accept non-callable DataFrame properties as strings, e.g. ``df.apply("size")``, which was already the case for :meth:`Series.apply` (:issue:`39116`)

pandas/io/excel/_base.py

+22
Original file line numberDiff line numberDiff line change
@@ -664,6 +664,15 @@ class ExcelWriter(metaclass=abc.ABCMeta):
664664
be parsed by ``fsspec``, e.g., starting "s3://", "gcs://".
665665
666666
.. versionadded:: 1.2.0
667+
if_sheet_exists : {'error', 'new', 'replace'}, default 'error'
668+
How to behave when trying to write to a sheet that already
669+
exists (append mode only).
670+
671+
* error: Raise a ValueError.
672+
* new: Create a new sheet, with a name determined by the engine.
673+
* replace: Delete the contents of the sheet before writing to it.
674+
675+
.. versionadded:: 1.3.0
667676
engine_kwargs : dict, optional
668677
Keyword arguments to be passed into the engine.
669678
@@ -760,6 +769,7 @@ def __new__(
760769
datetime_format=None,
761770
mode: str = "w",
762771
storage_options: StorageOptions = None,
772+
if_sheet_exists: str | None = None,
763773
engine_kwargs: dict | None = None,
764774
**kwargs,
765775
):
@@ -861,6 +871,7 @@ def __init__(
861871
datetime_format=None,
862872
mode: str = "w",
863873
storage_options: StorageOptions = None,
874+
if_sheet_exists: str | None = None,
864875
engine_kwargs: dict | None = None,
865876
**kwargs,
866877
):
@@ -896,6 +907,17 @@ def __init__(
896907

897908
self.mode = mode
898909

910+
if if_sheet_exists not in [None, "error", "new", "replace"]:
911+
raise ValueError(
912+
f"'{if_sheet_exists}' is not valid for if_sheet_exists. "
913+
"Valid options are 'error', 'new' and 'replace'."
914+
)
915+
if if_sheet_exists and "r+" not in mode:
916+
raise ValueError("if_sheet_exists is only valid in append mode (mode='a')")
917+
if if_sheet_exists is None:
918+
if_sheet_exists = "error"
919+
self.if_sheet_exists = if_sheet_exists
920+
899921
def __fspath__(self):
900922
return getattr(self.handles.handle, "name", "")
901923

pandas/io/excel/_odswriter.py

+2
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ def __init__(
3030
datetime_format=None,
3131
mode: str = "w",
3232
storage_options: StorageOptions = None,
33+
if_sheet_exists: Optional[str] = None,
3334
engine_kwargs: Optional[Dict[str, Any]] = None,
3435
):
3536
from odf.opendocument import OpenDocumentSpreadsheet
@@ -41,6 +42,7 @@ def __init__(
4142
path,
4243
mode=mode,
4344
storage_options=storage_options,
45+
if_sheet_exists=if_sheet_exists,
4446
engine_kwargs=engine_kwargs,
4547
)
4648

pandas/io/excel/_openpyxl.py

+24-2
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ def __init__(
3737
datetime_format=None,
3838
mode: str = "w",
3939
storage_options: StorageOptions = None,
40+
if_sheet_exists: str | None = None,
4041
engine_kwargs: dict[str, Any] | None = None,
4142
):
4243
# Use the openpyxl module as the Excel writer.
@@ -46,6 +47,7 @@ def __init__(
4647
path,
4748
mode=mode,
4849
storage_options=storage_options,
50+
if_sheet_exists=if_sheet_exists,
4951
engine_kwargs=engine_kwargs,
5052
)
5153

@@ -56,6 +58,8 @@ def __init__(
5658

5759
self.book = load_workbook(self.handles.handle)
5860
self.handles.handle.seek(0)
61+
self.sheets = {name: self.book[name] for name in self.book.sheetnames}
62+
5963
else:
6064
# Create workbook object with default optimized_write=True.
6165
self.book = Workbook()
@@ -414,8 +418,26 @@ def write_cells(
414418

415419
_style_cache: dict[str, dict[str, Serialisable]] = {}
416420

417-
if sheet_name in self.sheets:
418-
wks = self.sheets[sheet_name]
421+
if sheet_name in self.sheets and self.if_sheet_exists != "new":
422+
if "r+" in self.mode:
423+
if self.if_sheet_exists == "replace":
424+
old_wks = self.sheets[sheet_name]
425+
target_index = self.book.index(old_wks)
426+
del self.book[sheet_name]
427+
wks = self.book.create_sheet(sheet_name, target_index)
428+
self.sheets[sheet_name] = wks
429+
elif self.if_sheet_exists == "error":
430+
raise ValueError(
431+
f"Sheet '{sheet_name}' already exists and "
432+
f"if_sheet_exists is set to 'error'."
433+
)
434+
else:
435+
raise ValueError(
436+
f"'{self.if_sheet_exists}' is not valid for if_sheet_exists. "
437+
"Valid options are 'error', 'new' and 'replace'."
438+
)
439+
else:
440+
wks = self.sheets[sheet_name]
419441
else:
420442
wks = self.book.create_sheet()
421443
wks.title = sheet_name

pandas/io/excel/_xlsxwriter.py

+2
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@ def __init__(
177177
datetime_format=None,
178178
mode: str = "w",
179179
storage_options: StorageOptions = None,
180+
if_sheet_exists: Optional[str] = None,
180181
engine_kwargs: Optional[Dict[str, Any]] = None,
181182
):
182183
# Use the xlsxwriter module as the Excel writer.
@@ -194,6 +195,7 @@ def __init__(
194195
datetime_format=datetime_format,
195196
mode=mode,
196197
storage_options=storage_options,
198+
if_sheet_exists=if_sheet_exists,
197199
engine_kwargs=engine_kwargs,
198200
)
199201

pandas/io/excel/_xlwt.py

+2
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ def __init__(
2828
encoding=None,
2929
mode: str = "w",
3030
storage_options: StorageOptions = None,
31+
if_sheet_exists: Optional[str] = None,
3132
engine_kwargs: Optional[Dict[str, Any]] = None,
3233
):
3334
# Use the xlwt module as the Excel writer.
@@ -40,6 +41,7 @@ def __init__(
4041
path,
4142
mode=mode,
4243
storage_options=storage_options,
44+
if_sheet_exists=if_sheet_exists,
4345
engine_kwargs=engine_kwargs,
4446
)
4547

pandas/tests/io/excel/test_openpyxl.py

+64-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from pathlib import Path
2+
import re
23

34
import numpy as np
45
import pytest
@@ -109,6 +110,66 @@ def test_write_append_mode(ext, mode, expected):
109110
assert wb2.worksheets[index]["A1"].value == cell_value
110111

111112

113+
@pytest.mark.parametrize(
114+
"if_sheet_exists,num_sheets,expected",
115+
[
116+
("new", 2, ["apple", "banana"]),
117+
("replace", 1, ["pear"]),
118+
],
119+
)
120+
def test_if_sheet_exists_append_modes(ext, if_sheet_exists, num_sheets, expected):
121+
# GH 40230
122+
df1 = DataFrame({"fruit": ["apple", "banana"]})
123+
df2 = DataFrame({"fruit": ["pear"]})
124+
125+
with tm.ensure_clean(ext) as f:
126+
df1.to_excel(f, engine="openpyxl", sheet_name="foo", index=False)
127+
with ExcelWriter(
128+
f, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists
129+
) as writer:
130+
df2.to_excel(writer, sheet_name="foo", index=False)
131+
132+
wb = openpyxl.load_workbook(f)
133+
assert len(wb.sheetnames) == num_sheets
134+
assert wb.sheetnames[0] == "foo"
135+
result = pd.read_excel(wb, "foo", engine="openpyxl")
136+
assert list(result["fruit"]) == expected
137+
if len(wb.sheetnames) == 2:
138+
result = pd.read_excel(wb, wb.sheetnames[1], engine="openpyxl")
139+
tm.assert_frame_equal(result, df2)
140+
wb.close()
141+
142+
143+
@pytest.mark.parametrize(
144+
"if_sheet_exists,msg",
145+
[
146+
(
147+
"invalid",
148+
"'invalid' is not valid for if_sheet_exists. Valid options "
149+
"are 'error', 'new' and 'replace'.",
150+
),
151+
(
152+
"error",
153+
"Sheet 'foo' already exists and if_sheet_exists is set to 'error'.",
154+
),
155+
(
156+
None,
157+
"Sheet 'foo' already exists and if_sheet_exists is set to 'error'.",
158+
),
159+
],
160+
)
161+
def test_if_sheet_exists_raises(ext, if_sheet_exists, msg):
162+
# GH 40230
163+
df = DataFrame({"fruit": ["pear"]})
164+
with tm.ensure_clean(ext) as f:
165+
with pytest.raises(ValueError, match=re.escape(msg)):
166+
df.to_excel(f, "foo", engine="openpyxl")
167+
with ExcelWriter(
168+
f, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists
169+
) as writer:
170+
df.to_excel(writer, sheet_name="foo")
171+
172+
112173
def test_to_excel_with_openpyxl_engine(ext):
113174
# GH 29854
114175
with tm.ensure_clean(ext) as filename:
@@ -175,7 +236,9 @@ def test_append_mode_file(ext):
175236
with tm.ensure_clean(ext) as f:
176237
df.to_excel(f, engine="openpyxl")
177238

178-
with ExcelWriter(f, mode="a", engine="openpyxl") as writer:
239+
with ExcelWriter(
240+
f, mode="a", engine="openpyxl", if_sheet_exists="new"
241+
) as writer:
179242
df.to_excel(writer)
180243

181244
# make sure that zip files are not concatenated by making sure that

pandas/tests/io/excel/test_writers.py

+8
Original file line numberDiff line numberDiff line change
@@ -1325,6 +1325,14 @@ def test_excel_duplicate_columns_with_names(self, path):
13251325
expected = DataFrame([[0, 10, 0], [1, 11, 1]], columns=["A", "B", "A.1"])
13261326
tm.assert_frame_equal(result, expected)
13271327

1328+
def test_if_sheet_exists_raises(self, ext):
1329+
# GH 40230
1330+
msg = "if_sheet_exists is only valid in append mode (mode='a')"
1331+
1332+
with tm.ensure_clean(ext) as f:
1333+
with pytest.raises(ValueError, match=re.escape(msg)):
1334+
ExcelWriter(f, if_sheet_exists="replace")
1335+
13281336

13291337
class TestExcelWriterEngineTests:
13301338
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)