Skip to content

ENH: add if_sheet_exists='overlay' to ExcelWriter #42222

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Nov 17, 2021
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -207,12 +207,14 @@ Other enhancements
- Added :meth:`.ExponentialMovingWindow.sum` (:issue:`13297`)
- :meth:`Series.str.split` now supports a ``regex`` argument that explicitly specifies whether the pattern is a regular expression. Default is ``None`` (:issue:`43563`, :issue:`32835`, :issue:`25549`)
- :meth:`DataFrame.dropna` now accepts a single label as ``subset`` along with array-like (:issue:`41021`)
- Added the ``"overlay"`` option to the ``if_sheet_exists`` argument of :class:`ExcelWriter` (:issue:`40231`)
- :meth:`read_excel` now accepts a ``decimal`` argument that allows the user to specify the decimal point when parsing string columns to numeric (:issue:`14403`)
- :meth:`.GroupBy.mean` now supports `Numba <http://numba.pydata.org/>`_ execution with the ``engine`` keyword (:issue:`43731`)
- :meth:`Timestamp.isoformat` now handles the ``timespec`` argument from the base :class:`datetime` class (:issue:`26131`)
- :meth:`NaT.to_numpy` ``dtype`` argument is now respected, so ``np.timedelta64`` can be returned (:issue:`44460`)
-


.. ---------------------------------------------------------------------------

.. _whatsnew_140.notable_bug_fixes:
Expand Down
36 changes: 33 additions & 3 deletions pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -701,15 +701,23 @@ class ExcelWriter(metaclass=abc.ABCMeta):
be parsed by ``fsspec``, e.g., starting "s3://", "gcs://".

.. versionadded:: 1.2.0
if_sheet_exists : {'error', 'new', 'replace'}, default 'error'

if_sheet_exists : {'error', 'new', 'replace', 'overlay'}, default 'error'
How to behave when trying to write to a sheet that already
exists (append mode only).

* error: raise a ValueError.
* new: Create a new sheet, with a name determined by the engine.
* replace: Delete the contents of the sheet before writing to it.
* overlay: Write contents to the existing sheet without removing the old
contents.

.. versionadded:: 1.3.0

.. versionchanged:: 1.4.0

Added ``overlay`` option

engine_kwargs : dict, optional
Keyword arguments to be passed into the engine.

Expand Down Expand Up @@ -775,6 +783,28 @@ class ExcelWriter(metaclass=abc.ABCMeta):
>>> with pd.ExcelWriter("path_to_file.xlsx", mode="a", engine="openpyxl") as writer:
... df.to_excel(writer, sheet_name="Sheet3")

Here, the ``if_sheet_exists`` parameter can be set to replace a sheet if it
already exists:

>>> with pd.ExcelWriter(
... "path_to_file.xlsx",
... mode="a",
... engine="openpyxl",
... if_sheet_exists="replace",
... ) as writer:
... df.to_excel(writer, sheet_name="Sheet1")

You can also write multiple DataFrames to a single sheet. Note that the
``if_sheet_exists`` parameter needs to be set to ``overlay``:

>>> with pd.ExcelWriter(
... "path_to_file.xlsx",
... mode="a",
... engine="openpyxl",
... if_sheet_exists="overlay",
... ) as writer:
... df1.to_excel(writer, sheet_name="Sheet1")
... df2.to_excel(writer, sheet_name="Sheet1", startcol=3)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does overlay require startrow to be not 1? (e.g should we error if this is not specified)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without startrow (e.g. startrow=0) it will overwrite the headers. This could be desired behaviour so I think we should not error.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is what I would expect - unspecified starts at the very top left corner of the sheet.


You can store an Excel file in RAM:

>>> import io
Expand Down Expand Up @@ -962,10 +992,10 @@ def __init__(

self.mode = mode

if if_sheet_exists not in [None, "error", "new", "replace"]:
if if_sheet_exists not in (None, "error", "new", "replace", "overlay"):
raise ValueError(
f"'{if_sheet_exists}' is not valid for if_sheet_exists. "
"Valid options are 'error', 'new' and 'replace'."
"Valid options are 'error', 'new', 'replace' and 'overlay'."
)
if if_sheet_exists and "r+" not in mode:
raise ValueError("if_sheet_exists is only valid in append mode (mode='a')")
Expand Down
4 changes: 3 additions & 1 deletion pandas/io/excel/_openpyxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,10 +437,12 @@ def write_cells(
f"Sheet '{sheet_name}' already exists and "
f"if_sheet_exists is set to 'error'."
)
elif self.if_sheet_exists == "overlay":
wks = self.sheets[sheet_name]
else:
raise ValueError(
f"'{self.if_sheet_exists}' is not valid for if_sheet_exists. "
"Valid options are 'error', 'new' and 'replace'."
"Valid options are 'error', 'new', 'replace' and 'overlay'."
)
else:
wks = self.sheets[sheet_name]
Expand Down
36 changes: 35 additions & 1 deletion pandas/tests/io/excel/test_openpyxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ def test_write_append_mode(ext, mode, expected):
[
("new", 2, ["apple", "banana"]),
("replace", 1, ["pear"]),
("overlay", 1, ["pear", "banana"]),
],
)
def test_if_sheet_exists_append_modes(ext, if_sheet_exists, num_sheets, expected):
Expand All @@ -164,13 +165,46 @@ def test_if_sheet_exists_append_modes(ext, if_sheet_exists, num_sheets, expected
wb.close()


@pytest.mark.parametrize(
    "startrow, startcol, greeting, goodbye",
    [
        (0, 0, ["poop", "world"], ["goodbye", "people"]),
        (0, 1, ["hello", "world"], ["poop", "people"]),
        (1, 0, ["hello", "poop"], ["goodbye", "people"]),
        (1, 1, ["hello", "world"], ["goodbye", "poop"]),
    ],
)
def test_append_overlay_startrow_startcol(ext, startrow, startcol, greeting, goodbye):
    """Check that ``if_sheet_exists="overlay"`` writes onto the existing sheet.

    A two-column frame is written to sheet "poo", then a single-cell frame is
    written to the same sheet in append mode at the parametrized
    ``startrow``/``startcol``. Reading the sheet back must give the
    ``greeting`` and ``goodbye`` columns supplied by the parametrization.
    """
    original = DataFrame(
        {"greeting": ["hello", "world"], "goodbye": ["goodbye", "people"]}
    )
    single_cell = DataFrame(["poop"])

    with tm.ensure_clean(ext) as path:
        original.to_excel(path, engine="openpyxl", sheet_name="poo", index=False)

        with ExcelWriter(
            path, engine="openpyxl", mode="a", if_sheet_exists="overlay"
        ) as writer:
            # The overlaid frame is written without a header, so shift it down
            # by one row to address the data rows rather than the header row.
            single_cell.to_excel(
                writer,
                index=False,
                header=False,
                startrow=startrow + 1,
                startcol=startcol,
                sheet_name="poo",
            )

        # Read back while the temporary file still exists.
        result = pd.read_excel(path, sheet_name="poo", engine="openpyxl")
        expected = DataFrame({"greeting": greeting, "goodbye": goodbye})
        tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
"if_sheet_exists,msg",
[
(
"invalid",
"'invalid' is not valid for if_sheet_exists. Valid options "
"are 'error', 'new' and 'replace'.",
"are 'error', 'new', 'replace' and 'overlay'.",
),
(
"error",
Expand Down