diff --git a/doc/source/whatsnew/v1.2.2.rst b/doc/source/whatsnew/v1.2.2.rst index 63e793c013497..8e69bf5e1991a 100644 --- a/doc/source/whatsnew/v1.2.2.rst +++ b/doc/source/whatsnew/v1.2.2.rst @@ -21,6 +21,7 @@ Fixed regressions - Fixed regression in :meth:`~DataFrame.to_pickle` failing to create bz2/xz compressed pickle files with ``protocol=5`` (:issue:`39002`) - Fixed regression in :func:`pandas.testing.assert_series_equal` and :func:`pandas.testing.assert_frame_equal` always raising ``AssertionError`` when comparing extension dtypes (:issue:`39410`) - Fixed regression in :meth:`~DataFrame.to_csv` opening ``codecs.StreamWriter`` in binary mode instead of in text mode and ignoring user-provided ``mode`` (:issue:`39247`) +- Fixed regression in :meth:`~DataFrame.to_excel` creating corrupt files when appending (``mode="a"``) to an existing file (:issue:`39576`) - Fixed regression in :meth:`DataFrame.transform` failing in case of an empty DataFrame or Series (:issue:`39636`) - Fixed regression in :meth:`core.window.rolling.Rolling.count` where the ``min_periods`` argument would be set to ``0`` after the operation (:issue:`39554`) - diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index 64c64b5009b0c..b53db6c726c4d 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -1,6 +1,7 @@ from __future__ import annotations from distutils.version import LooseVersion +import mmap from typing import TYPE_CHECKING, Dict, List, Optional import numpy as np @@ -40,6 +41,7 @@ def __init__( from openpyxl import load_workbook self.book = load_workbook(self.handles.handle) + self.handles.handle.seek(0) else: # Create workbook object with default optimized_write=True. self.book = Workbook() @@ -52,6 +54,9 @@ def save(self): Save workbook to disk. 
""" self.book.save(self.handles.handle) + if "r+" in self.mode and not isinstance(self.handles.handle, mmap.mmap): + # truncate file to the written content + self.handles.handle.truncate() @classmethod def _convert_to_style_kwargs(cls, style_dict: dict) -> Dict[str, Serialisable]: diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py index 640501baffc62..b365f4edab83c 100644 --- a/pandas/tests/io/excel/test_openpyxl.py +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -1,4 +1,5 @@ from distutils.version import LooseVersion +from pathlib import Path import numpy as np import pytest @@ -149,3 +150,22 @@ def test_read_with_bad_dimension(datapath, ext, header, expected_data, filename) result = pd.read_excel(path, header=header) expected = DataFrame(expected_data) tm.assert_frame_equal(result, expected) + + +def test_append_mode_file(ext): + # GH 39576 + df = DataFrame() + + with tm.ensure_clean(ext) as f: + df.to_excel(f, engine="openpyxl") + + with ExcelWriter(f, mode="a", engine="openpyxl") as writer: + df.to_excel(writer) + + # make sure that zip files are not concatenated: a single archive names + # "docProps/app.xml" exactly twice (local header and central directory) + data = Path(f).read_bytes() + first = data.find(b"docProps/app.xml") + second = data.find(b"docProps/app.xml", first + 1) + third = data.find(b"docProps/app.xml", second + 1) + assert second != -1 and third == -1