Skip to content

Commit 0c31b56

Browse files
twoertwein authored and simonjayhawkins committed
Backport PR pandas-dev#39605: REGR: appending to existing excel file created corrupt files
1 parent 73e8a81 commit 0c31b56

File tree

3 files changed

+26
-0
lines changed

3 files changed

+26
-0
lines changed

doc/source/whatsnew/v1.2.2.rst

+1
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ Fixed regressions
2121
- Fixed regression in :meth:`~DataFrame.to_pickle` failing to create bz2/xz compressed pickle files with ``protocol=5`` (:issue:`39002`)
2222
- Fixed regression in :func:`pandas.testing.assert_series_equal` and :func:`pandas.testing.assert_frame_equal` always raising ``AssertionError`` when comparing extension dtypes (:issue:`39410`)
2323
- Fixed regression in :meth:`~DataFrame.to_csv` opening ``codecs.StreamWriter`` in binary mode instead of in text mode and ignoring user-provided ``mode`` (:issue:`39247`)
24+
- Fixed regression in :meth:`~DataFrame.to_excel` creating corrupt files when appending (``mode="a"``) to an existing file (:issue:`39576`)
2425
- Fixed regression in :meth:`core.window.rolling.Rolling.count` where the ``min_periods`` argument would be set to ``0`` after the operation (:issue:`39554`)
2526
-
2627

pandas/io/excel/_openpyxl.py

+5
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
from distutils.version import LooseVersion
2+
import mmap
23
from typing import TYPE_CHECKING, Dict, List, Optional
34

45
import numpy as np
@@ -38,6 +39,7 @@ def __init__(
3839
from openpyxl import load_workbook
3940

4041
self.book = load_workbook(self.handles.handle)
42+
self.handles.handle.seek(0)
4143
else:
4244
# Create workbook object with default optimized_write=True.
4345
self.book = Workbook()
@@ -50,6 +52,9 @@ def save(self):
5052
Save workbook to disk.
5153
"""
5254
self.book.save(self.handles.handle)
55+
if "r+" in self.mode and not isinstance(self.handles.handle, mmap.mmap):
56+
# truncate file to the written content
57+
self.handles.handle.truncate()
5358

5459
@classmethod
5560
def _convert_to_style_kwargs(cls, style_dict: dict) -> Dict[str, "Serialisable"]:

pandas/tests/io/excel/test_openpyxl.py

+20
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
from distutils.version import LooseVersion
2+
from pathlib import Path
23

34
import numpy as np
45
import pytest
@@ -149,3 +150,22 @@ def test_read_with_bad_dimension(datapath, ext, header, expected_data, filename)
149150
result = pd.read_excel(path, header=header)
150151
expected = DataFrame(expected_data)
151152
tm.assert_frame_equal(result, expected)
153+
154+
155+
def test_append_mode_file(ext):
156+
# GH 39576
157+
df = DataFrame()
158+
159+
with tm.ensure_clean(ext) as f:
160+
df.to_excel(f, engine="openpyxl")
161+
162+
with ExcelWriter(f, mode="a", engine="openpyxl") as writer:
163+
df.to_excel(writer)
164+
165+
# make sure that zip files are not concatenated by making sure that
166+
# "docProps/app.xml" only occurs twice in the file
167+
data = Path(f).read_bytes()
168+
first = data.find(b"docProps/app.xml")
169+
second = data.find(b"docProps/app.xml", first + 1)
170+
third = data.find(b"docProps/app.xml", second + 1)
171+
assert second != -1 and third == -1

0 commit comments

Comments
 (0)