Commit 6ff2e7c

REGR: to_stata tried to remove file before closing it (#39202)
Co-authored-by: Simon Hawkins <[email protected]>
1 parent: 358c614

6 files changed (+79 −95 lines)

doc/source/whatsnew/v1.2.1.rst

+1 −0 lines changed

@@ -17,6 +17,7 @@ Fixed regressions
 - Fixed regression in :meth:`~DataFrame.to_csv` that created corrupted zip files when there were more rows than ``chunksize`` (:issue:`38714`)
 - Fixed regression in :meth:`read_csv` and other read functions where the encoding error policy (``errors``) did not default to ``"replace"`` when no encoding was specified (:issue:`38989`)
 - Fixed regression in :func:`read_excel` with non-rawbyte file handles (:issue:`38788`)
+- Fixed regression in :meth:`DataFrame.to_stata` not removing the created file when an error occurred (:issue:`39202`)
 - Fixed regression in ``DataFrame.__setitem__`` raising ``ValueError`` when expanding :class:`DataFrame` and new column is from type ``"0 - name"`` (:issue:`39010`)
 - Fixed regression in setting with :meth:`DataFrame.loc` raising ``ValueError`` when :class:`DataFrame` has unsorted :class:`MultiIndex` columns and indexer is a scalar (:issue:`38601`)
 - Fixed regression in setting with :meth:`DataFrame.loc` raising ``KeyError`` with :class:`MultiIndex` and list-like columns indexer enlarging :class:`DataFrame` (:issue:`39147`)
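
A minimal sketch of the behaviour this entry covers, assuming a writable temp directory; the frame contents and file name are illustrative, and the invalid time_stamp mirrors the regression test added further down:

import os
import tempfile

import pandas as pd

df = pd.DataFrame({"variable": [1]})
path = os.path.join(tempfile.gettempdir(), "example.dta")  # hypothetical path

try:
    # time_stamp must be a datetime object; a string makes the writer raise mid-write.
    df.to_stata(path, time_stamp="01 Jan 2000, 00:00:00")
except ValueError:
    pass

# With the fix, the open handle is closed first and the partial file is then
# removed, so no invalid .dta file is left behind (including on Windows).
assert not os.path.isfile(path)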

pandas/_testing/contexts.py

+30 −44 lines changed

@@ -1,7 +1,11 @@
 from contextlib import contextmanager
 import os
+from pathlib import Path
+import random
 from shutil import rmtree
+import string
 import tempfile
+from typing import IO, Any, Union
 
 import numpy as np
 
@@ -73,66 +77,48 @@ def setTZ(tz):
 
 
 @contextmanager
-def ensure_clean(filename=None, return_filelike=False, **kwargs):
+def ensure_clean(filename=None, return_filelike: bool = False, **kwargs: Any):
     """
     Gets a temporary path and agrees to remove on close.
 
+    This implementation does not use tempfile.mkstemp to avoid having a file handle.
+    If the code using the returned path wants to delete the file itself, windows
+    requires that no program has a file handle to it.
+
     Parameters
     ----------
     filename : str (optional)
-        if None, creates a temporary file which is then removed when out of
-        scope. if passed, creates temporary file with filename as ending.
+        suffix of the created file.
     return_filelike : bool (default False)
         if True, returns a file-like which is *always* cleaned. Necessary for
         savefig and other functions which want to append extensions.
     **kwargs
-        Additional keywords passed in for creating a temporary file.
-        :meth:`tempFile.TemporaryFile` is used when `return_filelike` is ``True``.
-        :meth:`tempfile.mkstemp` is used when `return_filelike` is ``False``.
-        Note that the `filename` parameter will be passed in as the `suffix`
-        argument to either function.
+        Additional keywords are passed to open().
 
-    See Also
-    --------
-    tempfile.TemporaryFile
-    tempfile.mkstemp
     """
-    filename = filename or ""
-    fd = None
-
-    kwargs["suffix"] = filename
+    folder = Path(tempfile.gettempdir())
 
-    if return_filelike:
-        f = tempfile.TemporaryFile(**kwargs)
-
-        try:
-            yield f
-        finally:
-            f.close()
-    else:
-        # Don't generate tempfile if using a path with directory specified.
-        if len(os.path.dirname(filename)):
-            raise ValueError("Can't pass a qualified name to ensure_clean()")
+    if filename is None:
+        filename = ""
+    filename = (
+        "".join(random.choices(string.ascii_letters + string.digits, k=30)) + filename
+    )
+    path = folder / filename
 
-        try:
-            fd, filename = tempfile.mkstemp(**kwargs)
-        except UnicodeEncodeError:
-            import pytest
+    path.touch()
 
-            pytest.skip("no unicode file names on this system")
+    handle_or_str: Union[str, IO] = str(path)
+    if return_filelike:
+        kwargs.setdefault("mode", "w+b")
+        handle_or_str = open(path, **kwargs)
 
-        try:
-            yield filename
-        finally:
-            try:
-                os.close(fd)
-            except OSError:
-                print(f"Couldn't close file descriptor: {fd} (file: {filename})")
-            try:
-                if os.path.exists(filename):
-                    os.remove(filename)
-            except OSError as e:
-                print(f"Exception on removing file: {e}")
+    try:
+        yield handle_or_str
+    finally:
+        if not isinstance(handle_or_str, str):
+            handle_or_str.close()
+        if path.is_file():
+            path.unlink()
 
 
 @contextmanager
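
For context, the core of the rewritten helper is small enough to sketch on its own: build a random file name directly in the temp directory, hand out only the path (no open descriptor, so Windows can delete the file), and always unlink on exit. The helper name temp_path below is hypothetical, and the sketch omits the return_filelike branch and the extra open() kwargs:

import random
import string
import tempfile
from contextlib import contextmanager
from pathlib import Path


@contextmanager
def temp_path(suffix: str = ""):
    # Random name in the temp dir; no tempfile.mkstemp, so no lingering descriptor.
    name = "".join(random.choices(string.ascii_letters + string.digits, k=30)) + suffix
    path = Path(tempfile.gettempdir()) / name
    path.touch()
    try:
        yield str(path)
    finally:
        # The code under test may already have deleted the file itself.
        if path.is_file():
            path.unlink()


with temp_path(".dta") as p:
    Path(p).write_text("example")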

pandas/io/stata.py

+12 −13 lines changed

@@ -15,7 +15,6 @@
 import datetime
 from io import BytesIO
 import os
-from pathlib import Path
 import struct
 import sys
 from typing import (
@@ -2466,8 +2465,8 @@ def write_file(self) -> None:
         if self.handles.compression["method"] is not None:
             # ZipFile creates a file (with the same name) for each write call.
             # Write it first into a buffer and then write the buffer to the ZipFile.
-            self._output_file = self.handles.handle
-            self.handles.handle = BytesIO()
+            self._output_file, self.handles.handle = self.handles.handle, BytesIO()
+            self.handles.created_handles.append(self.handles.handle)
 
         try:
             self._write_header(
@@ -2488,20 +2487,23 @@ def write_file(self) -> None:
             self._write_value_labels()
             self._write_file_close_tag()
             self._write_map()
-        except Exception as exc:
             self._close()
-            if isinstance(self._fname, (str, Path)):
+        except Exception as exc:
+            self.handles.close()
+            # Only @runtime_checkable protocols can be used with instance and class
+            # checks
+            if isinstance(
+                self._fname, (str, os.PathLike)  # type: ignore[misc]
+            ) and os.path.isfile(self._fname):
                 try:
                     os.unlink(self._fname)
                 except OSError:
                     warnings.warn(
                         f"This save was not successful but {self._fname} could not "
-                        "be deleted. This file is not valid.",
+                        "be deleted. This file is not valid.",
                         ResourceWarning,
                     )
             raise exc
-        else:
-            self._close()
 
 
     def _close(self) -> None:
@@ -2513,11 +2515,8 @@ def _close(self) -> None:
         # write compression
         if self._output_file is not None:
             assert isinstance(self.handles.handle, BytesIO)
-            bio = self.handles.handle
-            bio.seek(0)
-            self.handles.handle = self._output_file
-            self.handles.handle.write(bio.read())  # type: ignore[arg-type]
-            bio.close()
+            bio, self.handles.handle = self.handles.handle, self._output_file
+            self.handles.handle.write(bio.getvalue())  # type: ignore[arg-type]
 
     def _write_map(self) -> None:
         """No-op, future compatibility"""

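The essential change in write_file is the close-then-unlink ordering: the previous code tried to remove the file while a handle to it was still open, which fails on Windows. A generic sketch of that ordering with hypothetical names, outside the StataWriter API:

import os
import warnings


def safe_write(path: str, payload: bytes) -> None:
    handle = open(path, "wb")
    try:
        handle.write(payload)  # a real writer may raise partway through
    except Exception:
        # Close first, then unlink; the reverse order fails on Windows
        # because the file still has an open handle.
        handle.close()
        if os.path.isfile(path):
            try:
                os.unlink(path)
            except OSError:
                warnings.warn(f"{path} could not be deleted and is not valid.")
        raise
    handle.close()
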
pandas/tests/io/excel/test_writers.py

+25 −28 lines changed

@@ -657,30 +657,27 @@ def test_excel_date_datetime_format(self, engine, ext, path):
         )
 
         with tm.ensure_clean(ext) as filename2:
-            writer1 = ExcelWriter(path)
-            writer2 = ExcelWriter(
+            with ExcelWriter(path) as writer1:
+                df.to_excel(writer1, "test1")
+
+            with ExcelWriter(
                 filename2,
                 date_format="DD.MM.YYYY",
                 datetime_format="DD.MM.YYYY HH-MM-SS",
-            )
-
-            df.to_excel(writer1, "test1")
-            df.to_excel(writer2, "test1")
-
-            writer1.close()
-            writer2.close()
+            ) as writer2:
+                df.to_excel(writer2, "test1")
 
-            reader1 = ExcelFile(path)
-            reader2 = ExcelFile(filename2)
+            with ExcelFile(path) as reader1:
+                rs1 = pd.read_excel(reader1, sheet_name="test1", index_col=0)
 
-            rs1 = pd.read_excel(reader1, sheet_name="test1", index_col=0)
-            rs2 = pd.read_excel(reader2, sheet_name="test1", index_col=0)
+            with ExcelFile(filename2) as reader2:
+                rs2 = pd.read_excel(reader2, sheet_name="test1", index_col=0)
 
-            tm.assert_frame_equal(rs1, rs2)
+            tm.assert_frame_equal(rs1, rs2)
 
-            # Since the reader returns a datetime object for dates,
-            # we need to use df_expected to check the result.
-            tm.assert_frame_equal(rs2, df_expected)
+            # Since the reader returns a datetime object for dates,
+            # we need to use df_expected to check the result.
+            tm.assert_frame_equal(rs2, df_expected)
 
     def test_to_excel_interval_no_labels(self, path):
         # see gh-19242
@@ -862,7 +859,7 @@ def test_to_excel_unicode_filename(self, ext, path):
                 f = open(filename, "wb")
             except UnicodeEncodeError:
                 pytest.skip("No unicode file names on this system")
-            else:
+            finally:
                 f.close()
 
             df = DataFrame(
@@ -872,15 +869,15 @@ def test_to_excel_unicode_filename(self, ext, path):
             )
             df.to_excel(filename, "test1", float_format="%.2f")
 
-            reader = ExcelFile(filename)
-            result = pd.read_excel(reader, sheet_name="test1", index_col=0)
+            with ExcelFile(filename) as reader:
+                result = pd.read_excel(reader, sheet_name="test1", index_col=0)
 
-            expected = DataFrame(
-                [[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]],
-                index=["A", "B"],
-                columns=["X", "Y", "Z"],
-            )
-            tm.assert_frame_equal(result, expected)
+            expected = DataFrame(
+                [[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]],
+                index=["A", "B"],
+                columns=["X", "Y", "Z"],
+            )
+            tm.assert_frame_equal(result, expected)
 
     # FIXME: dont leave commented-out
     # def test_to_excel_header_styling_xls(self, engine, ext):
@@ -1374,8 +1371,8 @@ def test_excelfile_fspath(self):
         with tm.ensure_clean("foo.xlsx") as path:
             df = DataFrame({"A": [1, 2]})
             df.to_excel(path)
-            xl = ExcelFile(path)
-            result = os.fspath(xl)
+            with ExcelFile(path) as xl:
+                result = os.fspath(xl)
             assert result == path
 
     def test_excelwriter_fspath(self):
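
The test changes above all follow the same rule: open ExcelWriter/ExcelFile in with blocks so their handles are released before tm.ensure_clean unlinks the file, which is what Windows requires. A condensed sketch of the pattern; the path, sheet name and frame are illustrative, and an Excel engine such as openpyxl is assumed to be installed:

import os

import pandas as pd
from pandas import ExcelFile, ExcelWriter

df = pd.DataFrame({"A": [1, 2]})
path = "example.xlsx"  # hypothetical path

# Handles are released at the end of each block, so the file can be
# deleted immediately afterwards.
with ExcelWriter(path) as writer:
    df.to_excel(writer, "test1")

with ExcelFile(path) as reader:
    result = pd.read_excel(reader, sheet_name="test1", index_col=0)

os.remove(path)  # succeeds because no handle is left open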

pandas/tests/io/formats/test_to_csv.py

+6 −6 lines changed

@@ -545,12 +545,12 @@ def test_to_csv_zip_arguments(self, compression, archive_name):
             df.to_csv(
                 path, compression={"method": compression, "archive_name": archive_name}
             )
-            zp = ZipFile(path)
-            expected_arcname = path if archive_name is None else archive_name
-            expected_arcname = os.path.basename(expected_arcname)
-            assert len(zp.filelist) == 1
-            archived_file = os.path.basename(zp.filelist[0].filename)
-            assert archived_file == expected_arcname
+            with ZipFile(path) as zp:
+                expected_arcname = path if archive_name is None else archive_name
+                expected_arcname = os.path.basename(expected_arcname)
+                assert len(zp.filelist) == 1
+                archived_file = os.path.basename(zp.filelist[0].filename)
+                assert archived_file == expected_arcname
 
     @pytest.mark.parametrize("df_new_type", ["Int64"])
     def test_to_csv_na_rep_long_string(self, df_new_type):
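
As the assertions above encode, when archive_name is None the single member inside the zip defaults to the basename of the target path. A quick sketch with a hypothetical path:

import os
from zipfile import ZipFile

import pandas as pd

df = pd.DataFrame({"a": [1, 2]})
path = "frame.zip"  # hypothetical path

df.to_csv(path, compression={"method": "zip"})

with ZipFile(path) as zp:
    # No archive_name was given, so the member is named after the zip itself.
    assert os.path.basename(zp.filelist[0].filename) == "frame.zip"

os.remove(path)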

pandas/tests/io/test_stata.py

+5 −4 lines changed

@@ -555,6 +555,7 @@ def test_invalid_timestamp(self, version):
             msg = "time_stamp should be datetime type"
             with pytest.raises(ValueError, match=msg):
                 original.to_stata(path, time_stamp=time_stamp, version=version)
+            assert not os.path.isfile(path)
 
     def test_numeric_column_names(self):
         original = DataFrame(np.reshape(np.arange(25.0), (5, 5)))
@@ -1921,10 +1922,10 @@ def test_compression_dict(method, file_ext):
         compression = {"method": method, "archive_name": archive_name}
         df.to_stata(path, compression=compression)
         if method == "zip" or file_ext == "zip":
-            zp = zipfile.ZipFile(path, "r")
-            assert len(zp.filelist) == 1
-            assert zp.filelist[0].filename == archive_name
-            fp = io.BytesIO(zp.read(zp.filelist[0]))
+            with zipfile.ZipFile(path, "r") as zp:
+                assert len(zp.filelist) == 1
+                assert zp.filelist[0].filename == archive_name
+                fp = io.BytesIO(zp.read(zp.filelist[0]))
         else:
             fp = path
         reread = read_stata(fp, index_col="index")
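
Outside the test harness, the compression-dict round trip exercised above looks roughly like the following; the output path and archive name are illustrative:

import io
import os
import zipfile

import pandas as pd
from pandas import read_stata

df = pd.DataFrame({"x": [1.0, 2.0]})
path = "frame.zip"  # hypothetical path

# Write the .dta into a zip archive with an explicit member name.
df.to_stata(path, compression={"method": "zip", "archive_name": "some_name.dta"})

# Read the single member back through a BytesIO so the archive is closed promptly.
with zipfile.ZipFile(path, "r") as zp:
    assert zp.filelist[0].filename == "some_name.dta"
    fp = io.BytesIO(zp.read(zp.filelist[0]))

reread = read_stata(fp, index_col="index")
os.remove(path)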
