Skip to content

Commit b341ca5

Browse files
authored
Backport PR #39202 on branch 1.2.x (#39255)
1 parent 3cad03f commit b341ca5

File tree

6 files changed

+76
-96
lines changed

6 files changed

+76
-96
lines changed

doc/source/whatsnew/v1.2.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ Fixed regressions
1717
- Fixed regression in :meth:`~DataFrame.to_csv` that created corrupted zip files when there were more rows than ``chunksize`` (:issue:`38714`)
1818
- Fixed regression in :meth:`read_csv` and other read functions where the encoding error policy (``errors``) did not default to ``"replace"`` when no encoding was specified (:issue:`38989`)
1919
- Fixed regression in :func:`read_excel` with non-rawbyte file handles (:issue:`38788`)
20+
- Fixed regression in :meth:`DataFrame.to_stata` not removing the created file when an error occurred (:issue:`39202`)
2021
- Fixed regression in ``DataFrame.__setitem__`` raising ``ValueError`` when expanding :class:`DataFrame` and new column is of type ``"0 - name"`` (:issue:`39010`)
2122
- Fixed regression in setting with :meth:`DataFrame.loc` raising ``ValueError`` when :class:`DataFrame` has unsorted :class:`MultiIndex` columns and indexer is a scalar (:issue:`38601`)
2223
- Fixed regression in setting with :meth:`DataFrame.loc` raising ``KeyError`` with :class:`MultiIndex` and list-like columns indexer enlarging :class:`DataFrame` (:issue:`39147`)

pandas/_testing.py

+29-45
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,13 @@
66
import gzip
77
import operator
88
import os
9+
from pathlib import Path
10+
import random
911
import re
1012
from shutil import rmtree
1113
import string
1214
import tempfile
13-
from typing import Any, Callable, ContextManager, List, Optional, Type, Union, cast
15+
from typing import IO, Any, Callable, ContextManager, List, Optional, Type, Union, cast
1416
import warnings
1517
import zipfile
1618

@@ -578,66 +580,48 @@ def close(fignum=None):
578580

579581

580582
@contextmanager
581-
def ensure_clean(filename=None, return_filelike=False, **kwargs):
583+
def ensure_clean(filename=None, return_filelike: bool = False, **kwargs: Any):
582584
"""
583585
Gets a temporary path and agrees to remove on close.
584586
587+
This implementation does not use tempfile.mkstemp to avoid having a file handle.
588+
If the code using the returned path wants to delete the file itself, Windows
589+
requires that no program has a file handle to it.
590+
585591
Parameters
586592
----------
587593
filename : str (optional)
588-
if None, creates a temporary file which is then removed when out of
589-
scope. if passed, creates temporary file with filename as ending.
594+
suffix of the created file.
590595
return_filelike : bool (default False)
591596
if True, returns a file-like which is *always* cleaned. Necessary for
592597
savefig and other functions which want to append extensions.
593598
**kwargs
594-
Additional keywords passed in for creating a temporary file.
595-
:meth:`tempFile.TemporaryFile` is used when `return_filelike` is ``True``.
596-
:meth:`tempfile.mkstemp` is used when `return_filelike` is ``False``.
597-
Note that the `filename` parameter will be passed in as the `suffix`
598-
argument to either function.
599+
Additional keywords are passed to open().
599600
600-
See Also
601-
--------
602-
tempfile.TemporaryFile
603-
tempfile.mkstemp
604601
"""
605-
filename = filename or ""
606-
fd = None
607-
608-
kwargs["suffix"] = filename
602+
folder = Path(tempfile.gettempdir())
609603

610-
if return_filelike:
611-
f = tempfile.TemporaryFile(**kwargs)
612-
613-
try:
614-
yield f
615-
finally:
616-
f.close()
617-
else:
618-
# Don't generate tempfile if using a path with directory specified.
619-
if len(os.path.dirname(filename)):
620-
raise ValueError("Can't pass a qualified name to ensure_clean()")
604+
if filename is None:
605+
filename = ""
606+
filename = (
607+
"".join(random.choices(string.ascii_letters + string.digits, k=30)) + filename
608+
)
609+
path = folder / filename
621610

622-
try:
623-
fd, filename = tempfile.mkstemp(**kwargs)
624-
except UnicodeEncodeError:
625-
import pytest
611+
path.touch()
626612

627-
pytest.skip("no unicode file names on this system")
613+
handle_or_str: Union[str, IO] = str(path)
614+
if return_filelike:
615+
kwargs.setdefault("mode", "w+b")
616+
handle_or_str = open(path, **kwargs)
628617

629-
try:
630-
yield filename
631-
finally:
632-
try:
633-
os.close(fd)
634-
except OSError:
635-
print(f"Couldn't close file descriptor: {fd} (file: {filename})")
636-
try:
637-
if os.path.exists(filename):
638-
os.remove(filename)
639-
except OSError as e:
640-
print(f"Exception on removing file: {e}")
618+
try:
619+
yield handle_or_str
620+
finally:
621+
if not isinstance(handle_or_str, str):
622+
handle_or_str.close()
623+
if path.is_file():
624+
path.unlink()
641625

642626

643627
@contextmanager

pandas/io/stata.py

+10-13
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
import datetime
1414
from io import BytesIO
1515
import os
16-
from pathlib import Path
1716
import struct
1817
import sys
1918
from typing import Any, AnyStr, Dict, List, Optional, Sequence, Tuple, Union, cast
@@ -2462,8 +2461,8 @@ def write_file(self) -> None:
24622461
if self.handles.compression["method"] is not None:
24632462
# ZipFile creates a file (with the same name) for each write call.
24642463
# Write it first into a buffer and then write the buffer to the ZipFile.
2465-
self._output_file = self.handles.handle
2466-
self.handles.handle = BytesIO()
2464+
self._output_file, self.handles.handle = self.handles.handle, BytesIO()
2465+
self.handles.created_handles.append(self.handles.handle)
24672466

24682467
try:
24692468
self._write_header(
@@ -2484,20 +2483,21 @@ def write_file(self) -> None:
24842483
self._write_value_labels()
24852484
self._write_file_close_tag()
24862485
self._write_map()
2487-
except Exception as exc:
24882486
self._close()
2489-
if isinstance(self._fname, (str, Path)):
2487+
except Exception as exc:
2488+
self.handles.close()
2489+
if isinstance(self._fname, (str, os.PathLike)) and os.path.isfile(
2490+
self._fname
2491+
):
24902492
try:
24912493
os.unlink(self._fname)
24922494
except OSError:
24932495
warnings.warn(
24942496
f"This save was not successful but {self._fname} could not "
2495-
"be deleted. This file is not valid.",
2497+
"be deleted. This file is not valid.",
24962498
ResourceWarning,
24972499
)
24982500
raise exc
2499-
else:
2500-
self._close()
25012501

25022502
def _close(self) -> None:
25032503
"""
@@ -2509,11 +2509,8 @@ def _close(self) -> None:
25092509
# write compression
25102510
if self._output_file is not None:
25112511
assert isinstance(self.handles.handle, BytesIO)
2512-
bio = self.handles.handle
2513-
bio.seek(0)
2514-
self.handles.handle = self._output_file
2515-
self.handles.handle.write(bio.read()) # type: ignore[arg-type]
2516-
bio.close()
2512+
bio, self.handles.handle = self.handles.handle, self._output_file
2513+
self.handles.handle.write(bio.getvalue()) # type: ignore[arg-type]
25172514

25182515
def _write_map(self) -> None:
25192516
"""No-op, future compatibility"""

pandas/tests/io/excel/test_writers.py

+25-28
Original file line numberDiff line numberDiff line change
@@ -657,30 +657,27 @@ def test_excel_date_datetime_format(self, engine, ext, path):
657657
)
658658

659659
with tm.ensure_clean(ext) as filename2:
660-
writer1 = ExcelWriter(path)
661-
writer2 = ExcelWriter(
660+
with ExcelWriter(path) as writer1:
661+
df.to_excel(writer1, "test1")
662+
663+
with ExcelWriter(
662664
filename2,
663665
date_format="DD.MM.YYYY",
664666
datetime_format="DD.MM.YYYY HH-MM-SS",
665-
)
666-
667-
df.to_excel(writer1, "test1")
668-
df.to_excel(writer2, "test1")
669-
670-
writer1.close()
671-
writer2.close()
667+
) as writer2:
668+
df.to_excel(writer2, "test1")
672669

673-
reader1 = ExcelFile(path)
674-
reader2 = ExcelFile(filename2)
670+
with ExcelFile(path) as reader1:
671+
rs1 = pd.read_excel(reader1, sheet_name="test1", index_col=0)
675672

676-
rs1 = pd.read_excel(reader1, sheet_name="test1", index_col=0)
677-
rs2 = pd.read_excel(reader2, sheet_name="test1", index_col=0)
673+
with ExcelFile(filename2) as reader2:
674+
rs2 = pd.read_excel(reader2, sheet_name="test1", index_col=0)
678675

679-
tm.assert_frame_equal(rs1, rs2)
676+
tm.assert_frame_equal(rs1, rs2)
680677

681-
# Since the reader returns a datetime object for dates,
682-
# we need to use df_expected to check the result.
683-
tm.assert_frame_equal(rs2, df_expected)
678+
# Since the reader returns a datetime object for dates,
679+
# we need to use df_expected to check the result.
680+
tm.assert_frame_equal(rs2, df_expected)
684681

685682
def test_to_excel_interval_no_labels(self, path):
686683
# see gh-19242
@@ -862,7 +859,7 @@ def test_to_excel_unicode_filename(self, ext, path):
862859
f = open(filename, "wb")
863860
except UnicodeEncodeError:
864861
pytest.skip("No unicode file names on this system")
865-
else:
862+
finally:
866863
f.close()
867864

868865
df = DataFrame(
@@ -872,15 +869,15 @@ def test_to_excel_unicode_filename(self, ext, path):
872869
)
873870
df.to_excel(filename, "test1", float_format="%.2f")
874871

875-
reader = ExcelFile(filename)
876-
result = pd.read_excel(reader, sheet_name="test1", index_col=0)
872+
with ExcelFile(filename) as reader:
873+
result = pd.read_excel(reader, sheet_name="test1", index_col=0)
877874

878-
expected = DataFrame(
879-
[[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]],
880-
index=["A", "B"],
881-
columns=["X", "Y", "Z"],
882-
)
883-
tm.assert_frame_equal(result, expected)
875+
expected = DataFrame(
876+
[[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]],
877+
index=["A", "B"],
878+
columns=["X", "Y", "Z"],
879+
)
880+
tm.assert_frame_equal(result, expected)
884881

885882
# FIXME: dont leave commented-out
886883
# def test_to_excel_header_styling_xls(self, engine, ext):
@@ -1374,8 +1371,8 @@ def test_excelfile_fspath(self):
13741371
with tm.ensure_clean("foo.xlsx") as path:
13751372
df = DataFrame({"A": [1, 2]})
13761373
df.to_excel(path)
1377-
xl = ExcelFile(path)
1378-
result = os.fspath(xl)
1374+
with ExcelFile(path) as xl:
1375+
result = os.fspath(xl)
13791376
assert result == path
13801377

13811378
def test_excelwriter_fspath(self):

pandas/tests/io/formats/test_to_csv.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -545,12 +545,12 @@ def test_to_csv_zip_arguments(self, compression, archive_name):
545545
df.to_csv(
546546
path, compression={"method": compression, "archive_name": archive_name}
547547
)
548-
zp = ZipFile(path)
549-
expected_arcname = path if archive_name is None else archive_name
550-
expected_arcname = os.path.basename(expected_arcname)
551-
assert len(zp.filelist) == 1
552-
archived_file = os.path.basename(zp.filelist[0].filename)
553-
assert archived_file == expected_arcname
548+
with ZipFile(path) as zp:
549+
expected_arcname = path if archive_name is None else archive_name
550+
expected_arcname = os.path.basename(expected_arcname)
551+
assert len(zp.filelist) == 1
552+
archived_file = os.path.basename(zp.filelist[0].filename)
553+
assert archived_file == expected_arcname
554554

555555
@pytest.mark.parametrize("df_new_type", ["Int64"])
556556
def test_to_csv_na_rep_long_string(self, df_new_type):

pandas/tests/io/test_stata.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -550,6 +550,7 @@ def test_invalid_timestamp(self, version):
550550
msg = "time_stamp should be datetime type"
551551
with pytest.raises(ValueError, match=msg):
552552
original.to_stata(path, time_stamp=time_stamp, version=version)
553+
assert not os.path.isfile(path)
553554

554555
def test_numeric_column_names(self):
555556
original = DataFrame(np.reshape(np.arange(25.0), (5, 5)))
@@ -1916,10 +1917,10 @@ def test_compression_dict(method, file_ext):
19161917
compression = {"method": method, "archive_name": archive_name}
19171918
df.to_stata(path, compression=compression)
19181919
if method == "zip" or file_ext == "zip":
1919-
zp = zipfile.ZipFile(path, "r")
1920-
assert len(zp.filelist) == 1
1921-
assert zp.filelist[0].filename == archive_name
1922-
fp = io.BytesIO(zp.read(zp.filelist[0]))
1920+
with zipfile.ZipFile(path, "r") as zp:
1921+
assert len(zp.filelist) == 1
1922+
assert zp.filelist[0].filename == archive_name
1923+
fp = io.BytesIO(zp.read(zp.filelist[0]))
19231924
else:
19241925
fp = path
19251926
reread = read_stata(fp, index_col="index")

0 commit comments

Comments
 (0)