diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 689f5cc7951af..a9826041e987d 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1576,6 +1576,7 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form - :func:`DataFrame.to_string()`, :func:`DataFrame.to_html()`, :func:`DataFrame.to_latex()` will correctly format output when a string is passed as the ``float_format`` argument (:issue:`21625`, :issue:`22270`) - Bug in :func:`read_csv` that caused it to raise ``OverflowError`` when trying to use 'inf' as ``na_value`` with integer index column (:issue:`17128`) - Bug in :func:`pandas.io.json.json_normalize` that caused it to raise ``TypeError`` when two consecutive elements of ``record_path`` are dicts (:issue:`22706`) +- Bug in :meth:`DataFrame.to_stata`, :class:`pandas.io.stata.StataWriter` and :class:`pandas.io.stata.StataWriter117` where an exception would leave a partially written and invalid dta file (:issue:`23573`) - Bug in :meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` that produced invalid files when using strLs with non-ASCII characters (:issue:`23573`) Plotting diff --git a/pandas/io/stata.py b/pandas/io/stata.py index b61ca21a53f75..fcd99e7cdce0d 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -12,6 +12,7 @@ from collections import OrderedDict import datetime +import os import struct import sys import warnings @@ -23,7 +24,8 @@ from pandas._libs.tslibs import NaT, Timestamp from pandas._libs.writers import max_len_string_array from pandas.compat import ( - BytesIO, lmap, lrange, lzip, range, string_types, text_type, zip) + BytesIO, ResourceWarning, lmap, lrange, lzip, range, string_types, + text_type, zip) from pandas.util._decorators import Appender, deprecate_kwarg from pandas.core.dtypes.common import ( @@ -2209,7 +2211,17 @@ def write_file(self): self._write_value_labels() self._write_file_close_tag() self._write_map() - 
finally: + except Exception as exc: + self._close() + try: + if self._own_file: + os.unlink(self._fname) + except Exception: + warnings.warn('This save was not successful but {0} could not ' + 'be deleted. This file is not ' + 'valid.'.format(self._fname), ResourceWarning) + raise exc + else: self._close() def _close(self): diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index e6544b8e3ee4b..3413b8fdf18d1 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -16,7 +16,7 @@ import pandas as pd import pandas.util.testing as tm import pandas.compat as compat -from pandas.compat import iterkeys, PY3 +from pandas.compat import iterkeys, PY3, ResourceWarning from pandas.core.dtypes.common import is_categorical_dtype from pandas.core.frame import DataFrame, Series from pandas.io.parsers import read_csv @@ -1547,6 +1547,16 @@ def test_all_none_exception(self, version): assert 'Only string-like' in excinfo.value.args[0] assert 'Column `none`' in excinfo.value.args[0] + @pytest.mark.parametrize('version', [114, 117]) + def test_invalid_file_not_written(self, version): + content = 'Here is one __�__ Another one __·__ Another one __½__' + df = DataFrame([content], columns=['invalid']) + expected_exc = UnicodeEncodeError if PY3 else UnicodeDecodeError + with tm.ensure_clean() as path: + with pytest.raises(expected_exc): + with tm.assert_produces_warning(ResourceWarning): + df.to_stata(path, version=version) + def test_strl_latin1(self): # GH 23573, correct GSO data to reflect correct size output = DataFrame([[u'pandas'] * 2, [u'þâÑÐŧ'] * 2],