pandas-dev · jreback · Dec 18, 2018 · Dec 17, 2018
diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
@@ -1576,6 +1576,7 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form
 - :func:`DataFrame.to_string()`, :func:`DataFrame.to_html()`, :func:`DataFrame.to_latex()` will correctly format output when a string is passed as the ``float_format`` argument (:issue:`21625`, :issue:`22270`)
 - Bug in :func:`read_csv` that caused it to raise ``OverflowError`` when trying to use 'inf' as ``na_value`` with integer index column (:issue:`17128`)
 - Bug in :func:`pandas.io.json.json_normalize` that caused it to raise ``TypeError`` when two consecutive elements of ``record_path`` are dicts (:issue:`22706`)
+- Bug in :meth:`DataFrame.to_stata`, :class:`pandas.io.stata.StataWriter` and :class:`pandas.io.stata.StataWriter117` where an exception would leave a partially written and invalid dta file (:issue:`23573`)
 - Bug in :meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` that produced invalid files when using strLs with non-ASCII characters (:issue:`23573`)
 
 Plotting

diff --git a/pandas/io/stata.py b/pandas/io/stata.py
@@ -12,6 +12,7 @@
 
 from collections import OrderedDict
 import datetime
+import os
 import struct
 import sys
 import warnings
@@ -23,7 +24,8 @@
 from pandas._libs.tslibs import NaT, Timestamp
 from pandas._libs.writers import max_len_string_array
 from pandas.compat import (
-    BytesIO, lmap, lrange, lzip, range, string_types, text_type, zip)
+    BytesIO, ResourceWarning, lmap, lrange, lzip, range, string_types,
+    text_type, zip)
 from pandas.util._decorators import Appender, deprecate_kwarg
 
 from pandas.core.dtypes.common import (
@@ -2209,7 +2211,17 @@ def write_file(self):
             self._write_value_labels()
             self._write_file_close_tag()
             self._write_map()
-        finally:
+        except Exception as exc:
+            self._close()
+            try:
+                if self._own_file:
+                    os.unlink(self._fname)
+            except Exception:
+                warnings.warn('This save was not successful but {0} could not '
+                              'be deleted.  This file is not '
+                              'valid.'.format(self._fname), ResourceWarning)
+            raise exc
+        else:
             self._close()
 
     def _close(self):

diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
@@ -16,7 +16,7 @@
 import pandas as pd
 import pandas.util.testing as tm
 import pandas.compat as compat
-from pandas.compat import iterkeys, PY3
+from pandas.compat import iterkeys, PY3, ResourceWarning
 from pandas.core.dtypes.common import is_categorical_dtype
 from pandas.core.frame import DataFrame, Series
 from pandas.io.parsers import read_csv
@@ -1547,6 +1547,16 @@ def test_all_none_exception(self, version):
         assert 'Only string-like' in excinfo.value.args[0]
         assert 'Column `none`' in excinfo.value.args[0]
 
+    @pytest.mark.parametrize('version', [114, 117])
+    def test_invalid_file_not_written(self, version):
+        content = 'Here is one __�__ Another one __·__ Another one __½__'
+        df = DataFrame([content], columns=['invalid'])
+        expected_exc = UnicodeEncodeError if PY3 else UnicodeDecodeError
+        with tm.ensure_clean() as path:
+            with pytest.raises(expected_exc):
+                with tm.assert_produces_warning(ResourceWarning):
+                    df.to_stata(path)
+
     def test_strl_latin1(self):
         # GH 23573, correct GSO data to reflect correct size
         output = DataFrame([[u'pandas'] * 2, [u'þâÑÐÅ§'] * 2],