diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 6aaed803c5352..85da1579b33a2 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -458,6 +458,7 @@ Bug Fixes - Bug in ``pd.read_csv()`` with ``float_precision='round_trip'`` which caused a segfault when a text entry is parsed (:issue:`15140`) +- Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which caused incorrectly formatted files to be produced for some locales (:issue:`13856`) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 512a224555577..2be7657883e88 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -2157,9 +2157,15 @@ def _write_header(self, data_label=None, time_stamp=None): time_stamp = datetime.datetime.now() elif not isinstance(time_stamp, datetime.datetime): raise ValueError("time_stamp should be datetime type") - self._file.write( - self._null_terminate(time_stamp.strftime("%d %b %Y %H:%M")) - ) + # GH #13856 + # Avoid locale-specific month conversion + months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', + 'Sep', 'Oct', 'Nov', 'Dec'] + month_lookup = {i + 1: month for i, month in enumerate(months)} + ts = (time_stamp.strftime("%d ") + + month_lookup[time_stamp.month] + + time_stamp.strftime(" %Y %H:%M")) + self._file.write(self._null_terminate(ts)) def _write_descriptors(self, typlist=None, varlist=None, srtlist=None, fmtlist=None, lbllist=None): diff --git a/pandas/io/tests/test_stata.py b/pandas/io/tests/test_stata.py index 08fcde8d3022e..8cfd5d98fe05f 100644 --- a/pandas/io/tests/test_stata.py +++ b/pandas/io/tests/test_stata.py @@ -484,9 +484,7 @@ def test_timestamp_and_label(self): data_label=data_label) with StataReader(path) as reader: - parsed_time_stamp = dt.datetime.strptime( - reader.time_stamp, ('%d %b %Y %H:%M')) - assert parsed_time_stamp == time_stamp + assert reader.time_stamp == '29 Feb 2000 14:21' assert reader.data_label == data_label def test_numeric_column_names(self):