BUG: Remove locale conversion from Stata file date

bashtage · bashtage · commit 5a54fc81d55a · 2017-01-24T14:55:49.000Z
Prevent locale from affecting Stata file date creation, which must be en_US. xref #13856
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
@@ -456,6 +456,7 @@ Bug Fixes
 
 - Bug in ``pd.read_csv()`` with ``float_precision='round_trip'`` which caused a segfault when a text entry is parsed (:issue:`15140`)
 
+- Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which caused incorrectly formatted files to be produced for some locales (:issue:`13856`)
 
 
 
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
@@ -9,29 +9,27 @@
 You can find more information on http://presbrey.mit.edu/PyDTA and
 http://www.statsmodels.org/devel/
 """
-import numpy as np
-
-import sys
+import datetime
 import struct
-from dateutil.relativedelta import relativedelta
+import sys
 
-from pandas.types.common import (is_categorical_dtype, is_datetime64_dtype,
-                                 _ensure_object)
+import numpy as np
+from dateutil.relativedelta import relativedelta
+from pandas.lib import max_len_string_array, infer_dtype
+from pandas.tslib import NaT, Timestamp
 
+import pandas as pd
+from pandas import compat, to_timedelta, to_datetime, isnull, DatetimeIndex
+from pandas.compat import lrange, lmap, lzip, text_type, string_types, range, \
+    zip, BytesIO
 from pandas.core.base import StringMixin
 from pandas.core.categorical import Categorical
 from pandas.core.frame import DataFrame
 from pandas.core.series import Series
-import datetime
-from pandas import compat, to_timedelta, to_datetime, isnull, DatetimeIndex
-from pandas.compat import lrange, lmap, lzip, text_type, string_types, range, \
-    zip, BytesIO
-from pandas.util.decorators import Appender
-import pandas as pd
-
 from pandas.io.common import get_filepath_or_buffer, BaseIterator
-from pandas.lib import max_len_string_array, infer_dtype
-from pandas.tslib import NaT, Timestamp
+from pandas.types.common import (is_categorical_dtype, is_datetime64_dtype,
+                                 _ensure_object)
+from pandas.util.decorators import Appender
 
 _version_error = ("Version of given Stata file is not 104, 105, 108, "
                   "111 (Stata 7SE), 113 (Stata 8/9), 114 (Stata 10/11), "
@@ -2157,9 +2155,14 @@ def _write_header(self, data_label=None, time_stamp=None):
             time_stamp = datetime.datetime.now()
         elif not isinstance(time_stamp, datetime.datetime):
             raise ValueError("time_stamp should be datetime type")
-        self._file.write(
-            self._null_terminate(time_stamp.strftime("%d %b %Y %H:%M"))
-        )
+        # Avoid locale-specific month conversion
+        months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug',
+                  'Sep', 'Oct', 'Nov', 'Dec']
+        month_lookup = {i + 1: month for i, month in enumerate(months)}
+        ts = (time_stamp.strftime("%d ") +
+              month_lookup[time_stamp.month] +
+              time_stamp.strftime(" %Y %H:%M"))
+        self._file.write(self._null_terminate(ts))
 
     def _write_descriptors(self, typlist=None, varlist=None, srtlist=None,
                            fmtlist=None, lbllist=None):
diff --git a/pandas/io/tests/test_stata.py b/pandas/io/tests/test_stata.py
@@ -484,9 +484,7 @@ def test_timestamp_and_label(self):
                               data_label=data_label)
 
             with StataReader(path) as reader:
-                parsed_time_stamp = dt.datetime.strptime(
-                    reader.time_stamp, ('%d %b %Y %H:%M'))
-                assert parsed_time_stamp == time_stamp
+                assert reader.time_stamp == '29 Feb 2000 14:21'
                 assert reader.data_label == data_label
 
     def test_numeric_column_names(self):

Original file line number	Diff line number	Diff line change
`@@ -456,6 +456,7 @@ Bug Fixes`
`456`	`456`
`457`	`457`	- Bug in ``pd.read_csv()`` with ``float_precision='round_trip'`` which caused a segfault when a text entry is parsed (:issue:`15140`)
`458`	`458`
	`459`	+- Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which caused incorrectly formatted files to be produced for some locales (:issue:`13856`)
`459`	`460`
`460`	`461`
`461`	`462`