Skip to content

Commit 5a54fc8

Browse files
committed
BUG: Remove locale conversion from Stata file date
Prevent the locale from affecting creation of the Stata file date, whose month abbreviation must always be written in en_US form. xref #13856
1 parent be3f2ae commit 5a54fc8

File tree

3 files changed

+23
-21
lines changed

3 files changed

+23
-21
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,7 @@ Bug Fixes
456456

457457
- Bug in ``pd.read_csv()`` with ``float_precision='round_trip'`` which caused a segfault when a text entry is parsed (:issue:`15140`)
458458

459+
- Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which caused incorrectly formatted files to be produced for some locales (:issue:`13856`)
459460

460461

461462

pandas/io/stata.py

+21-18
Original file line numberDiff line numberDiff line change
@@ -9,29 +9,27 @@
99
You can find more information on http://presbrey.mit.edu/PyDTA and
1010
http://www.statsmodels.org/devel/
1111
"""
12-
import numpy as np
13-
14-
import sys
12+
import datetime
1513
import struct
16-
from dateutil.relativedelta import relativedelta
14+
import sys
1715

18-
from pandas.types.common import (is_categorical_dtype, is_datetime64_dtype,
19-
_ensure_object)
16+
import numpy as np
17+
from dateutil.relativedelta import relativedelta
18+
from pandas.lib import max_len_string_array, infer_dtype
19+
from pandas.tslib import NaT, Timestamp
2020

21+
import pandas as pd
22+
from pandas import compat, to_timedelta, to_datetime, isnull, DatetimeIndex
23+
from pandas.compat import lrange, lmap, lzip, text_type, string_types, range, \
24+
zip, BytesIO
2125
from pandas.core.base import StringMixin
2226
from pandas.core.categorical import Categorical
2327
from pandas.core.frame import DataFrame
2428
from pandas.core.series import Series
25-
import datetime
26-
from pandas import compat, to_timedelta, to_datetime, isnull, DatetimeIndex
27-
from pandas.compat import lrange, lmap, lzip, text_type, string_types, range, \
28-
zip, BytesIO
29-
from pandas.util.decorators import Appender
30-
import pandas as pd
31-
3229
from pandas.io.common import get_filepath_or_buffer, BaseIterator
33-
from pandas.lib import max_len_string_array, infer_dtype
34-
from pandas.tslib import NaT, Timestamp
30+
from pandas.types.common import (is_categorical_dtype, is_datetime64_dtype,
31+
_ensure_object)
32+
from pandas.util.decorators import Appender
3533

3634
_version_error = ("Version of given Stata file is not 104, 105, 108, "
3735
"111 (Stata 7SE), 113 (Stata 8/9), 114 (Stata 10/11), "
@@ -2157,9 +2155,14 @@ def _write_header(self, data_label=None, time_stamp=None):
21572155
time_stamp = datetime.datetime.now()
21582156
elif not isinstance(time_stamp, datetime.datetime):
21592157
raise ValueError("time_stamp should be datetime type")
2160-
self._file.write(
2161-
self._null_terminate(time_stamp.strftime("%d %b %Y %H:%M"))
2162-
)
2158+
# Avoid locale-specific month conversion
2159+
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug',
2160+
'Sep', 'Oct', 'Nov', 'Dec']
2161+
month_lookup = {i + 1: month for i, month in enumerate(months)}
2162+
ts = (time_stamp.strftime("%d ") +
2163+
month_lookup[time_stamp.month] +
2164+
time_stamp.strftime(" %Y %H:%M"))
2165+
self._file.write(self._null_terminate(ts))
21632166

21642167
def _write_descriptors(self, typlist=None, varlist=None, srtlist=None,
21652168
fmtlist=None, lbllist=None):

pandas/io/tests/test_stata.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -484,9 +484,7 @@ def test_timestamp_and_label(self):
484484
data_label=data_label)
485485

486486
with StataReader(path) as reader:
487-
parsed_time_stamp = dt.datetime.strptime(
488-
reader.time_stamp, ('%d %b %Y %H:%M'))
489-
assert parsed_time_stamp == time_stamp
487+
assert reader.time_stamp == '29 Feb 2000 14:21'
490488
assert reader.data_label == data_label
491489

492490
def test_numeric_column_names(self):

0 commit comments

Comments
 (0)