Skip to content

Commit a23a92f

Browse files
committed
BUG: Remove locale conversion from Stata file date
Prevent the system locale from affecting the creation timestamp written to Stata files, which must use en_US (English) month abbreviations. xref #13856
1 parent be3f2ae commit a23a92f

File tree

3 files changed

+11
-6
lines changed

3 files changed

+11
-6
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,7 @@ Bug Fixes
456456

457457
- Bug in ``pd.read_csv()`` with ``float_precision='round_trip'`` which caused a segfault when a text entry is parsed (:issue:`15140`)
458458

459+
- Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which caused incorrectly formatted files to be produced for some locales (:issue:`13856`)
459460

460461

461462

pandas/io/stata.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -2157,9 +2157,15 @@ def _write_header(self, data_label=None, time_stamp=None):
21572157
time_stamp = datetime.datetime.now()
21582158
elif not isinstance(time_stamp, datetime.datetime):
21592159
raise ValueError("time_stamp should be datetime type")
2160-
self._file.write(
2161-
self._null_terminate(time_stamp.strftime("%d %b %Y %H:%M"))
2162-
)
2160+
# GH #13856
2161+
# Avoid locale-specific month conversion
2162+
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug',
2163+
'Sep', 'Oct', 'Nov', 'Dec']
2164+
month_lookup = {i + 1: month for i, month in enumerate(months)}
2165+
ts = (time_stamp.strftime("%d ") +
2166+
month_lookup[time_stamp.month] +
2167+
time_stamp.strftime(" %Y %H:%M"))
2168+
self._file.write(self._null_terminate(ts))
21632169

21642170
def _write_descriptors(self, typlist=None, varlist=None, srtlist=None,
21652171
fmtlist=None, lbllist=None):

pandas/io/tests/test_stata.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -484,9 +484,7 @@ def test_timestamp_and_label(self):
484484
data_label=data_label)
485485

486486
with StataReader(path) as reader:
487-
parsed_time_stamp = dt.datetime.strptime(
488-
reader.time_stamp, ('%d %b %Y %H:%M'))
489-
assert parsed_time_stamp == time_stamp
487+
assert reader.time_stamp == '29 Feb 2000 14:21'
490488
assert reader.data_label == data_label
491489

492490
def test_numeric_column_names(self):

0 commit comments

Comments
 (0)