Skip to content

Commit 917c98f

Browse files
committed
BUG: Bug in to_datetime with a format and coerce=True not raising (GH5195)
1 parent f8e0109 commit 917c98f

File tree

4 files changed

+32
-2
lines changed

4 files changed

+32
-2
lines changed

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -593,6 +593,7 @@ Bug Fixes
593593
- Compound dtypes in a constructor raise ``NotImplementedError`` (:issue:`5191`)
594594
- Bug in comparing duplicate frames (:issue:`4421`) related
595595
- Bug in describe on duplicate frames
596+
- Bug in ``to_datetime`` with a format and ``coerce=True`` raising instead of coercing unparseable values to ``NaT`` (:issue:`5195`)
596597

597598
pandas 0.12.0
598599
-------------

pandas/tseries/tests/test_timeseries.py

+23
Original file line numberDiff line numberDiff line change
@@ -879,6 +879,29 @@ def test_to_datetime_on_datetime64_series(self):
879879
result = to_datetime(s)
880880
self.assertEquals(result[0], s[0])
881881

882+
def test_to_datetime_with_apply(self):
883+
884+
# the format codes are locale-dependent; this is only tested with US/None locales
885+
import locale
886+
(lang,encoding) = locale.getlocale()
887+
if lang is not None:
888+
raise nose.SkipTest("format codes cannot work with a locale of {0}".format(lang))
889+
890+
# GH 5195
891+
# to_datetime fails on a single item when given both a format and coerce=True
892+
td = Series(['May 04', 'Jun 02', 'Dec 11'], index=[1,2,3])
893+
expected = pd.to_datetime(td, format='%b %y')
894+
result = td.apply(pd.to_datetime, format='%b %y')
895+
assert_series_equal(result, expected)
896+
897+
td = pd.Series(['May 04', 'Jun 02', ''], index=[1,2,3])
898+
self.assertRaises(ValueError, lambda : pd.to_datetime(td,format='%b %y'))
899+
self.assertRaises(ValueError, lambda : td.apply(pd.to_datetime, format='%b %y'))
900+
expected = pd.to_datetime(td, format='%b %y', coerce=True)
901+
902+
result = td.apply(lambda x: pd.to_datetime(x, format='%b %y', coerce=True))
903+
assert_series_equal(result, expected)
904+
882905
def test_nat_vector_field_access(self):
883906
idx = DatetimeIndex(['1/1/2000', None, None, '1/4/2000'])
884907

pandas/tseries/tools.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ def _convert_listlike(arg, box):
112112

113113
# fallback
114114
if result is None:
115-
result = tslib.array_strptime(arg, format)
115+
result = tslib.array_strptime(arg, format, coerce=coerce)
116116
else:
117117
result = tslib.array_to_datetime(arg, raise_=errors == 'raise',
118118
utc=utc, dayfirst=dayfirst,

pandas/tslib.pyx

+7-1
Original file line numberDiff line numberDiff line change
@@ -1174,7 +1174,7 @@ def repr_timedelta64(object value):
11741174

11751175
return "%s%02d:%02d:%s" % (sign_pretty, hours, minutes, seconds_pretty)
11761176

1177-
def array_strptime(ndarray[object] values, object fmt):
1177+
def array_strptime(ndarray[object] values, object fmt, coerce=False):
11781178
cdef:
11791179
Py_ssize_t i, n = len(values)
11801180
pandas_datetimestruct dts
@@ -1237,9 +1237,15 @@ def array_strptime(ndarray[object] values, object fmt):
12371237
for i in range(n):
12381238
found = format_regex.match(values[i])
12391239
if not found:
1240+
if coerce:
1241+
iresult[i] = iNaT
1242+
continue
12401243
raise ValueError("time data %r does not match format %r" %
12411244
(values[i], fmt))
12421245
if len(values[i]) != found.end():
1246+
if coerce:
1247+
iresult[i] = iNaT
1248+
continue
12431249
raise ValueError("unconverted data remains: %s" %
12441250
values[i][found.end():])
12451251
year = 1900

0 commit comments

Comments
 (0)