diff --git a/RELEASE.rst b/RELEASE.rst index 436f9d8b833a3..f2c150341d2c6 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -155,6 +155,8 @@ pandas 0.11.1 - Fix running of bs4 tests when it is not installed (GH3605_) - Fix parsing of html table (GH3606_) - ``read_html()`` now only allows a single backend: ``html5lib`` (GH3616_) + - ``convert_objects`` with ``convert_dates='coerce'`` was parsing some single-letter strings + into today's date .. _GH3164: https://github.com/pydata/pandas/issues/3164 .. _GH2786: https://github.com/pydata/pandas/issues/2786 diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 5cff7f85593a6..75b8d1dc69452 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -3509,6 +3509,15 @@ def test_convert_objects(self): #result = r.convert_objects(convert_dates=True,convert_numeric=False) #self.assert_(result.dtype == 'M8[ns]') + # dateutil parses some single letters into today's value as a date + for x in 'abcdefghijklmnopqrstuvwxyz': + s = Series([x]) + result = s.convert_objects(convert_dates='coerce') + assert_series_equal(result,s) + s = Series([x.upper()]) + result = s.convert_objects(convert_dates='coerce') + assert_series_equal(result,s) + def test_apply_args(self): s = Series(['foo,bar']) diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index a633b9482da06..abec45b52a363 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -319,6 +319,7 @@ class Timestamp(_Timestamp): _nat_strings = set(['NaT','nat','NAT','nan','NaN','NAN']) +_not_datelike_strings = set(['a','A','m','M','p','P','t','T']) class NaTType(_NaT): """(N)ot-(A)-(T)ime, the time equivalent of NaN""" @@ -876,6 +877,14 @@ def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False, &dts) _check_dts_bounds(iresult[i], &dts) except ValueError: + + # for some reason, dateutil parses some single letter len-1 strings into today's date + if len(val) == 1 and val in _not_datelike_strings: + if coerce: + iresult[i] = iNaT + continue + elif raise_: + raise try: result[i] = parse(val, dayfirst=dayfirst) except Exception: