diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index 767cc59882233..ef2b91d044d86 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -303,7 +303,8 @@ Bug Fixes - Bug in ``Series.astype("unicode")`` not calling ``unicode`` on the values correctly (:issue:`7758`) - Bug in ``DataFrame.as_matrix()`` with mixed ``datetime64[ns]`` and ``timedelta64[ns]`` dtypes (:issue:`7778`) - Bug in ``HDFStore.select_column()`` not preserving UTC timezone info when selecting a DatetimeIndex (:issue:`7777`) - +- Bug in ``to_datetime`` when ``format='%Y%m%d'`` and ``coerce=True`` are specified, where previously an object array was returned (rather than + a coerced time-series with ``NaT``) (:issue:`7930`) - Bug in ``DatetimeIndex`` and ``PeriodIndex`` in-place addition and subtraction cause different result from normal one (:issue:`6527`) - Bug in adding and subtracting ``PeriodIndex`` with ``PeriodIndex`` raise ``TypeError`` (:issue:`7741`) - Bug in ``combine_first`` with ``PeriodIndex`` data raises ``TypeError`` (:issue:`3367`) diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index c54c133dd2afe..6dbf095189d36 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -3934,6 +3934,16 @@ def test_to_datetime_format_YYYYMMDD(self): result = to_datetime(s,format='%Y%m%d') assert_series_equal(result, expected) + # coercion + # GH 7930 + s = Series([20121231, 20141231, 99991231]) + result = pd.to_datetime(s,format='%Y%m%d') + expected = np.array([ datetime(2012,12,31), datetime(2014,12,31), datetime(9999,12,31) ], dtype=object) + self.assert_numpy_array_equal(result, expected) + + result = pd.to_datetime(s,format='%Y%m%d', coerce=True) + expected = Series(['20121231','20141231','NaT'],dtype='M8[ns]') + assert_series_equal(result, expected) def test_to_datetime_format_microsecond(self): val = '01-Apr-2011 00:00:01.978' diff --git a/pandas/tseries/tools.py 
b/pandas/tseries/tools.py index 457a95deb16d9..45bea00ac104f 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -262,7 +262,7 @@ def _convert_listlike(arg, box, format): # shortcut formatting here if format == '%Y%m%d': try: - result = _attempt_YYYYMMDD(arg) + result = _attempt_YYYYMMDD(arg, coerce=coerce) except: raise ValueError("cannot convert the input to '%Y%m%d' date format") @@ -313,14 +313,14 @@ def _convert_listlike(arg, box, format): class DateParseError(ValueError): pass -def _attempt_YYYYMMDD(arg): +def _attempt_YYYYMMDD(arg, coerce): """ try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like, arg is a passed in as an object dtype, but could really be ints/strings with nan-like/or floats (e.g. with nan) """ def calc(carg): # calculate the actual result carg = carg.astype(object) - return lib.try_parse_year_month_day(carg/10000,carg/100 % 100, carg % 100) + return tslib.array_to_datetime(lib.try_parse_year_month_day(carg/10000,carg/100 % 100, carg % 100), coerce=coerce) def calc_with_mask(carg,mask): result = np.empty(carg.shape, dtype='M8[ns]')