From 4120c5115614ef7beaa777cf6a0bc3179119ad70 Mon Sep 17 00:00:00 2001 From: sinhrks Date: Thu, 17 Jan 2019 18:04:05 +0900 Subject: [PATCH] BUG: Format mismatch doesn't coerce to NaT --- doc/source/whatsnew/v0.24.0.rst | 1 + pandas/core/tools/datetimes.py | 14 ++++- pandas/tests/indexes/datetimes/test_tools.py | 57 ++++++++++++++++++++ 3 files changed, 70 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 3685a24d60e74..673a1b8a8581f 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1548,6 +1548,7 @@ Datetimelike - Bug in :meth:`DatetimeIndex.astype`, :meth:`PeriodIndex.astype` and :meth:`TimedeltaIndex.astype` ignoring the sign of the ``dtype`` for unsigned integer dtypes (:issue:`24405`). - Fixed bug in :meth:`Series.max` with ``datetime64[ns]``-dtype failing to return ``NaT`` when nulls are present and ``skipna=False`` is passed (:issue:`24265`) - Bug in :func:`to_datetime` where arrays of ``datetime`` objects containing both timezone-aware and timezone-naive ``datetimes`` would fail to raise ``ValueError`` (:issue:`24569`) +- Bug in :func:`to_datetime` where an invalid datetime format would fail to coerce input to ``NaT`` even when ``errors='coerce'`` is passed (:issue:`24763`) Timedelta ^^^^^^^^^ diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 5b540ee88a3f3..e6478da400d76 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -265,7 +265,12 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, except tslibs.OutOfBoundsDatetime: if errors == 'raise': raise - result = arg + elif errors == 'coerce': + result = np.empty(arg.shape, dtype='M8[ns]') + iresult = result.view('i8') + iresult.fill(tslibs.iNaT) + else: + result = arg except ValueError: # if format was inferred, try falling back # to array_to_datetime - terminate here @@ -273,7 +278,12 @@ def _convert_listlike_datetimes(arg, box, format, 
name=None, tz=None, if not infer_datetime_format: if errors == 'raise': raise - result = arg + elif errors == 'coerce': + result = np.empty(arg.shape, dtype='M8[ns]') + iresult = result.view('i8') + iresult.fill(tslibs.iNaT) + else: + result = arg except ValueError as e: # Fallback to try to convert datetime objects if timezone-aware # datetime objects are found without passing `utc=True` diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 50c8f8d4c1f4c..bec2fa66c43cd 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -555,6 +555,63 @@ def test_datetime_invalid_datatype(self): with pytest.raises(TypeError): pd.to_datetime(pd.to_datetime) + @pytest.mark.parametrize('value', ["a", "00:01:99"]) + @pytest.mark.parametrize('infer', [True, False]) + @pytest.mark.parametrize('format', [None, 'H%:M%:S%']) + def test_datetime_invalid_scalar(self, value, format, infer): + # GH24763 + res = pd.to_datetime(value, errors='ignore', format=format, + infer_datetime_format=infer) + assert res == value + + res = pd.to_datetime(value, errors='coerce', format=format, + infer_datetime_format=infer) + assert res is pd.NaT + + with pytest.raises(ValueError): + pd.to_datetime(value, errors='raise', format=format, + infer_datetime_format=infer) + + @pytest.mark.parametrize('value', ["3000/12/11 00:00:00"]) + @pytest.mark.parametrize('infer', [True, False]) + @pytest.mark.parametrize('format', [None, 'H%:M%:S%']) + def test_datetime_outofbounds_scalar(self, value, format, infer): + # GH24763 + res = pd.to_datetime(value, errors='ignore', format=format, + infer_datetime_format=infer) + assert res == value + + res = pd.to_datetime(value, errors='coerce', format=format, + infer_datetime_format=infer) + assert res is pd.NaT + + if format is not None: + with pytest.raises(ValueError): + pd.to_datetime(value, errors='raise', format=format, + infer_datetime_format=infer) + else: 
+ with pytest.raises(OutOfBoundsDatetime): + pd.to_datetime(value, errors='raise', format=format, + infer_datetime_format=infer) + + @pytest.mark.parametrize('values', [["a"], ["00:01:99"], + ["a", "b", "99:00:00"]]) + @pytest.mark.parametrize('infer', [True, False]) + @pytest.mark.parametrize('format', [None, 'H%:M%:S%']) + def test_datetime_invalid_index(self, values, format, infer): + # GH24763 + res = pd.to_datetime(values, errors='ignore', format=format, + infer_datetime_format=infer) + tm.assert_index_equal(res, pd.Index(values)) + + res = pd.to_datetime(values, errors='coerce', format=format, + infer_datetime_format=infer) + tm.assert_index_equal(res, pd.DatetimeIndex([pd.NaT] * len(values))) + + with pytest.raises(ValueError): + pd.to_datetime(values, errors='raise', format=format, + infer_datetime_format=infer) + @pytest.mark.parametrize("utc", [True, None]) @pytest.mark.parametrize("format", ['%Y%m%d %H:%M:%S', None]) @pytest.mark.parametrize("box", [True, False])