Skip to content

Commit e4441df

Browse files
sinhrks authored and jreback committed
BUG: Format mismatch doesn't coerce to NaT (#24815)
1 parent 8eaccd8 commit e4441df

File tree

3 files changed

+70
-2
lines changed

3 files changed

+70
-2
lines changed

doc/source/whatsnew/v0.24.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1550,6 +1550,7 @@ Datetimelike
15501550
- Bug in :meth:`DatetimeIndex.astype`, :meth:`PeriodIndex.astype` and :meth:`TimedeltaIndex.astype` ignoring the sign of the ``dtype`` for unsigned integer dtypes (:issue:`24405`).
15511551
- Fixed bug in :meth:`Series.max` with ``datetime64[ns]``-dtype failing to return ``NaT`` when nulls are present and ``skipna=False`` is passed (:issue:`24265`)
15521552
- Bug in :func:`to_datetime` where arrays of ``datetime`` objects containing both timezone-aware and timezone-naive ``datetimes`` would fail to raise ``ValueError`` (:issue:`24569`)
1553+
- Bug in :func:`to_datetime` where input that did not match the given ``format`` was not coerced to ``NaT`` even when ``errors='coerce'`` was passed (:issue:`24763`)
15531554

15541555
Timedelta
15551556
^^^^^^^^^

pandas/core/tools/datetimes.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -265,15 +265,25 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
265265
except tslibs.OutOfBoundsDatetime:
266266
if errors == 'raise':
267267
raise
268-
result = arg
268+
elif errors == 'coerce':
269+
result = np.empty(arg.shape, dtype='M8[ns]')
270+
iresult = result.view('i8')
271+
iresult.fill(tslibs.iNaT)
272+
else:
273+
result = arg
269274
except ValueError:
270275
# if format was inferred, try falling back
271276
# to array_to_datetime - terminate here
272277
# for specified formats
273278
if not infer_datetime_format:
274279
if errors == 'raise':
275280
raise
276-
result = arg
281+
elif errors == 'coerce':
282+
result = np.empty(arg.shape, dtype='M8[ns]')
283+
iresult = result.view('i8')
284+
iresult.fill(tslibs.iNaT)
285+
else:
286+
result = arg
277287
except ValueError as e:
278288
# Fallback to try to convert datetime objects if timezone-aware
279289
# datetime objects are found without passing `utc=True`

pandas/tests/indexes/datetimes/test_tools.py

+57
Original file line numberDiff line numberDiff line change
@@ -555,6 +555,63 @@ def test_datetime_invalid_datatype(self):
555555
with pytest.raises(TypeError):
556556
pd.to_datetime(pd.to_datetime)
557557

558+
@pytest.mark.parametrize('value', ["a", "00:01:99"])
@pytest.mark.parametrize('infer', [True, False])
@pytest.mark.parametrize('format', [None, '%H:%M:%S'])
def test_datetime_invalid_scalar(self, value, format, infer):
    """Check each ``errors`` mode of ``to_datetime`` on an unparseable scalar.

    Every parametrized ``value`` is a string that is not a valid datetime,
    both when the format is left unspecified (``None``) and when the
    explicit ``'%H:%M:%S'`` format is supplied. The test asserts:

    * ``errors='ignore'`` returns the input unchanged,
    * ``errors='coerce'`` returns ``pd.NaT``,
    * ``errors='raise'`` raises ``ValueError``.
    """
    # GH24763
    # NOTE: the explicit format must be a well-formed strptime pattern
    # ('%' precedes each directive letter); otherwise the test would fail
    # on the pattern itself rather than on the value/format mismatch.
    res = pd.to_datetime(value, errors='ignore', format=format,
                         infer_datetime_format=infer)
    assert res == value

    res = pd.to_datetime(value, errors='coerce', format=format,
                         infer_datetime_format=infer)
    assert res is pd.NaT

    with pytest.raises(ValueError):
        pd.to_datetime(value, errors='raise', format=format,
                       infer_datetime_format=infer)
574+
575+
@pytest.mark.parametrize('value', ["3000/12/11 00:00:00"])
@pytest.mark.parametrize('infer', [True, False])
@pytest.mark.parametrize('format', [None, '%H:%M:%S'])
def test_datetime_outofbounds_scalar(self, value, format, infer):
    """Check each ``errors`` mode of ``to_datetime`` on a year-3000 scalar.

    The test asserts:

    * ``errors='ignore'`` returns the input unchanged,
    * ``errors='coerce'`` returns ``pd.NaT``,
    * ``errors='raise'`` raises ``ValueError`` when an explicit format is
      given, and ``OutOfBoundsDatetime`` when the format is ``None``.
    """
    # GH24763
    # NOTE: the explicit format must be a well-formed strptime pattern
    # ('%' precedes each directive letter) so that the failure comes from
    # the value not matching the format, not from a broken pattern.
    res = pd.to_datetime(value, errors='ignore', format=format,
                         infer_datetime_format=infer)
    assert res == value

    res = pd.to_datetime(value, errors='coerce', format=format,
                         infer_datetime_format=infer)
    assert res is pd.NaT

    if format is not None:
        with pytest.raises(ValueError):
            pd.to_datetime(value, errors='raise', format=format,
                           infer_datetime_format=infer)
    else:
        with pytest.raises(OutOfBoundsDatetime):
            pd.to_datetime(value, errors='raise', format=format,
                           infer_datetime_format=infer)
596+
597+
@pytest.mark.parametrize('values', [["a"], ["00:01:99"],
                                    ["a", "b", "99:00:00"]])
@pytest.mark.parametrize('infer', [True, False])
@pytest.mark.parametrize('format', [None, '%H:%M:%S'])
def test_datetime_invalid_index(self, values, format, infer):
    """Check each ``errors`` mode of ``to_datetime`` on unparseable lists.

    Every parametrized ``values`` list holds only strings that are not
    valid datetimes. The test asserts:

    * ``errors='ignore'`` returns an ``Index`` equal to the input values,
    * ``errors='coerce'`` returns a ``DatetimeIndex`` of ``NaT`` with the
      same length as the input,
    * ``errors='raise'`` raises ``ValueError``.
    """
    # GH24763
    # NOTE: the explicit format must be a well-formed strptime pattern
    # ('%' precedes each directive letter) so that the failure comes from
    # the values not matching the format, not from a broken pattern.
    res = pd.to_datetime(values, errors='ignore', format=format,
                         infer_datetime_format=infer)
    tm.assert_index_equal(res, pd.Index(values))

    res = pd.to_datetime(values, errors='coerce', format=format,
                         infer_datetime_format=infer)
    tm.assert_index_equal(res, pd.DatetimeIndex([pd.NaT] * len(values)))

    with pytest.raises(ValueError):
        pd.to_datetime(values, errors='raise', format=format,
                       infer_datetime_format=infer)
614+
558615
@pytest.mark.parametrize("utc", [True, None])
559616
@pytest.mark.parametrize("format", ['%Y%m%d %H:%M:%S', None])
560617
@pytest.mark.parametrize("box", [True, False])

0 commit comments

Comments
 (0)