Skip to content

Commit 17583b3

Browse files
authored
API: to_datetime allow mixed numeric/datetime with errors=coerce (pandas-dev#50453)
* API: to_datetime allow mixed numeric/datetime with errors=coerce * GH ref * remove unused exp
1 parent 23c3676 commit 17583b3

File tree

3 files changed

+9
-31
lines changed

3 files changed

+9
-31
lines changed

doc/source/whatsnew/v2.0.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -476,7 +476,7 @@ Other API changes
476476
- Changed behavior of :meth:`Series.quantile` and :meth:`DataFrame.quantile` with :class:`SparseDtype` to retain sparse dtype (:issue:`49583`)
477477
- When creating a :class:`Series` with an object-dtype :class:`Index` of datetime objects, pandas no longer silently converts the index to a :class:`DatetimeIndex` (:issue:`39307`, :issue:`23598`)
478478
- :meth:`Series.unique` with dtype "timedelta64[ns]" or "datetime64[ns]" now returns :class:`TimedeltaArray` or :class:`DatetimeArray` instead of ``numpy.ndarray`` (:issue:`49176`)
479-
- :func:`to_datetime` and :class:`DatetimeIndex` now allow sequences containing both ``datetime`` objects and numeric entries, matching :class:`Series` behavior (:issue:`49037`)
479+
- :func:`to_datetime` and :class:`DatetimeIndex` now allow sequences containing both ``datetime`` objects and numeric entries, matching :class:`Series` behavior (:issue:`49037`, :issue:`50453`)
480480
- :func:`pandas.api.dtypes.is_string_dtype` now only returns ``True`` for array-likes with ``dtype=object`` when the elements are inferred to be strings (:issue:`15585`)
481481
- Passing a sequence containing ``datetime`` objects and ``date`` objects to :class:`Series` constructor will return with ``object`` dtype instead of ``datetime64[ns]`` dtype, consistent with :class:`Index` behavior (:issue:`49341`)
482482
- Passing strings that cannot be parsed as datetimes to :class:`Series` or :class:`DataFrame` with ``dtype="datetime64[ns]"`` will raise instead of silently ignoring the keyword and returning ``object`` dtype (:issue:`24435`)

pandas/_libs/tslib.pyx

-17
Original file line numberDiff line numberDiff line change
@@ -488,8 +488,6 @@ cpdef array_to_datetime(
488488
npy_datetimestruct dts
489489
NPY_DATETIMEUNIT out_bestunit
490490
bint utc_convert = bool(utc)
491-
bint seen_integer = False
492-
bint seen_datetime = False
493491
bint seen_datetime_offset = False
494492
bint is_raise = errors=="raise"
495493
bint is_ignore = errors=="ignore"
@@ -520,7 +518,6 @@ cpdef array_to_datetime(
520518
iresult[i] = NPY_NAT
521519

522520
elif PyDateTime_Check(val):
523-
seen_datetime = True
524521
if val.tzinfo is not None:
525522
found_tz = True
526523
else:
@@ -535,12 +532,10 @@ cpdef array_to_datetime(
535532
result[i] = parse_pydatetime(val, &dts, utc_convert)
536533

537534
elif PyDate_Check(val):
538-
seen_datetime = True
539535
iresult[i] = pydate_to_dt64(val, &dts)
540536
check_dts_bounds(&dts)
541537

542538
elif is_datetime64_object(val):
543-
seen_datetime = True
544539
iresult[i] = get_datetime64_nanos(val, NPY_FR_ns)
545540

546541
elif is_integer_object(val) or is_float_object(val):
@@ -555,7 +550,6 @@ cpdef array_to_datetime(
555550
)
556551
return values, tz_out
557552
# these must be ns unit by-definition
558-
seen_integer = True
559553

560554
if val != val or val == NPY_NAT:
561555
iresult[i] = NPY_NAT
@@ -688,17 +682,6 @@ cpdef array_to_datetime(
688682
except TypeError:
689683
return _array_to_datetime_object(values, errors, dayfirst, yearfirst)
690684

691-
if seen_datetime and seen_integer:
692-
# we have mixed datetimes & integers
693-
694-
if is_coerce:
695-
# coerce all of the integers/floats to NaT, preserve
696-
# the datetimes and other convertibles
697-
for i in range(n):
698-
val = values[i]
699-
if is_integer_object(val) or is_float_object(val):
700-
result[i] = NPY_NAT
701-
702685
if seen_datetime_offset and not utc_convert:
703686
# GH#17697
704687
# 1) If all the offsets are equal, return one offset for

pandas/tests/tools/test_to_datetime.py

+8-13
Original file line numberDiff line numberDiff line change
@@ -1628,29 +1628,24 @@ def test_unit_with_numeric_coerce(self, cache, exp, arr, warning):
16281628
tm.assert_index_equal(result, expected)
16291629

16301630
@pytest.mark.parametrize(
1631-
"exp, arr",
1631+
"arr",
16321632
[
1633-
[
1634-
["2013-01-01", "NaT", "NaT"],
1635-
[Timestamp("20130101"), 1.434692e18, 1.432766e18],
1636-
],
1637-
[
1638-
["NaT", "NaT", "2013-01-01"],
1639-
[1.434692e18, 1.432766e18, Timestamp("20130101")],
1640-
],
1633+
[Timestamp("20130101"), 1.434692e18, 1.432766e18],
1634+
[1.434692e18, 1.432766e18, Timestamp("20130101")],
16411635
],
16421636
)
1643-
def test_unit_mixed(self, cache, exp, arr):
1644-
1637+
def test_unit_mixed(self, cache, arr):
1638+
# GH#50453 pre-2.0 with mixed numeric/datetimes and errors="coerce"
1639+
# the numeric entries would be coerced to NaT; it was never clear exactly
1640+
# why.
16451641
# mixed integers/datetimes
1646-
expected = DatetimeIndex(exp)
1642+
expected = Index([Timestamp(x) for x in arr], dtype="M8[ns]")
16471643
result = to_datetime(arr, errors="coerce", cache=cache)
16481644
tm.assert_index_equal(result, expected)
16491645

16501646
# GH#49037 pre-2.0 this raised, but it always worked with Series,
16511647
# was never clear why it was disallowed
16521648
result = to_datetime(arr, errors="raise", cache=cache)
1653-
expected = Index([Timestamp(x) for x in arr], dtype="M8[ns]")
16541649
tm.assert_index_equal(result, expected)
16551650

16561651
result = DatetimeIndex(arr)

0 commit comments

Comments
 (0)