Skip to content

Commit 8d2b2f7

Browse files
jbrockmendel authored and mliu08 committed
API: allow mixed-datetimes-and-ints in to_datetime, DatetimeIndex (pandas-dev#49348)
* API: allow mixed-datetimes-and-ints in to_datetime, DatetimeIndex
* typo fixup
* typo fixup, update import
* mypy fixup
1 parent 6ce7787 commit 8d2b2f7

File tree

8 files changed

+28
-61
lines changed

8 files changed

+28
-61
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,7 @@ Other API changes
335335
- The ``other`` argument in :meth:`DataFrame.mask` and :meth:`Series.mask` now defaults to ``no_default`` instead of ``np.nan`` consistent with :meth:`DataFrame.where` and :meth:`Series.where`. Entries will be filled with the corresponding NULL value (``np.nan`` for numpy dtypes, ``pd.NA`` for extension dtypes). (:issue:`49111`)
336336
- When creating a :class:`Series` with an object-dtype :class:`Index` of datetime objects, pandas no longer silently converts the index to a :class:`DatetimeIndex` (:issue:`39307`, :issue:`23598`)
337337
- :meth:`Series.unique` with dtype "timedelta64[ns]" or "datetime64[ns]" now returns :class:`TimedeltaArray` or :class:`DatetimeArray` instead of ``numpy.ndarray`` (:issue:`49176`)
338+
- :func:`to_datetime` and :class:`DatetimeIndex` now allow sequences containing both ``datetime`` objects and numeric entries, matching :class:`Series` behavior (:issue:`49037`)
338339
- :func:`pandas.api.dtypes.is_string_dtype` now only returns ``True`` for array-likes with ``dtype=object`` when the elements are inferred to be strings (:issue:`15585`)
339340
- Passing a sequence containing ``datetime`` objects and ``date`` objects to :class:`Series` constructor will return with ``object`` dtype instead of ``datetime64[ns]`` dtype, consistent with :class:`Index` behavior (:issue:`49341`)
340341
- Passing strings that cannot be parsed as datetimes to :class:`Series` or :class:`DataFrame` with ``dtype="datetime64[ns]"`` will raise instead of silently ignoring the keyword and returning ``object`` dtype (:issue:`24435`)

pandas/_libs/tslib.pyi

-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ def array_to_datetime(
2424
yearfirst: bool = ...,
2525
utc: bool = ...,
2626
require_iso8601: bool = ...,
27-
allow_mixed: bool = ...,
2827
) -> tuple[np.ndarray, tzinfo | None]: ...
2928

3029
# returned ndarray may be object dtype or datetime64[ns]

pandas/_libs/tslib.pyx

-9
Original file line numberDiff line numberDiff line change
@@ -446,7 +446,6 @@ cpdef array_to_datetime(
446446
bint yearfirst=False,
447447
bint utc=False,
448448
bint require_iso8601=False,
449-
bint allow_mixed=False,
450449
):
451450
"""
452451
Converts a 1D array of date-like values to a numpy array of either:
@@ -475,8 +474,6 @@ cpdef array_to_datetime(
475474
indicator whether the dates should be UTC
476475
require_iso8601 : bool, default False
477476
indicator whether the datetime string should be iso8601
478-
allow_mixed : bool, default False
479-
Whether to allow mixed datetimes and integers.
480477
481478
Returns
482479
-------
@@ -710,12 +707,6 @@ cpdef array_to_datetime(
710707
val = values[i]
711708
if is_integer_object(val) or is_float_object(val):
712709
result[i] = NPY_NAT
713-
elif allow_mixed:
714-
pass
715-
elif is_raise:
716-
raise ValueError("mixed datetimes and integers in passed array")
717-
else:
718-
return _array_to_datetime_object(values, errors, dayfirst, yearfirst)
719710

720711
if seen_datetime_offset and not utc_convert:
721712
# GH#17697

pandas/core/arrays/datetimes.py

+1-12
Original file line numberDiff line numberDiff line change
@@ -1937,10 +1937,7 @@ def sequence_to_datetimes(data) -> DatetimeArray:
19371937
"""
19381938
Parse/convert the passed data to either DatetimeArray or np.ndarray[object].
19391939
"""
1940-
result, tz, freq = _sequence_to_dt64ns(
1941-
data,
1942-
allow_mixed=True,
1943-
)
1940+
result, tz, freq = _sequence_to_dt64ns(data)
19441941

19451942
unit = np.datetime_data(result.dtype)[0]
19461943
dtype = tz_to_dtype(tz, unit)
@@ -1956,7 +1953,6 @@ def _sequence_to_dt64ns(
19561953
dayfirst: bool = False,
19571954
yearfirst: bool = False,
19581955
ambiguous: TimeAmbiguous = "raise",
1959-
allow_mixed: bool = False,
19601956
):
19611957
"""
19621958
Parameters
@@ -1968,8 +1964,6 @@ def _sequence_to_dt64ns(
19681964
yearfirst : bool, default False
19691965
ambiguous : str, bool, or arraylike, default 'raise'
19701966
See pandas._libs.tslibs.tzconversion.tz_localize_to_utc.
1971-
allow_mixed : bool, default False
1972-
Interpret integers as timestamps when datetime objects are also present.
19731967
19741968
Returns
19751969
-------
@@ -2020,7 +2014,6 @@ def _sequence_to_dt64ns(
20202014
dayfirst=dayfirst,
20212015
yearfirst=yearfirst,
20222016
allow_object=False,
2023-
allow_mixed=allow_mixed,
20242017
)
20252018
if tz and inferred_tz:
20262019
# two timezones: convert to intended from base UTC repr
@@ -2109,7 +2102,6 @@ def objects_to_datetime64ns(
21092102
errors: DateTimeErrorChoices = "raise",
21102103
require_iso8601: bool = False,
21112104
allow_object: bool = False,
2112-
allow_mixed: bool = False,
21132105
):
21142106
"""
21152107
Convert data to array of timestamps.
@@ -2126,8 +2118,6 @@ def objects_to_datetime64ns(
21262118
allow_object : bool
21272119
Whether to return an object-dtype ndarray instead of raising if the
21282120
data contains more than one timezone.
2129-
allow_mixed : bool, default False
2130-
Interpret integers as timestamps when datetime objects are also present.
21312121
21322122
Returns
21332123
-------
@@ -2156,7 +2146,6 @@ def objects_to_datetime64ns(
21562146
dayfirst=dayfirst,
21572147
yearfirst=yearfirst,
21582148
require_iso8601=require_iso8601,
2159-
allow_mixed=allow_mixed,
21602149
)
21612150
result = result.reshape(data.shape, order=order)
21622151
except OverflowError as err:

pandas/core/dtypes/cast.py

+12-25
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@
5858
is_complex,
5959
is_complex_dtype,
6060
is_datetime64_dtype,
61-
is_dtype_equal,
6261
is_extension_array_dtype,
6362
is_float,
6463
is_float_dtype,
@@ -1222,7 +1221,7 @@ def maybe_cast_to_datetime(
12221221
Caller is responsible for handling ExtensionDtype cases and non dt64/td64
12231222
cases.
12241223
"""
1225-
from pandas.core.arrays.datetimes import sequence_to_datetimes
1224+
from pandas.core.arrays.datetimes import DatetimeArray
12261225
from pandas.core.arrays.timedeltas import TimedeltaArray
12271226

12281227
assert dtype.kind in ["m", "M"]
@@ -1238,36 +1237,24 @@ def maybe_cast_to_datetime(
12381237
res = TimedeltaArray._from_sequence(value, dtype=dtype)
12391238
return res
12401239

1241-
if is_datetime64_dtype(dtype):
1242-
# Incompatible types in assignment (expression has type
1243-
# "Union[dtype[Any], ExtensionDtype]", variable has type
1244-
# "Optional[dtype[Any]]")
1240+
else:
1241+
# error: Incompatible types in assignment (expression has type
1242+
# "Union[dtype[Any], ExtensionDtype]", variable has type "Optional[dtype[Any]]")
12451243
dtype = _ensure_nanosecond_dtype(dtype) # type: ignore[assignment]
12461244

1247-
value = np.array(value, copy=False)
1248-
1249-
# we have an array of datetime or timedeltas & nulls
1250-
if value.size or not is_dtype_equal(value.dtype, dtype):
1251-
_disallow_mismatched_datetimelike(value, dtype)
1252-
1253-
dta = sequence_to_datetimes(value)
1254-
# GH 25843: Remove tz information since the dtype
1255-
# didn't specify one
1256-
1257-
if dta.tz is not None:
1245+
try:
1246+
dta = DatetimeArray._from_sequence(value, dtype=dtype)
1247+
except ValueError as err:
1248+
# We can give a Series-specific exception message.
1249+
if "cannot supply both a tz and a timezone-naive dtype" in str(err):
12581250
raise ValueError(
12591251
"Cannot convert timezone-aware data to "
12601252
"timezone-naive dtype. Use "
12611253
"pd.Series(values).dt.tz_localize(None) instead."
1262-
)
1263-
1264-
# TODO(2.0): Do this astype in sequence_to_datetimes to
1265-
# avoid potential extra copy?
1266-
dta = dta.astype(dtype, copy=False)
1267-
return dta
1254+
) from err
1255+
raise
12681256

1269-
# at this point we have converted or raised in all cases where we had a list
1270-
return cast(ArrayLike, value)
1257+
return dta
12711258

12721259

12731260
def sanitize_to_nanoseconds(values: np.ndarray, copy: bool = False) -> np.ndarray:

pandas/tests/frame/methods/test_combine_first.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,8 @@
66
from pandas.compat import pa_version_under7p0
77
from pandas.errors import PerformanceWarning
88

9-
from pandas.core.dtypes.cast import (
10-
find_common_type,
11-
is_dtype_equal,
12-
)
9+
from pandas.core.dtypes.cast import find_common_type
10+
from pandas.core.dtypes.common import is_dtype_equal
1311

1412
import pandas as pd
1513
from pandas import (

pandas/tests/frame/test_constructors.py

+4-7
Original file line numberDiff line numberDiff line change
@@ -3021,14 +3021,11 @@ def test_from_scalar_datetimelike_mismatched(self, constructor, cls):
30213021
scalar = cls("NaT", "ns")
30223022
dtype = {np.datetime64: "m8[ns]", np.timedelta64: "M8[ns]"}[cls]
30233023

3024-
msg = "Cannot cast"
30253024
if cls is np.datetime64:
3026-
msg = "|".join(
3027-
[
3028-
r"dtype datetime64\[ns\] cannot be converted to timedelta64\[ns\]",
3029-
"Cannot cast",
3030-
]
3031-
)
3025+
msg1 = r"dtype datetime64\[ns\] cannot be converted to timedelta64\[ns\]"
3026+
else:
3027+
msg1 = r"dtype timedelta64\[ns\] cannot be converted to datetime64\[ns\]"
3028+
msg = "|".join(["Cannot cast", msg1])
30323029

30333030
with pytest.raises(TypeError, match=msg):
30343031
constructor(scalar, dtype=dtype)

pandas/tests/tools/test_to_datetime.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -1434,9 +1434,14 @@ def test_unit_mixed(self, cache, exp, arr):
14341434
result = to_datetime(arr, errors="coerce", cache=cache)
14351435
tm.assert_index_equal(result, expected)
14361436

1437-
msg = "mixed datetimes and integers in passed array"
1438-
with pytest.raises(ValueError, match=msg):
1439-
to_datetime(arr, errors="raise", cache=cache)
1437+
# GH#49037: pre-2.0 this raised, even though it always worked with Series;
1438+
# it was never clear why it was disallowed.
1439+
result = to_datetime(arr, errors="raise", cache=cache)
1440+
expected = Index([Timestamp(x) for x in arr], dtype="M8[ns]")
1441+
tm.assert_index_equal(result, expected)
1442+
1443+
result = DatetimeIndex(arr)
1444+
tm.assert_index_equal(result, expected)
14401445

14411446
def test_unit_rounding(self, cache):
14421447
# GH 14156 & GH 20445: argument will incur floating point errors

0 commit comments

Comments
 (0)