Skip to content

Commit 2713873

Browse files
authored
API: Index([NaT, None]) match Series([NaT, None]) (#49566)
* API: Index([NaT, None]) match Series([NaT, None]) * mypy fixup
1 parent 49a03f4 commit 2713873

File tree

5 files changed

+102
-78
lines changed

5 files changed

+102
-78
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,7 @@ Other API changes
249249
- Passing a sequence containing ``datetime`` objects and ``date`` objects to :class:`Series` constructor will return with ``object`` dtype instead of ``datetime64[ns]`` dtype, consistent with :class:`Index` behavior (:issue:`49341`)
250250
- Passing strings that cannot be parsed as datetimes to :class:`Series` or :class:`DataFrame` with ``dtype="datetime64[ns]"`` will raise instead of silently ignoring the keyword and returning ``object`` dtype (:issue:`24435`)
251251
- Passing a sequence containing a type that cannot be converted to :class:`Timedelta` to :func:`to_timedelta` or to the :class:`Series` or :class:`DataFrame` constructor with ``dtype="timedelta64[ns]"`` or to :class:`TimedeltaIndex` now raises ``TypeError`` instead of ``ValueError`` (:issue:`49525`)
252+
- Changed behavior of the :class:`Index` constructor when passed a sequence containing at least one ``NaT`` and everything else either ``None`` or ``NaN``: it now infers ``datetime64[ns]`` dtype instead of ``object``, matching :class:`Series` behavior (:issue:`49340`)
252253
-
253254

254255
.. ---------------------------------------------------------------------------

pandas/_libs/lib.pyx

+43-50
Original file line numberDiff line numberDiff line change
@@ -2454,6 +2454,16 @@ def maybe_convert_objects(ndarray[object] objects,
24542454
object val
24552455
float64_t fnan = np.nan
24562456

2457+
if dtype_if_all_nat is not None:
2458+
# in practice we don't expect to ever pass dtype_if_all_nat
2459+
# without both convert_datetime and convert_timedelta, so disallow
2460+
# it to avoid needing to handle it below.
2461+
if not convert_datetime or not convert_timedelta:
2462+
raise ValueError(
2463+
"Cannot specify 'dtype_if_all_nat' without convert_datetime=True "
2464+
"and convert_timedelta=True"
2465+
)
2466+
24572467
n = len(objects)
24582468

24592469
floats = cnp.PyArray_EMPTY(1, objects.shape, cnp.NPY_FLOAT64, 0)
@@ -2491,7 +2501,7 @@ def maybe_convert_objects(ndarray[object] objects,
24912501
if not (convert_datetime or convert_timedelta or convert_period):
24922502
seen.object_ = True
24932503
break
2494-
elif val is np.nan:
2504+
elif util.is_nan(val):
24952505
seen.nan_ = True
24962506
mask[i] = True
24972507
floats[i] = complexes[i] = val
@@ -2641,6 +2651,38 @@ def maybe_convert_objects(ndarray[object] objects,
26412651

26422652
seen.object_ = True
26432653

2654+
if seen.nat_:
2655+
if not seen.object_ and not seen.numeric_ and not seen.bool_:
2656+
# all NaT, None, or nan (at least one NaT)
2657+
# see GH#49340 for discussion of desired behavior
2658+
dtype = dtype_if_all_nat
2659+
if cnp.PyArray_DescrCheck(dtype):
2660+
# i.e. isinstance(dtype, np.dtype)
2661+
if dtype.kind not in ["m", "M"]:
2662+
raise ValueError(dtype)
2663+
else:
2664+
res = np.empty((<object>objects).shape, dtype=dtype)
2665+
res[:] = NPY_NAT
2666+
return res
2667+
elif dtype is not None:
2668+
# EA, we don't expect to get here, but _could_ implement
2669+
raise NotImplementedError(dtype)
2670+
elif convert_datetime and convert_timedelta:
2671+
# we don't guess
2672+
seen.object_ = True
2673+
elif convert_datetime:
2674+
res = np.empty((<object>objects).shape, dtype="M8[ns]")
2675+
res[:] = NPY_NAT
2676+
return res
2677+
elif convert_timedelta:
2678+
res = np.empty((<object>objects).shape, dtype="m8[ns]")
2679+
res[:] = NPY_NAT
2680+
return res
2681+
else:
2682+
seen.object_ = True
2683+
else:
2684+
seen.object_ = True
2685+
26442686
if not seen.object_:
26452687
result = None
26462688
if not safe:
@@ -2666,20 +2708,6 @@ def maybe_convert_objects(ndarray[object] objects,
26662708
elif seen.timedelta_:
26672709
if not seen.numeric_:
26682710
result = timedeltas
2669-
elif seen.nat_:
2670-
if not seen.numeric_:
2671-
if convert_datetime and convert_timedelta:
2672-
dtype = dtype_if_all_nat
2673-
if dtype is not None:
2674-
# otherwise we keep object dtype
2675-
result = _infer_all_nats(
2676-
dtype, datetimes, timedeltas
2677-
)
2678-
2679-
elif convert_datetime:
2680-
result = datetimes
2681-
elif convert_timedelta:
2682-
result = timedeltas
26832711
else:
26842712
if seen.complex_:
26852713
result = complexes
@@ -2711,20 +2739,6 @@ def maybe_convert_objects(ndarray[object] objects,
27112739
elif seen.timedelta_:
27122740
if not seen.numeric_:
27132741
result = timedeltas
2714-
elif seen.nat_:
2715-
if not seen.numeric_:
2716-
if convert_datetime and convert_timedelta:
2717-
dtype = dtype_if_all_nat
2718-
if dtype is not None:
2719-
# otherwise we keep object dtype
2720-
result = _infer_all_nats(
2721-
dtype, datetimes, timedeltas
2722-
)
2723-
2724-
elif convert_datetime:
2725-
result = datetimes
2726-
elif convert_timedelta:
2727-
result = timedeltas
27282742
else:
27292743
if seen.complex_:
27302744
if not seen.int_:
@@ -2751,27 +2765,6 @@ def maybe_convert_objects(ndarray[object] objects,
27512765
return objects
27522766

27532767

2754-
cdef _infer_all_nats(dtype, ndarray datetimes, ndarray timedeltas):
2755-
"""
2756-
If we have all-NaT values, cast these to the given dtype.
2757-
"""
2758-
if cnp.PyArray_DescrCheck(dtype):
2759-
# i.e. isinstance(dtype, np.dtype):
2760-
if dtype == "M8[ns]":
2761-
result = datetimes
2762-
elif dtype == "m8[ns]":
2763-
result = timedeltas
2764-
else:
2765-
raise ValueError(dtype)
2766-
else:
2767-
# ExtensionDtype
2768-
cls = dtype.construct_array_type()
2769-
i8vals = cnp.PyArray_EMPTY(1, datetimes.shape, cnp.NPY_INT64, 0)
2770-
i8vals.fill(NPY_NAT)
2771-
result = cls(i8vals, dtype=dtype)
2772-
return result
2773-
2774-
27752768
class _NoDefault(Enum):
27762769
# We make this an Enum
27772770
# 1) because it round-trips through pickle correctly (see GH#40397)

pandas/core/dtypes/cast.py

+17-28
Original file line numberDiff line numberDiff line change
@@ -1203,34 +1203,23 @@ def maybe_infer_to_datetimelike(
12031203
if not len(v):
12041204
return value
12051205

1206-
inferred_type = lib.infer_datetimelike_array(ensure_object(v))
1207-
1208-
if inferred_type in ["period", "interval", "timedelta", "datetime"]:
1209-
# Incompatible return value type (got "Union[ExtensionArray, ndarray]",
1210-
# expected "Union[ndarray, DatetimeArray, TimedeltaArray, PeriodArray,
1211-
# IntervalArray]")
1212-
return lib.maybe_convert_objects( # type: ignore[return-value]
1213-
v,
1214-
convert_period=True,
1215-
convert_interval=True,
1216-
convert_timedelta=True,
1217-
convert_datetime=True,
1218-
dtype_if_all_nat=np.dtype("M8[ns]"),
1219-
)
1220-
1221-
elif inferred_type == "nat":
1222-
# if all NaT, return as datetime
1223-
# only reached if we have at least 1 NaT and the rest (NaT or None or np.nan)
1224-
# This is slightly different from what we'd get with maybe_convert_objects,
1225-
# which only converts if all-NaT
1226-
from pandas.core.arrays.datetimes import sequence_to_datetimes
1227-
1228-
# Incompatible types in assignment (expression has type "DatetimeArray",
1229-
# variable has type "ndarray[Any, Any]")
1230-
value = sequence_to_datetimes(v) # type: ignore[assignment]
1231-
assert value.dtype == "M8[ns]"
1232-
1233-
return value
1206+
out = lib.maybe_convert_objects(
1207+
v,
1208+
convert_period=True,
1209+
convert_interval=True,
1210+
convert_timedelta=True,
1211+
convert_datetime=True,
1212+
dtype_if_all_nat=np.dtype("M8[ns]"),
1213+
)
1214+
if out.dtype.kind in ["i", "u", "f", "b", "c"]:
1215+
# Here we do not convert numeric dtypes, as if we wanted that,
1216+
# numpy would have done it for us.
1217+
# See also _maybe_cast_data_without_dtype
1218+
return v
1219+
# Incompatible return value type (got "Union[ExtensionArray, ndarray[Any, Any]]",
1220+
# expected "Union[ndarray[Any, Any], DatetimeArray, TimedeltaArray, PeriodArray,
1221+
# IntervalArray]")
1222+
return out # type: ignore[return-value]
12341223

12351224

12361225
def maybe_cast_to_datetime(

pandas/tests/dtypes/test_inference.py

+23
Original file line numberDiff line numberDiff line change
@@ -701,6 +701,29 @@ def test_convert_int_overflow(self, value):
701701
result = lib.maybe_convert_objects(arr)
702702
tm.assert_numpy_array_equal(arr, result)
703703

704+
@pytest.mark.parametrize("val", [None, np.nan, float("nan")])
705+
@pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"])
706+
def test_maybe_convert_objects_nat_inference(self, val, dtype):
707+
dtype = np.dtype(dtype)
708+
vals = np.array([pd.NaT, val], dtype=object)
709+
result = lib.maybe_convert_objects(
710+
vals,
711+
convert_datetime=True,
712+
convert_timedelta=True,
713+
dtype_if_all_nat=dtype,
714+
)
715+
assert result.dtype == dtype
716+
assert np.isnat(result).all()
717+
718+
result = lib.maybe_convert_objects(
719+
vals[::-1],
720+
convert_datetime=True,
721+
convert_timedelta=True,
722+
dtype_if_all_nat=dtype,
723+
)
724+
assert result.dtype == dtype
725+
assert np.isnat(result).all()
726+
704727
@pytest.mark.parametrize(
705728
"value, expected_dtype",
706729
[

pandas/tests/indexes/test_index_new.py

+18
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,24 @@
3939

4040

4141
class TestIndexConstructorInference:
42+
@pytest.mark.parametrize("val", [NaT, None, np.nan, float("nan")])
43+
def test_infer_nat(self, val):
44+
# GH#49340 all NaT/None/nan and at least 1 NaT -> datetime64[ns],
45+
# matching Series behavior
46+
values = [NaT, val]
47+
48+
idx = Index(values)
49+
assert idx.dtype == "datetime64[ns]" and idx.isna().all()
50+
51+
idx = Index(values[::-1])
52+
assert idx.dtype == "datetime64[ns]" and idx.isna().all()
53+
54+
idx = Index(np.array(values, dtype=object))
55+
assert idx.dtype == "datetime64[ns]" and idx.isna().all()
56+
57+
idx = Index(np.array(values, dtype=object)[::-1])
58+
assert idx.dtype == "datetime64[ns]" and idx.isna().all()
59+
4260
@pytest.mark.parametrize("na_value", [None, np.nan])
4361
@pytest.mark.parametrize("vtype", [list, tuple, iter])
4462
def test_construction_list_tuples_nan(self, na_value, vtype):

0 commit comments

Comments
 (0)