pandas-dev · mroeschke · Nov 8, 2022 · Nov 7, 2022 · Nov 7, 2022
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -249,6 +249,7 @@ Other API changes
 - Passing a sequence containing ``datetime`` objects and ``date`` objects to :class:`Series` constructor will return with ``object`` dtype instead of ``datetime64[ns]`` dtype, consistent with :class:`Index` behavior (:issue:`49341`)
 - Passing strings that cannot be parsed as datetimes to :class:`Series` or :class:`DataFrame` with ``dtype="datetime64[ns]"`` will raise instead of silently ignoring the keyword and returning ``object`` dtype (:issue:`24435`)
 - Passing a sequence containing a type that cannot be converted to :class:`Timedelta` to :func:`to_timedelta` or to the :class:`Series` or :class:`DataFrame` constructor with ``dtype="timedelta64[ns]"`` or to :class:`TimedeltaIndex` now raises ``TypeError`` instead of ``ValueError`` (:issue:`49525`)
+- Changed behavior of :class:`Index` constructor with a sequence containing at least one ``NaT`` and everything else either ``None`` or ``NaN`` to infer ``datetime64[ns]`` dtype instead of ``object``, matching :class:`Series` behavior (:issue:`49340`)
 -
 
 .. ---------------------------------------------------------------------------

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -2454,6 +2454,16 @@ def maybe_convert_objects(ndarray[object] objects,
         object val
         float64_t fnan = np.nan
 
+    if dtype_if_all_nat is not None:
+        # in practice we don't expect to ever pass dtype_if_all_nat
+        #  without both convert_datetime and convert_timedelta, so disallow
+        #  it to avoid needing to handle it below.
+        if not convert_datetime or not convert_timedelta:
+            raise ValueError(
+                "Cannot specify 'dtype_if_all_nat' without convert_datetime=True "
+                "and convert_timedelta=True"
+            )
+
     n = len(objects)
 
     floats = cnp.PyArray_EMPTY(1, objects.shape, cnp.NPY_FLOAT64, 0)
@@ -2491,7 +2501,7 @@ def maybe_convert_objects(ndarray[object] objects,
             if not (convert_datetime or convert_timedelta or convert_period):
                 seen.object_ = True
                 break
-        elif val is np.nan:
+        elif util.is_nan(val):
             seen.nan_ = True
             mask[i] = True
             floats[i] = complexes[i] = val
@@ -2641,6 +2651,38 @@ def maybe_convert_objects(ndarray[object] objects,
 
         seen.object_ = True
 
+    if seen.nat_:
+        if not seen.object_ and not seen.numeric_ and not seen.bool_:
+            # all NaT, None, or nan (at least one NaT)
+            # see GH#49340 for discussion of desired behavior
+            dtype = dtype_if_all_nat
+            if cnp.PyArray_DescrCheck(dtype):
+                # i.e. isinstance(dtype, np.dtype)
+                if dtype.kind not in ["m", "M"]:
+                    raise ValueError(dtype)
+                else:
+                    res = np.empty((<object>objects).shape, dtype=dtype)
+                    res[:] = NPY_NAT
+                    return res
+            elif dtype is not None:
+                # EA, we don't expect to get here, but _could_ implement
+                raise NotImplementedError(dtype)
+            elif convert_datetime and convert_timedelta:
+                # we don't guess
+                seen.object_ = True
+            elif convert_datetime:
+                res = np.empty((<object>objects).shape, dtype="M8[ns]")
+                res[:] = NPY_NAT
+                return res
+            elif convert_timedelta:
+                res = np.empty((<object>objects).shape, dtype="m8[ns]")
+                res[:] = NPY_NAT
+                return res
+            else:
+                seen.object_ = True
+        else:
+            seen.object_ = True
+
     if not seen.object_:
         result = None
         if not safe:
@@ -2666,20 +2708,6 @@ def maybe_convert_objects(ndarray[object] objects,
                     elif seen.timedelta_:
                         if not seen.numeric_:
                             result = timedeltas
-                    elif seen.nat_:
-                        if not seen.numeric_:
-                            if convert_datetime and convert_timedelta:
-                                dtype = dtype_if_all_nat
-                                if dtype is not None:
-                                    # otherwise we keep object dtype
-                                    result = _infer_all_nats(
-                                        dtype, datetimes, timedeltas
-                                    )
-
-                            elif convert_datetime:
-                                result = datetimes
-                            elif convert_timedelta:
-                                result = timedeltas
                     else:
                         if seen.complex_:
                             result = complexes
@@ -2711,20 +2739,6 @@ def maybe_convert_objects(ndarray[object] objects,
                     elif seen.timedelta_:
                         if not seen.numeric_:
                             result = timedeltas
-                    elif seen.nat_:
-                        if not seen.numeric_:
-                            if convert_datetime and convert_timedelta:
-                                dtype = dtype_if_all_nat
-                                if dtype is not None:
-                                    # otherwise we keep object dtype
-                                    result = _infer_all_nats(
-                                        dtype, datetimes, timedeltas
-                                    )
-
-                            elif convert_datetime:
-                                result = datetimes
-                            elif convert_timedelta:
-                                result = timedeltas
                     else:
                         if seen.complex_:
                             if not seen.int_:
@@ -2751,27 +2765,6 @@ def maybe_convert_objects(ndarray[object] objects,
     return objects
 
 
-cdef _infer_all_nats(dtype, ndarray datetimes, ndarray timedeltas):
-    """
-    If we have all-NaT values, cast these to the given dtype.
-    """
-    if cnp.PyArray_DescrCheck(dtype):
-        # i.e. isinstance(dtype, np.dtype):
-        if dtype == "M8[ns]":
-            result = datetimes
-        elif dtype == "m8[ns]":
-            result = timedeltas
-        else:
-            raise ValueError(dtype)
-    else:
-        # ExtensionDtype
-        cls = dtype.construct_array_type()
-        i8vals = cnp.PyArray_EMPTY(1, datetimes.shape, cnp.NPY_INT64, 0)
-        i8vals.fill(NPY_NAT)
-        result = cls(i8vals, dtype=dtype)
-    return result
-
-
 class _NoDefault(Enum):
     # We make this an Enum
     # 1) because it round-trips through pickle correctly (see GH#40397)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -1231,34 +1231,23 @@ def maybe_infer_to_datetimelike(
     if not len(v):
         return value
 
-    inferred_type = lib.infer_datetimelike_array(ensure_object(v))
-
-    if inferred_type in ["period", "interval", "timedelta", "datetime"]:
-        # Incompatible return value type (got "Union[ExtensionArray, ndarray]",
-        # expected "Union[ndarray, DatetimeArray, TimedeltaArray, PeriodArray,
-        # IntervalArray]")
-        return lib.maybe_convert_objects(  # type: ignore[return-value]
-            v,
-            convert_period=True,
-            convert_interval=True,
-            convert_timedelta=True,
-            convert_datetime=True,
-            dtype_if_all_nat=np.dtype("M8[ns]"),
-        )
-
-    elif inferred_type == "nat":
-        # if all NaT, return as datetime
-        # only reached if we have at least 1 NaT and the rest (NaT or None or np.nan)
-        # This is slightly different from what we'd get with maybe_convert_objects,
-        #  which only converts of all-NaT
-        from pandas.core.arrays.datetimes import sequence_to_datetimes
-
-        # Incompatible types in assignment (expression has type "DatetimeArray",
-        # variable has type "ndarray[Any, Any]")
-        value = sequence_to_datetimes(v)  # type: ignore[assignment]
-        assert value.dtype == "M8[ns]"
-
-    return value
+    out = lib.maybe_convert_objects(
+        v,
+        convert_period=True,
+        convert_interval=True,
+        convert_timedelta=True,
+        convert_datetime=True,
+        dtype_if_all_nat=np.dtype("M8[ns]"),
+    )
+    if out.dtype.kind in ["i", "u", "f", "b", "c"]:
+        # Here we do not convert numeric dtypes, as if we wanted that,
+        #  numpy would have done it for us.
+        #  See also _maybe_cast_data_without_dtype
+        return v
+    # Incompatible return value type (got "Union[ExtensionArray, ndarray[Any, Any]]",
+    # expected "Union[ndarray[Any, Any], DatetimeArray, TimedeltaArray, PeriodArray,
+    # IntervalArray]")
+    return out  # type: ignore[return-value]
 
 
 def maybe_cast_to_datetime(

diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
@@ -701,6 +701,29 @@ def test_convert_int_overflow(self, value):
         result = lib.maybe_convert_objects(arr)
         tm.assert_numpy_array_equal(arr, result)
 
+    @pytest.mark.parametrize("val", [None, np.nan, float("nan")])
+    @pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"])
+    def test_maybe_convert_objects_nat_inference(self, val, dtype):
+        dtype = np.dtype(dtype)
+        vals = np.array([pd.NaT, val], dtype=object)
+        result = lib.maybe_convert_objects(
+            vals,
+            convert_datetime=True,
+            convert_timedelta=True,
+            dtype_if_all_nat=dtype,
+        )
+        assert result.dtype == dtype
+        assert np.isnat(result).all()
+
+        result = lib.maybe_convert_objects(
+            vals[::-1],
+            convert_datetime=True,
+            convert_timedelta=True,
+            dtype_if_all_nat=dtype,
+        )
+        assert result.dtype == dtype
+        assert np.isnat(result).all()
+
     @pytest.mark.parametrize(
         "value, expected_dtype",
         [

diff --git a/pandas/tests/indexes/test_index_new.py b/pandas/tests/indexes/test_index_new.py
@@ -39,6 +39,24 @@
 
 
 class TestIndexConstructorInference:
+    @pytest.mark.parametrize("val", [NaT, None, np.nan, float("nan")])
+    def test_infer_nat(self, val):
+        # GH#49340 all NaT/None/nan and at least 1 NaT -> datetime64[ns],
+        #  matching Series behavior
+        values = [NaT, val]
+
+        idx = Index(values)
+        assert idx.dtype == "datetime64[ns]" and idx.isna().all()
+
+        idx = Index(values[::-1])
+        assert idx.dtype == "datetime64[ns]" and idx.isna().all()
+
+        idx = Index(np.array(values, dtype=object))
+        assert idx.dtype == "datetime64[ns]" and idx.isna().all()
+
+        idx = Index(np.array(values, dtype=object)[::-1])
+        assert idx.dtype == "datetime64[ns]" and idx.isna().all()
+
     @pytest.mark.parametrize("na_value", [None, np.nan])
     @pytest.mark.parametrize("vtype", [list, tuple, iter])
     def test_construction_list_tuples_nan(self, na_value, vtype):