diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 1936404b75602..03bd1b996955a 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -942,6 +942,7 @@ cdef class Seen: cdef: bint int_ # seen_int + bint nat_ # seen_nat (NaT encountered) bint bool_ # seen_bool bint null_ # seen_null bint uint_ # seen_uint (unsigned integer) @@ -965,6 +966,7 @@ cdef class Seen: initial methods to convert to numeric fail. """ self.int_ = 0 + self.nat_ = 0 self.bool_ = 0 self.null_ = 0 self.uint_ = 0 @@ -1044,11 +1046,13 @@ cdef class Seen: @property def is_bool(self): - return not (self.datetime_ or self.numeric_ or self.timedelta_) + return not (self.datetime_ or self.numeric_ or self.timedelta_ + or self.nat_) @property def is_float_or_complex(self): - return not (self.bool_ or self.datetime_ or self.timedelta_) + return not (self.bool_ or self.datetime_ or self.timedelta_ + or self.nat_) cdef _try_infer_map(v): @@ -1947,12 +1951,11 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, seen.null_ = 1 floats[i] = complexes[i] = fnan elif val is NaT: + seen.nat_ = 1 if convert_datetime: idatetimes[i] = NPY_NAT - seen.datetime_ = 1 if convert_timedelta: itimedeltas[i] = NPY_NAT - seen.timedelta_ = 1 if not (convert_datetime or convert_timedelta): seen.object_ = 1 break @@ -2046,11 +2049,20 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, else: if not seen.bool_: if seen.datetime_: - if not seen.numeric_: + if not seen.numeric_ and not seen.timedelta_: return datetimes elif seen.timedelta_: if not seen.numeric_: return timedeltas + elif seen.nat_: + if not seen.numeric_: + if convert_datetime and convert_timedelta: + # TODO: NaT without datetime/timedelta values is ambiguous; resolve it here + pass + elif convert_datetime: + return datetimes + elif convert_timedelta: + return timedeltas else: if seen.complex_: return complexes @@ -2077,11 +2089,20 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, else: if not seen.bool_: if seen.datetime_: - 
if not seen.numeric_: + if not seen.numeric_ and not seen.timedelta_: return datetimes elif seen.timedelta_: if not seen.numeric_: return timedeltas + elif seen.nat_: + if not seen.numeric_: + if convert_datetime and convert_timedelta: + # TODO: NaT without datetime/timedelta values is ambiguous; resolve it here + pass + elif convert_datetime: + return datetimes + elif convert_timedelta: + return timedeltas else: if seen.complex_: if not seen.int_: diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 4d688976cd50b..ff48ae9b3c2e5 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -531,6 +531,25 @@ def test_maybe_convert_objects_uint64(self): exp = np.array([2 ** 63, -1], dtype=object) tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp) + def test_maybe_convert_objects_datetime(self): + # GH27438 + arr = np.array( + [np.datetime64("2000-01-01"), np.timedelta64(1, "s")], dtype=object + ) + exp = arr.copy() + out = lib.maybe_convert_objects(arr, convert_datetime=1, convert_timedelta=1) + tm.assert_numpy_array_equal(out, exp) + + arr = np.array([pd.NaT, np.timedelta64(1, "s")], dtype=object) + exp = np.array([np.timedelta64("NaT"), np.timedelta64(1, "s")], dtype="m8[ns]") + out = lib.maybe_convert_objects(arr, convert_datetime=1, convert_timedelta=1) + tm.assert_numpy_array_equal(out, exp) + + arr = np.array([np.timedelta64(1, "s"), np.nan], dtype=object) + exp = arr.copy() + out = lib.maybe_convert_objects(arr, convert_datetime=1, convert_timedelta=1) + tm.assert_numpy_array_equal(out, exp) + def test_mixed_dtypes_remain_object_array(self): # GH14956 array = np.array([datetime(2015, 1, 1, tzinfo=pytz.utc), 1], dtype=object)