diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index f073d099d37ee..c2d2acd95ce43 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -31,9 +31,14 @@ import numpy as np cnp.import_array() +from pandas._libs.tslibs.dtypes cimport ( + get_supported_reso, + npy_unit_to_abbrev, +) from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, NPY_FR_ns, + get_datetime64_unit, import_pandas_datetime, npy_datetimestruct, npy_datetimestruct_to_datetime, @@ -441,6 +446,7 @@ cpdef array_to_datetime( utc : bool, default False indicator whether the dates should be UTC creso : NPY_DATETIMEUNIT, default NPY_FR_ns + Set to NPY_FR_GENERIC to infer a resolution. Returns ------- @@ -464,14 +470,19 @@ cpdef array_to_datetime( set out_tzoffset_vals = set() tzinfo tz_out = None cnp.flatiter it = cnp.PyArray_IterNew(values) - DatetimeParseState state = DatetimeParseState() - str reso_str + NPY_DATETIMEUNIT item_reso + bint infer_reso = creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC + DatetimeParseState state = DatetimeParseState(creso) + str abbrev # specify error conditions assert is_raise or is_ignore or is_coerce - reso_str = npy_unit_to_abbrev(creso) - result = np.empty((values).shape, dtype=f"M8[{reso_str}]") + if infer_reso: + abbrev = "ns" + else: + abbrev = npy_unit_to_abbrev(creso) + result = np.empty((values).shape, dtype=f"M8[{abbrev}]") iresult = result.view("i8").ravel() for i in range(n): @@ -484,19 +495,38 @@ cpdef array_to_datetime( iresult[i] = NPY_NAT elif PyDateTime_Check(val): + if isinstance(val, _Timestamp): + item_reso = val._creso + else: + item_reso = NPY_DATETIMEUNIT.NPY_FR_us + state.update_creso(item_reso) + if infer_reso: + creso = state.creso tz_out = state.process_datetime(val, tz_out, utc_convert) iresult[i] = parse_pydatetime(val, &dts, creso=creso) elif PyDate_Check(val): + item_reso = NPY_DATETIMEUNIT.NPY_FR_s + state.update_creso(item_reso) + if infer_reso: + creso = state.creso iresult[i] = pydate_to_dt64(val, &dts, reso=creso) state.found_other = True elif is_datetime64_object(val): + item_reso = get_supported_reso(get_datetime64_unit(val)) + state.update_creso(item_reso) + if infer_reso: + creso = state.creso iresult[i] = get_datetime64_nanos(val, creso) state.found_other = True elif is_integer_object(val) or is_float_object(val): # these must be ns unit by-definition + item_reso = NPY_FR_ns + state.update_creso(item_reso) + if infer_reso: + creso = state.creso if val != val or val == NPY_NAT: iresult[i] = NPY_NAT @@ -514,11 +544,20 @@ cpdef array_to_datetime( if parse_today_now(val, &iresult[i], utc, creso): # We can't _quite_ dispatch this to convert_str_to_tsobject # bc there isn't a nice way to pass "utc" + item_reso = NPY_DATETIMEUNIT.NPY_FR_us + state.update_creso(item_reso) + if infer_reso: + creso = state.creso continue _ts = convert_str_to_tsobject( val, None, unit="ns", dayfirst=dayfirst, yearfirst=yearfirst ) + item_reso = _ts.creso + state.update_creso(item_reso) + if infer_reso: + creso = state.creso + _ts.ensure_reso(creso, val) iresult[i] = _ts.value @@ -586,6 +625,23 @@ cpdef array_to_datetime( # e.g. test_to_datetime_mixed_awareness_mixed_types raise ValueError("Cannot mix tz-aware with tz-naive values") + if infer_reso: + if state.creso_ever_changed: + # We encountered mismatched resolutions, need to re-parse with + # the correct one. + return array_to_datetime( + values, + errors=errors, + yearfirst=yearfirst, + dayfirst=dayfirst, + utc=utc, + creso=state.creso, + ) + + # Otherwise we can use the single reso that we encountered and avoid + # a second pass. + abbrev = npy_unit_to_abbrev(state.creso) + result = iresult.view(f"M8[{abbrev}]") return result, tz_out diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index 3ab214196a218..dbf81013662e7 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -23,6 +23,66 @@ creso_infer = NpyDatetimeUnit.NPY_FR_GENERIC.value +class TestArrayToDatetimeResolutionInference: + # TODO: tests that include tzs, ints + + def test_infer_homogeoneous_datetimes(self): + dt = datetime(2023, 10, 27, 18, 3, 5, 678000) + arr = np.array([dt, dt, dt], dtype=object) + result, tz = tslib.array_to_datetime(arr, creso=creso_infer) + assert tz is None + expected = np.array([dt, dt, dt], dtype="M8[us]") + tm.assert_numpy_array_equal(result, expected) + + def test_infer_homogeoneous_date_objects(self): + dt = datetime(2023, 10, 27, 18, 3, 5, 678000) + dt2 = dt.date() + arr = np.array([None, dt2, dt2, dt2], dtype=object) + result, tz = tslib.array_to_datetime(arr, creso=creso_infer) + assert tz is None + expected = np.array([np.datetime64("NaT"), dt2, dt2, dt2], dtype="M8[s]") + tm.assert_numpy_array_equal(result, expected) + + def test_infer_homogeoneous_dt64(self): + dt = datetime(2023, 10, 27, 18, 3, 5, 678000) + dt64 = np.datetime64(dt, "ms") + arr = np.array([None, dt64, dt64, dt64], dtype=object) + result, tz = tslib.array_to_datetime(arr, creso=creso_infer) + assert tz is None + expected = np.array([np.datetime64("NaT"), dt64, dt64, dt64], dtype="M8[ms]") + tm.assert_numpy_array_equal(result, expected) + + def test_infer_homogeoneous_timestamps(self): + dt = datetime(2023, 10, 27, 18, 3, 5, 678000) + ts = Timestamp(dt).as_unit("ns") + arr = np.array([None, ts, ts, ts], dtype=object) + result, tz = tslib.array_to_datetime(arr, creso=creso_infer) + assert tz is None + expected = np.array([np.datetime64("NaT")] + [ts.asm8] * 3, dtype="M8[ns]") + tm.assert_numpy_array_equal(result, expected) + + def test_infer_homogeoneous_datetimes_strings(self): + item = "2023-10-27 18:03:05.678000" + arr = np.array([None, item, item, item], dtype=object) + result, tz = tslib.array_to_datetime(arr, creso=creso_infer) + assert tz is None + expected = np.array([np.datetime64("NaT"), item, item, item], dtype="M8[us]") + tm.assert_numpy_array_equal(result, expected) + + def test_infer_heterogeneous(self): + dtstr = "2023-10-27 18:03:05.678000" + + arr = np.array([dtstr, dtstr[:-3], dtstr[:-7], None], dtype=object) + result, tz = tslib.array_to_datetime(arr, creso=creso_infer) + assert tz is None + expected = np.array(arr, dtype="M8[us]") + tm.assert_numpy_array_equal(result, expected) + + result, tz = tslib.array_to_datetime(arr[::-1], creso=creso_infer) + assert tz is None + tm.assert_numpy_array_equal(result, expected[::-1]) + + class TestArrayToDatetimeWithTZResolutionInference: def test_array_to_datetime_with_tz_resolution(self): tz = tzoffset("custom", 3600)