diff --git a/pandas/_libs/tslib.pyi b/pandas/_libs/tslib.pyi index cc08b17e0ff5d..9a0364de2f02c 100644 --- a/pandas/_libs/tslib.pyi +++ b/pandas/_libs/tslib.pyi @@ -12,7 +12,7 @@ def format_array_from_datetime( reso: int = ..., # NPY_DATETIMEUNIT ) -> npt.NDArray[np.object_]: ... def array_with_unit_to_datetime( - values: np.ndarray, + values: npt.NDArray[np.object_], unit: str, errors: str = ..., ) -> tuple[np.ndarray, tzinfo | None]: ... diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 6f0ab6eb0d532..c1a30e03235b5 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -18,7 +18,6 @@ import_datetime() cimport numpy as cnp from numpy cimport ( - float64_t, int64_t, ndarray, ) @@ -231,7 +230,7 @@ def format_array_from_datetime( def array_with_unit_to_datetime( - ndarray values, + ndarray[object] values, str unit, str errors="coerce" ): @@ -266,70 +265,24 @@ def array_with_unit_to_datetime( cdef: Py_ssize_t i, n=len(values) int64_t mult - int prec = 0 - ndarray[float64_t] fvalues bint is_ignore = errors=="ignore" bint is_coerce = errors=="coerce" bint is_raise = errors=="raise" - bint need_to_iterate = True ndarray[int64_t] iresult ndarray[object] oresult - ndarray mask object tz = None assert is_ignore or is_coerce or is_raise if unit == "ns": - if issubclass(values.dtype.type, (np.integer, np.float_)): - result = values.astype("M8[ns]", copy=False) - else: - result, tz = array_to_datetime( - values.astype(object, copy=False), - errors=errors, - ) + result, tz = array_to_datetime( + values.astype(object, copy=False), + errors=errors, + ) return result, tz mult, _ = precision_from_unit(unit) - if is_raise: - # try a quick conversion to i8/f8 - # if we have nulls that are not type-compat - # then need to iterate - - if values.dtype.kind in ["i", "f", "u"]: - iresult = values.astype("i8", copy=False) - # fill missing values by comparing to NPY_NAT - mask = iresult == NPY_NAT - # Trying to Convert NaN to integer results in undefined - # behaviour, so handle it explicitly (see GH #48705) - if values.dtype.kind == "f": - mask |= values != values - iresult[mask] = 0 - fvalues = iresult.astype("f8") * mult - need_to_iterate = False - - if not need_to_iterate: - # check the bounds - if (fvalues < Timestamp.min.value).any() or ( - (fvalues > Timestamp.max.value).any() - ): - raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'") - - if values.dtype.kind in ["i", "u"]: - result = (iresult * mult).astype("M8[ns]") - - elif values.dtype.kind == "f": - fresult = (values * mult).astype("f8") - fresult[mask] = 0 - if prec: - fresult = round(fresult, prec) - result = fresult.astype("M8[ns]", copy=False) - - iresult = result.view("i8") - iresult[mask] = NPY_NAT - - return result, tz - result = np.empty(n, dtype="M8[ns]") iresult = result.view("i8") diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 595d13b95fe12..885eb6286393b 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -27,11 +27,13 @@ OutOfBoundsDatetime, Timedelta, Timestamp, + astype_overflowsafe, iNaT, nat_strings, parsing, timezones as libtimezones, ) +from pandas._libs.tslibs.conversion import precision_from_unit from pandas._libs.tslibs.parsing import ( DateParseError, format_is_iso, @@ -557,7 +559,48 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index: tz_parsed = None else: arg = np.asarray(arg) - arr, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors) + + if arg.dtype.kind in ["i", "u"]: + # Note we can't do "f" here because that could induce unwanted + # rounding GH#14156, GH#20445 + arr = arg.astype(f"datetime64[{unit}]", copy=False) + try: + arr = astype_overflowsafe(arr, np.dtype("M8[ns]"), copy=False) + except OutOfBoundsDatetime: + if errors == "raise": + raise + arg = arg.astype(object) + return _to_datetime_with_unit(arg, unit, name, utc, errors) + tz_parsed = None + + elif arg.dtype.kind == "f": + mult, _ = precision_from_unit(unit) + + iresult = arg.astype("i8") + mask = np.isnan(arg) | (arg == iNaT) + iresult[mask] = 0 + + fvalues = iresult.astype("f8") * mult + + if (fvalues < Timestamp.min.value).any() or ( + fvalues > Timestamp.max.value + ).any(): + if errors != "raise": + arg = arg.astype(object) + return _to_datetime_with_unit(arg, unit, name, utc, errors) + raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'") + + # TODO: is fresult meaningfully different from fvalues? + fresult = (arg * mult).astype("f8") + fresult[mask] = 0 + + arr = fresult.astype("M8[ns]", copy=False) + arr[mask] = np.datetime64("NaT", "ns") + + tz_parsed = None + else: + arg = arg.astype(object, copy=False) + arr, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors) if errors == "ignore": # Index constructor _may_ infer to DatetimeIndex