diff --git a/pandas/_libs/tslibs/dtypes.pxd b/pandas/_libs/tslibs/dtypes.pxd index 84a3af0490c0a..9c0eb0f9b0945 100644 --- a/pandas/_libs/tslibs/dtypes.pxd +++ b/pandas/_libs/tslibs/dtypes.pxd @@ -7,6 +7,7 @@ cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit) cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) nogil cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=*) except? -1 cdef int64_t periods_per_second(NPY_DATETIMEUNIT reso) except? -1 +cdef int64_t get_conversion_factor(NPY_DATETIMEUNIT from_unit, NPY_DATETIMEUNIT to_unit) cdef dict attrname_to_abbrevs diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx index 5da3944bfb147..9d33343c62443 100644 --- a/pandas/_libs/tslibs/dtypes.pyx +++ b/pandas/_libs/tslibs/dtypes.pyx @@ -1,5 +1,7 @@ # period frequency constants corresponding to scikits timeseries # originals +cimport cython + from enum import Enum from pandas._libs.tslibs.np_datetime cimport NPY_DATETIMEUNIT @@ -361,6 +363,46 @@ cdef int64_t periods_per_second(NPY_DATETIMEUNIT reso) except? -1: raise NotImplementedError(reso) +@cython.overflowcheck(True) +cdef int64_t get_conversion_factor(NPY_DATETIMEUNIT from_unit, NPY_DATETIMEUNIT to_unit): + """ + Find the factor by which we need to multiply to convert from from_unit to to_unit. + """ + if ( + from_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC + or to_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC + ): + raise ValueError("unit-less resolutions are not supported") + if from_unit > to_unit: + raise ValueError + + if from_unit == to_unit: + return 1 + + if from_unit == NPY_DATETIMEUNIT.NPY_FR_W: + return 7 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_D, to_unit) + elif from_unit == NPY_DATETIMEUNIT.NPY_FR_D: + return 24 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_h, to_unit) + elif from_unit == NPY_DATETIMEUNIT.NPY_FR_h: + return 60 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_m, to_unit) + elif from_unit == NPY_DATETIMEUNIT.NPY_FR_m: + return 60 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_s, to_unit) + elif from_unit == NPY_DATETIMEUNIT.NPY_FR_s: + return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ms, to_unit) + elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ms: + return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_us, to_unit) + elif from_unit == NPY_DATETIMEUNIT.NPY_FR_us: + return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ns, to_unit) + elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ns: + return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ps, to_unit) + elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ps: + return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_fs, to_unit) + elif from_unit == NPY_DATETIMEUNIT.NPY_FR_fs: + return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_as, to_unit) + else: + raise ValueError(from_unit, to_unit) + + cdef dict _reso_str_map = { Resolution.RESO_NS.value: "nanosecond", Resolution.RESO_US.value: "microsecond", diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.c b/pandas/_libs/tslibs/src/datetime/np_datetime.c index 9029f9ae14b34..2bac6c720c3b6 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime.c +++ b/pandas/_libs/tslibs/src/datetime/np_datetime.c @@ -954,7 +954,7 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, case NPY_FR_s: // special case where we can simplify many expressions bc per_sec=1 - per_day = 86400000LL; + per_day = 86400LL; per_sec = 1L; // put frac in seconds @@ -1023,6 +1023,52 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td, } break; + case NPY_FR_m: + + out->days = td / 1440LL; + td -= out->days * 1440LL; + out->hrs = td / 60LL; + td -= out->hrs * 60LL; + out->min = td; + + out->sec = 0; + out->ms = 0; + out->us = 0; + out->ns = 0; + break; + + case NPY_FR_h: + out->days = td / 24LL; + td -= out->days * 24LL; + out->hrs = td; + + out->min = 0; + out->sec = 0; + out->ms = 0; + out->us = 0; + out->ns = 0; + break; + + case NPY_FR_D: + out->days = td; + out->hrs = 0; + out->min = 0; + out->sec = 0; + out->ms = 0; + out->us = 0; + out->ns = 0; + break; + + case NPY_FR_W: + out->days = 7 * td; + out->hrs = 0; + out->min = 0; + out->sec = 0; + out->ms = 0; + out->us = 0; + out->ns = 0; + break; + default: PyErr_SetString(PyExc_RuntimeError, "NumPy timedelta metadata is corrupted with " diff --git a/pandas/_libs/tslibs/timedeltas.pxd b/pandas/_libs/tslibs/timedeltas.pxd index 3019be5a59c46..eb562e743fc01 100644 --- a/pandas/_libs/tslibs/timedeltas.pxd +++ b/pandas/_libs/tslibs/timedeltas.pxd @@ -21,3 +21,4 @@ cdef class _Timedelta(timedelta): cdef bint _has_ns(self) cdef _ensure_components(_Timedelta self) cdef inline bint _compare_mismatched_resos(self, _Timedelta other, op) + cdef _Timedelta _as_reso(self, NPY_DATETIMEUNIT reso, bint round_ok=*) diff --git a/pandas/_libs/tslibs/timedeltas.pyi b/pandas/_libs/tslibs/timedeltas.pyi index c547503bae273..57cc76811fea7 100644 --- a/pandas/_libs/tslibs/timedeltas.pyi +++ b/pandas/_libs/tslibs/timedeltas.pyi @@ -152,3 +152,4 @@ class Timedelta(timedelta): def freq(self) -> None: ... @property def is_populated(self) -> bool: ... + def _as_unit(self, unit: str, round_ok: bool = ...) -> Timedelta: ... diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 4eb1494c4d56c..593b31f0d9113 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -35,7 +35,10 @@ from pandas._libs.tslibs.conversion cimport ( cast_from_unit, precision_from_unit, ) -from pandas._libs.tslibs.dtypes cimport npy_unit_to_abbrev +from pandas._libs.tslibs.dtypes cimport ( + get_conversion_factor, + npy_unit_to_abbrev, +) from pandas._libs.tslibs.nattype cimport ( NPY_NAT, c_NaT as NaT, @@ -1377,6 +1380,40 @@ cdef class _Timedelta(timedelta): # exposing as classmethod for testing return _timedelta_from_value_and_reso(value, reso) + def _as_unit(self, str unit, bint round_ok=True): + dtype = np.dtype(f"m8[{unit}]") + reso = get_unit_from_dtype(dtype) + try: + return self._as_reso(reso, round_ok=round_ok) + except OverflowError as err: + raise OutOfBoundsTimedelta( + f"Cannot cast {self} to unit='{unit}' without overflow." + ) from err + + @cython.cdivision(False) + cdef _Timedelta _as_reso(self, NPY_DATETIMEUNIT reso, bint round_ok=True): + cdef: + int64_t value, mult, div, mod + + if reso == self._reso: + return self + + if reso < self._reso: + # e.g. ns -> us + mult = get_conversion_factor(reso, self._reso) + div, mod = divmod(self.value, mult) + if mod > 0 and not round_ok: + raise ValueError("Cannot losslessly convert units") + + # Note that when mod > 0, we follow np.timedelta64 in always + # rounding down. + value = div + else: + mult = get_conversion_factor(self._reso, reso) + with cython.overflowcheck(True): + value = self.value * mult + return type(self)._from_value_and_reso(value, reso=reso) + # Python front end to C extension type _Timedelta # This serves as the box for timedelta64 diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index cf7211e82b799..5ef69075c5e2f 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -13,6 +13,7 @@ NaT, iNaT, ) +from pandas.errors import OutOfBoundsTimedelta import pandas as pd from pandas import ( @@ -24,6 +25,80 @@ import pandas._testing as tm +class TestAsUnit: + def test_as_unit(self): + td = Timedelta(days=1) + + assert td._as_unit("ns") is td + + res = td._as_unit("us") + assert res.value == td.value // 1000 + assert res._reso == td._reso - 1 + + rt = res._as_unit("ns") + assert rt.value == td.value + assert rt._reso == td._reso + + res = td._as_unit("ms") + assert res.value == td.value // 1_000_000 + assert res._reso == td._reso - 2 + + rt = res._as_unit("ns") + assert rt.value == td.value + assert rt._reso == td._reso + + res = td._as_unit("s") + assert res.value == td.value // 1_000_000_000 + assert res._reso == td._reso - 3 + + rt = res._as_unit("ns") + assert rt.value == td.value + assert rt._reso == td._reso + + def test_as_unit_overflows(self): + # microsecond that would be just out of bounds for nano + us = 9223372800000000 + td = Timedelta._from_value_and_reso(us, 9) + + msg = "Cannot cast 106752 days 00:00:00 to unit='ns' without overflow" + with pytest.raises(OutOfBoundsTimedelta, match=msg): + td._as_unit("ns") + + res = td._as_unit("ms") + assert res.value == us // 1000 + assert res._reso == 8 + + def test_as_unit_rounding(self): + td = Timedelta(microseconds=1500) + res = td._as_unit("ms") + + expected = Timedelta(milliseconds=1) + assert res == expected + + assert res._reso == 8 + assert res.value == 1 + + with pytest.raises(ValueError, match="Cannot losslessly convert units"): + td._as_unit("ms", round_ok=False) + + def test_as_unit_non_nano(self): + # case where we are going neither to nor from nano + td = Timedelta(days=1)._as_unit("D") + assert td.days == 1 + assert td.value == 1 + assert td.components.days == 1 + assert td._d == 1 + assert td.total_seconds() == 86400 + + res = td._as_unit("h") + assert res.value == 24 + assert res.components.days == 1 + assert res.components.hours == 0 + assert res._d == 1 + assert res._h == 0 + assert res.total_seconds() == 86400 + + class TestNonNano: @pytest.fixture(params=[7, 8, 9]) def unit(self, request):