Skip to content

Commit a4e3338

Browse files
authored
ENH: implement Timedelta._as_unit (#47162)
1 parent d47eae8 commit a4e3338

File tree

7 files changed

+205
-2
lines changed

7 files changed

+205
-2
lines changed

pandas/_libs/tslibs/dtypes.pxd

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit)
77
cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) nogil
88
cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=*) except? -1
99
cdef int64_t periods_per_second(NPY_DATETIMEUNIT reso) except? -1
10+
cdef int64_t get_conversion_factor(NPY_DATETIMEUNIT from_unit, NPY_DATETIMEUNIT to_unit)
1011

1112
cdef dict attrname_to_abbrevs
1213

pandas/_libs/tslibs/dtypes.pyx

+42
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# period frequency constants corresponding to scikits timeseries
22
# originals
3+
cimport cython
4+
35
from enum import Enum
46

57
from pandas._libs.tslibs.np_datetime cimport NPY_DATETIMEUNIT
@@ -361,6 +363,46 @@ cdef int64_t periods_per_second(NPY_DATETIMEUNIT reso) except? -1:
361363
raise NotImplementedError(reso)
362364

363365

366+
@cython.overflowcheck(True)
cdef int64_t get_conversion_factor(NPY_DATETIMEUNIT from_unit, NPY_DATETIMEUNIT to_unit):
    """
    Find the factor by which we need to multiply to convert from from_unit to to_unit.

    The factor is built recursively: each call peels off one step from
    ``from_unit`` to the next finer unit (W -> D -> h -> m -> s -> ms -> us
    -> ns -> ps -> fs -> as) and multiplies by that step's ratio until
    ``from_unit == to_unit``.

    Parameters
    ----------
    from_unit : NPY_DATETIMEUNIT
        Resolution of the values being converted. Must not be finer than
        ``to_unit`` (i.e. must not compare greater than it).
    to_unit : NPY_DATETIMEUNIT
        Resolution being converted to.

    Returns
    -------
    int64_t
        1 if the units are equal, otherwise the product of the per-step ratios.

    Raises
    ------
    ValueError
        If either unit is NPY_FR_GENERIC, if ``from_unit > to_unit``, or if
        ``from_unit`` is not one of the units handled below.
    """
    # NOTE(review): unlike periods_per_second, this signature declares no error
    # return value (``except? -1``). Confirm that the exceptions raised below,
    # and any error from the overflow check on the multiplications, actually
    # reach the caller. Adding the clause also requires updating the matching
    # declaration in dtypes.pxd.
    if (
        from_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC
        or to_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC
    ):
        raise ValueError("unit-less resolutions are not supported")
    if from_unit > to_unit:
        # The factor would be a fraction, which an int64 cannot represent.
        raise ValueError("from_unit must not be a finer resolution than to_unit")

    if from_unit == to_unit:
        return 1

    # Step one unit finer and recurse; the recursion ends at from_unit == to_unit.
    if from_unit == NPY_DATETIMEUNIT.NPY_FR_W:
        return 7 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_D, to_unit)
    elif from_unit == NPY_DATETIMEUNIT.NPY_FR_D:
        return 24 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_h, to_unit)
    elif from_unit == NPY_DATETIMEUNIT.NPY_FR_h:
        return 60 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_m, to_unit)
    elif from_unit == NPY_DATETIMEUNIT.NPY_FR_m:
        return 60 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_s, to_unit)
    elif from_unit == NPY_DATETIMEUNIT.NPY_FR_s:
        return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ms, to_unit)
    elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ms:
        return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_us, to_unit)
    elif from_unit == NPY_DATETIMEUNIT.NPY_FR_us:
        return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ns, to_unit)
    elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ns:
        return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ps, to_unit)
    elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ps:
        return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_fs, to_unit)
    elif from_unit == NPY_DATETIMEUNIT.NPY_FR_fs:
        return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_as, to_unit)
    else:
        # No fixed-length step is defined for any other from_unit.
        raise ValueError(from_unit, to_unit)
404+
405+
364406
cdef dict _reso_str_map = {
365407
Resolution.RESO_NS.value: "nanosecond",
366408
Resolution.RESO_US.value: "microsecond",

pandas/_libs/tslibs/src/datetime/np_datetime.c

+47-1
Original file line numberDiff line numberDiff line change
@@ -954,7 +954,7 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td,
954954
case NPY_FR_s:
955955
// special case where we can simplify many expressions bc per_sec=1
956956

957-
per_day = 86400000LL;
957+
per_day = 86400LL;
958958
per_sec = 1L;
959959

960960
// put frac in seconds
@@ -1023,6 +1023,52 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td,
10231023
}
10241024
break;
10251025

1026+
case NPY_FR_m:
1027+
1028+
out->days = td / 1440LL;
1029+
td -= out->days * 1440LL;
1030+
out->hrs = td / 60LL;
1031+
td -= out->hrs * 60LL;
1032+
out->min = td;
1033+
1034+
out->sec = 0;
1035+
out->ms = 0;
1036+
out->us = 0;
1037+
out->ns = 0;
1038+
break;
1039+
1040+
case NPY_FR_h:
1041+
out->days = td / 24LL;
1042+
td -= out->days * 24LL;
1043+
out->hrs = td;
1044+
1045+
out->min = 0;
1046+
out->sec = 0;
1047+
out->ms = 0;
1048+
out->us = 0;
1049+
out->ns = 0;
1050+
break;
1051+
1052+
case NPY_FR_D:
1053+
out->days = td;
1054+
out->hrs = 0;
1055+
out->min = 0;
1056+
out->sec = 0;
1057+
out->ms = 0;
1058+
out->us = 0;
1059+
out->ns = 0;
1060+
break;
1061+
1062+
case NPY_FR_W:
1063+
out->days = 7 * td;
1064+
out->hrs = 0;
1065+
out->min = 0;
1066+
out->sec = 0;
1067+
out->ms = 0;
1068+
out->us = 0;
1069+
out->ns = 0;
1070+
break;
1071+
10261072
default:
10271073
PyErr_SetString(PyExc_RuntimeError,
10281074
"NumPy timedelta metadata is corrupted with "

pandas/_libs/tslibs/timedeltas.pxd

+1
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,4 @@ cdef class _Timedelta(timedelta):
2121
cdef bint _has_ns(self)
2222
cdef _ensure_components(_Timedelta self)
2323
cdef inline bint _compare_mismatched_resos(self, _Timedelta other, op)
24+
cdef _Timedelta _as_reso(self, NPY_DATETIMEUNIT reso, bint round_ok=*)

pandas/_libs/tslibs/timedeltas.pyi

+1
Original file line numberDiff line numberDiff line change
@@ -152,3 +152,4 @@ class Timedelta(timedelta):
152152
def freq(self) -> None: ...
153153
@property
154154
def is_populated(self) -> bool: ...
155+
def _as_unit(self, unit: str, round_ok: bool = ...) -> Timedelta: ...

pandas/_libs/tslibs/timedeltas.pyx

+38-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,10 @@ from pandas._libs.tslibs.conversion cimport (
3535
cast_from_unit,
3636
precision_from_unit,
3737
)
38-
from pandas._libs.tslibs.dtypes cimport npy_unit_to_abbrev
38+
from pandas._libs.tslibs.dtypes cimport (
39+
get_conversion_factor,
40+
npy_unit_to_abbrev,
41+
)
3942
from pandas._libs.tslibs.nattype cimport (
4043
NPY_NAT,
4144
c_NaT as NaT,
@@ -1377,6 +1380,40 @@ cdef class _Timedelta(timedelta):
13771380
# exposing as classmethod for testing
13781381
return _timedelta_from_value_and_reso(value, reso)
13791382

1383+
def _as_unit(self, str unit, bint round_ok=True):
    """
    Convert this Timedelta to the resolution named by ``unit``.

    Parameters
    ----------
    unit : str
        Unit abbreviation, interpolated into a ``m8[unit]`` dtype string.
    round_ok : bool, default True
        Forwarded to ``_as_reso``; when False, a conversion that would
        discard a remainder raises ValueError.

    Returns
    -------
    Timedelta

    Raises
    ------
    OutOfBoundsTimedelta
        If ``_as_reso`` raises OverflowError during the conversion.
    """
    target_reso = get_unit_from_dtype(np.dtype(f"m8[{unit}]"))
    try:
        converted = self._as_reso(target_reso, round_ok=round_ok)
    except OverflowError as err:
        # Re-raise as the pandas-specific error, keeping the original cause.
        raise OutOfBoundsTimedelta(
            f"Cannot cast {self} to unit='{unit}' without overflow."
        ) from err
    return converted
1392+
1393+
@cython.cdivision(False)
cdef _Timedelta _as_reso(self, NPY_DATETIMEUNIT reso, bint round_ok=True):
    """
    Return this Timedelta expressed at resolution ``reso``.

    Returns ``self`` unchanged when ``reso`` already matches. Going to a
    coarser resolution divides the value (raising ValueError on a nonzero
    remainder unless ``round_ok``); going to a finer one multiplies it with
    overflow checking enabled.
    """
    cdef:
        int64_t converted, factor, quotient, remainder

    if reso == self._reso:
        return self

    if reso > self._reso:
        # Finer target: scale up, with overflow checking on the multiply.
        factor = get_conversion_factor(self._reso, reso)
        with cython.overflowcheck(True):
            converted = self.value * factor
    else:
        # Coarser target, e.g. ns -> us
        factor = get_conversion_factor(reso, self._reso)
        quotient, remainder = divmod(self.value, factor)
        if not round_ok and remainder > 0:
            raise ValueError("Cannot losslessly convert units")

        # A nonzero remainder is dropped: like np.timedelta64, we always
        # round down.
        converted = quotient

    return type(self)._from_value_and_reso(converted, reso=reso)
1416+
13801417

13811418
# Python front end to C extension type _Timedelta
13821419
# This serves as the box for timedelta64

pandas/tests/scalar/timedelta/test_timedelta.py

+75
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
NaT,
1414
iNaT,
1515
)
16+
from pandas.errors import OutOfBoundsTimedelta
1617

1718
import pandas as pd
1819
from pandas import (
@@ -24,6 +25,80 @@
2425
import pandas._testing as tm
2526

2627

28+
class TestAsUnit:
    """Tests for converting a Timedelta between resolutions via ``_as_unit``."""

    def test_as_unit(self):
        td = Timedelta(days=1)

        # Asking for the unit we already have hands back the same object.
        assert td._as_unit("ns") is td

        # (target unit, nanoseconds per unit, distance from td's reso code)
        cases = [
            ("us", 1000, 1),
            ("ms", 1_000_000, 2),
            ("s", 1_000_000_000, 3),
        ]
        for unit, ns_per_unit, reso_offset in cases:
            res = td._as_unit(unit)
            assert res.value == td.value // ns_per_unit
            assert res._reso == td._reso - reso_offset

            # Round-tripping back to nanoseconds restores value and reso.
            rt = res._as_unit("ns")
            assert rt.value == td.value
            assert rt._reso == td._reso

    def test_as_unit_overflows(self):
        # microsecond that would be just out of bounds for nano
        us = 9223372800000000
        td = Timedelta._from_value_and_reso(us, 9)

        expected_msg = "Cannot cast 106752 days 00:00:00 to unit='ns' without overflow"
        with pytest.raises(OutOfBoundsTimedelta, match=expected_msg):
            td._as_unit("ns")

        # Going coarser instead of finer stays in bounds.
        res = td._as_unit("ms")
        assert res.value == us // 1000
        assert res._reso == 8

    def test_as_unit_rounding(self):
        td = Timedelta(microseconds=1500)
        res = td._as_unit("ms")

        # 1500us truncates to 1ms rather than rounding up to 2ms.
        expected = Timedelta(milliseconds=1)
        assert res == expected

        assert res._reso == 8
        assert res.value == 1

        with pytest.raises(ValueError, match="Cannot losslessly convert units"):
            td._as_unit("ms", round_ok=False)

    def test_as_unit_non_nano(self):
        # case where we are going neither to nor from nano
        td = Timedelta(days=1)._as_unit("D")
        assert td.days == 1
        assert td.value == 1
        assert td.components.days == 1
        assert td._d == 1
        assert td.total_seconds() == 86400

        # Days -> hours: 24 stored hours still report as one whole day.
        res = td._as_unit("h")
        assert res.value == 24
        assert res.components.days == 1
        assert res.components.hours == 0
        assert res._d == 1
        assert res._h == 0
        assert res.total_seconds() == 86400
100+
101+
27102
class TestNonNano:
28103
@pytest.fixture(params=[7, 8, 9])
29104
def unit(self, request):

0 commit comments

Comments
 (0)