Skip to content

ENH: implement Timedelta._as_unit #47162

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 31, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/dtypes.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit)
cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) nogil
cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=*) except? -1
cdef int64_t periods_per_second(NPY_DATETIMEUNIT reso) except? -1
cdef int64_t get_conversion_factor(NPY_DATETIMEUNIT from_unit, NPY_DATETIMEUNIT to_unit)

cdef dict attrname_to_abbrevs

Expand Down
42 changes: 42 additions & 0 deletions pandas/_libs/tslibs/dtypes.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# period frequency constants corresponding to scikits timeseries
# originals
cimport cython

from enum import Enum

from pandas._libs.tslibs.np_datetime cimport NPY_DATETIMEUNIT
Expand Down Expand Up @@ -361,6 +363,46 @@ cdef int64_t periods_per_second(NPY_DATETIMEUNIT reso) except? -1:
raise NotImplementedError(reso)


@cython.overflowcheck(True)
cdef int64_t get_conversion_factor(NPY_DATETIMEUNIT from_unit, NPY_DATETIMEUNIT to_unit):
    """
    Find the factor by which we need to multiply to convert from from_unit to to_unit.

    Parameters
    ----------
    from_unit : NPY_DATETIMEUNIT
        Resolution being converted from.
    to_unit : NPY_DATETIMEUNIT
        Resolution being converted to. Must compare greater than or equal to
        ``from_unit`` in enum order.

    Returns
    -------
    int64_t
        1 when both units are equal, otherwise the product of the per-step
        factors (7, 24, 60, 60, 1000, ...) accumulated while stepping from
        ``from_unit`` towards ``to_unit``.

    Raises
    ------
    ValueError
        If either unit is NPY_FR_GENERIC, if ``from_unit > to_unit``, or if
        ``from_unit`` is not one of the units handled in the chain below.
    """
    # NOTE(review): this function is declared without an ``except`` clause.
    # Depending on the Cython version, exceptions raised here (including the
    # OverflowError produced by ``overflowcheck``) may be printed and
    # swallowed instead of propagated. Consider ``except? -1`` here and in
    # the matching dtypes.pxd declaration -- TODO confirm.
    if (
        from_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC
        or to_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC
    ):
        raise ValueError("unit-less resolutions are not supported")
    if from_unit > to_unit:
        # Only multiplicative conversions are supported; callers handle the
        # opposite direction by swapping the arguments and dividing.
        raise ValueError("from_unit must not be greater than to_unit")

    if from_unit == to_unit:
        return 1

    # Each branch peels off one step and recurses on the next unit in the
    # chain until from_unit == to_unit.
    if from_unit == NPY_DATETIMEUNIT.NPY_FR_W:
        return 7 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_D, to_unit)
    elif from_unit == NPY_DATETIMEUNIT.NPY_FR_D:
        return 24 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_h, to_unit)
    elif from_unit == NPY_DATETIMEUNIT.NPY_FR_h:
        return 60 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_m, to_unit)
    elif from_unit == NPY_DATETIMEUNIT.NPY_FR_m:
        return 60 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_s, to_unit)
    elif from_unit == NPY_DATETIMEUNIT.NPY_FR_s:
        return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ms, to_unit)
    elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ms:
        return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_us, to_unit)
    elif from_unit == NPY_DATETIMEUNIT.NPY_FR_us:
        return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ns, to_unit)
    elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ns:
        return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ps, to_unit)
    elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ps:
        return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_fs, to_unit)
    elif from_unit == NPY_DATETIMEUNIT.NPY_FR_fs:
        return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_as, to_unit)
    else:
        # Units with no branch above have no fixed factor defined here.
        raise ValueError(from_unit, to_unit)


cdef dict _reso_str_map = {
Resolution.RESO_NS.value: "nanosecond",
Resolution.RESO_US.value: "microsecond",
Expand Down
48 changes: 47 additions & 1 deletion pandas/_libs/tslibs/src/datetime/np_datetime.c
Original file line number Diff line number Diff line change
Expand Up @@ -954,7 +954,7 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td,
case NPY_FR_s:
// special case where we can simplify many expressions bc per_sec=1

per_day = 86400000LL;
per_day = 86400LL;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did this fix a bug somewhere?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

turns out not to be relevant, could actually get rid of per_day here entirely

per_sec = 1L;

// put frac in seconds
Expand Down Expand Up @@ -1023,6 +1023,52 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta td,
}
break;

case NPY_FR_m:

out->days = td / 1440LL;
td -= out->days * 1440LL;
out->hrs = td / 60LL;
td -= out->hrs * 60LL;
out->min = td;

out->sec = 0;
out->ms = 0;
out->us = 0;
out->ns = 0;
break;

case NPY_FR_h:
out->days = td / 24LL;
td -= out->days * 24LL;
out->hrs = td;

out->min = 0;
out->sec = 0;
out->ms = 0;
out->us = 0;
out->ns = 0;
break;

case NPY_FR_D:
out->days = td;
out->hrs = 0;
out->min = 0;
out->sec = 0;
out->ms = 0;
out->us = 0;
out->ns = 0;
break;

case NPY_FR_W:
out->days = 7 * td;
out->hrs = 0;
out->min = 0;
out->sec = 0;
out->ms = 0;
out->us = 0;
out->ns = 0;
break;

default:
PyErr_SetString(PyExc_RuntimeError,
"NumPy timedelta metadata is corrupted with "
Expand Down
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/timedeltas.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ cdef class _Timedelta(timedelta):
cdef bint _has_ns(self)
cdef _ensure_components(_Timedelta self)
cdef inline bint _compare_mismatched_resos(self, _Timedelta other, op)
cdef _Timedelta _as_reso(self, NPY_DATETIMEUNIT reso, bint round_ok=*)
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/timedeltas.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,4 @@ class Timedelta(timedelta):
def freq(self) -> None: ...
@property
def is_populated(self) -> bool: ...
def _as_unit(self, unit: str, round_ok: bool = ...) -> Timedelta: ...
39 changes: 38 additions & 1 deletion pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,10 @@ from pandas._libs.tslibs.conversion cimport (
cast_from_unit,
precision_from_unit,
)
from pandas._libs.tslibs.dtypes cimport npy_unit_to_abbrev
from pandas._libs.tslibs.dtypes cimport (
get_conversion_factor,
npy_unit_to_abbrev,
)
from pandas._libs.tslibs.nattype cimport (
NPY_NAT,
c_NaT as NaT,
Expand Down Expand Up @@ -1377,6 +1380,40 @@ cdef class _Timedelta(timedelta):
# exposing as classmethod for testing
return _timedelta_from_value_and_reso(value, reso)

def _as_unit(self, str unit, bint round_ok=True):
    """
    Convert this Timedelta to the resolution named by ``unit``.

    Parameters
    ----------
    unit : str
        Unit abbreviation interpolated into a numpy ``m8[...]`` dtype
        string, e.g. "ms".
    round_ok : bool, default True
        Forwarded unchanged to ``_as_reso``.

    Returns
    -------
    Timedelta

    Raises
    ------
    OutOfBoundsTimedelta
        If ``_as_reso`` raises OverflowError.
    """
    target_reso = get_unit_from_dtype(np.dtype(f"m8[{unit}]"))
    try:
        converted = self._as_reso(target_reso, round_ok=round_ok)
    except OverflowError as err:
        raise OutOfBoundsTimedelta(
            f"Cannot cast {self} to unit='{unit}' without overflow."
        ) from err
    return converted

@cython.cdivision(False)
cdef _Timedelta _as_reso(self, NPY_DATETIMEUNIT reso, bint round_ok=True):
    """
    Return this Timedelta re-expressed at resolution ``reso``.

    ``self`` is returned unchanged when it already has that resolution.
    When ``reso`` compares lower than the current resolution the value is
    floor-divided by the conversion factor; otherwise it is multiplied by
    it with overflow checking enabled.

    Raises
    ------
    ValueError
        If the division leaves a positive remainder and ``round_ok`` is False.
    """
    cdef:
        int64_t new_value, factor, quotient, remainder

    if reso == self._reso:
        # Nothing to convert.
        return self

    if reso > self._reso:
        factor = get_conversion_factor(self._reso, reso)
        with cython.overflowcheck(True):
            new_value = self.value * factor
    else:
        # e.g. ns -> us
        factor = get_conversion_factor(reso, self._reso)
        quotient, remainder = divmod(self.value, factor)
        if remainder > 0 and not round_ok:
            raise ValueError("Cannot losslessly convert units")

        # Note that when there is a remainder, we follow np.timedelta64 in
        # always rounding down.
        new_value = quotient

    return type(self)._from_value_and_reso(new_value, reso=reso)


# Python front end to C extension type _Timedelta
# This serves as the box for timedelta64
Expand Down
75 changes: 75 additions & 0 deletions pandas/tests/scalar/timedelta/test_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
NaT,
iNaT,
)
from pandas.errors import OutOfBoundsTimedelta

import pandas as pd
from pandas import (
Expand All @@ -24,6 +25,80 @@
import pandas._testing as tm


class TestAsUnit:
def test_as_unit(self):
    """Check conversion from ns to us/ms/s and the round-trip back to ns."""
    td = Timedelta(days=1)

    # Asking for the unit we already have hands back the very same object.
    assert td._as_unit("ns") is td

    # (target unit, divisor applied to the ns value, offset from td._reso)
    cases = [
        ("us", 1000, 1),
        ("ms", 1_000_000, 2),
        ("s", 1_000_000_000, 3),
    ]
    for unit, divisor, reso_offset in cases:
        res = td._as_unit(unit)
        assert res.value == td.value // divisor
        assert res._reso == td._reso - reso_offset

        # Round-trip back to nanoseconds recovers the original value.
        rt = res._as_unit("ns")
        assert rt.value == td.value
        assert rt._reso == td._reso

def test_as_unit_overflows(self):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just confirming aloud, based on the multiplication operation we can't get to a state where underflowing occurs (and therefore can't test that)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is all in integers, so I don't think underflows are an issue.

# microsecond that would be just out of bounds for nano
us = 9223372800000000
td = Timedelta._from_value_and_reso(us, 9)

msg = "Cannot cast 106752 days 00:00:00 to unit='ns' without overflow"
with pytest.raises(OutOfBoundsTimedelta, match=msg):
td._as_unit("ns")

res = td._as_unit("ms")
assert res.value == us // 1000
assert res._reso == 8

def test_as_unit_rounding(self):
    """Check that a lossy us -> ms conversion rounds down unless round_ok=False."""
    td = Timedelta(microseconds=1500)

    # A lossy conversion must be refused when rounding is disallowed.
    with pytest.raises(ValueError, match="Cannot losslessly convert units"):
        td._as_unit("ms", round_ok=False)

    # By default the 1500us value is rounded down to 1ms.
    res = td._as_unit("ms")
    assert res._reso == 8
    assert res.value == 1
    assert res == Timedelta(milliseconds=1)

def test_as_unit_non_nano(self):
# case where we are going neither to nor from nano
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not really sure what "neither to nor" means (if it's an idiom)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"not to nano" and "not from nano"

td = Timedelta(days=1)._as_unit("D")
assert td.days == 1
assert td.value == 1
assert td.components.days == 1
assert td._d == 1
assert td.total_seconds() == 86400

res = td._as_unit("h")
assert res.value == 24
assert res.components.days == 1
assert res.components.hours == 0
assert res._d == 1
assert res._h == 0
assert res.total_seconds() == 86400


class TestNonNano:
@pytest.fixture(params=[7, 8, 9])
def unit(self, request):
Expand Down