Skip to content

PERF: delta_to_nanoseconds #47254

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/dtypes.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit)
cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) nogil
cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=*) except? -1
cdef int64_t periods_per_second(NPY_DATETIMEUNIT reso) except? -1
cdef int64_t get_conversion_factor(NPY_DATETIMEUNIT from_unit, NPY_DATETIMEUNIT to_unit)
cdef int64_t get_conversion_factor(NPY_DATETIMEUNIT from_unit, NPY_DATETIMEUNIT to_unit) except? -1

cdef dict attrname_to_abbrevs

Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/dtypes.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ cdef int64_t periods_per_second(NPY_DATETIMEUNIT reso) except? -1:


@cython.overflowcheck(True)
cdef int64_t get_conversion_factor(NPY_DATETIMEUNIT from_unit, NPY_DATETIMEUNIT to_unit):
cdef int64_t get_conversion_factor(NPY_DATETIMEUNIT from_unit, NPY_DATETIMEUNIT to_unit) except? -1:
"""
Find the factor by which we need to multiply to convert from from_unit to to_unit.
"""
Expand Down
3 changes: 2 additions & 1 deletion pandas/_libs/tslibs/timedeltas.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@ from .np_datetime cimport NPY_DATETIMEUNIT

# Exposed for tslib, not intended for outside use.
cpdef int64_t delta_to_nanoseconds(
delta, NPY_DATETIMEUNIT reso=*, bint round_ok=*, bint allow_year_month=*
delta, NPY_DATETIMEUNIT reso=*, bint round_ok=*
) except? -1
cdef convert_to_timedelta64(object ts, str unit)
cdef bint is_any_td_scalar(object obj)
cdef object ensure_td64ns(object ts)


cdef class _Timedelta(timedelta):
Expand Down
1 change: 0 additions & 1 deletion pandas/_libs/tslibs/timedeltas.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ def delta_to_nanoseconds(
delta: np.timedelta64 | timedelta | Tick,
reso: int = ..., # NPY_DATETIMEUNIT
round_ok: bool = ...,
allow_year_month: bool = ...,
) -> int: ...

class Timedelta(timedelta):
Expand Down
60 changes: 25 additions & 35 deletions pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -206,44 +206,24 @@ cpdef int64_t delta_to_nanoseconds(
delta,
NPY_DATETIMEUNIT reso=NPY_FR_ns,
bint round_ok=True,
bint allow_year_month=False,
) except? -1:
# Note: this will raise on timedelta64 with Y or M unit

cdef:
_Timedelta td
NPY_DATETIMEUNIT in_reso
int64_t n
int64_t n, value, factor

if is_tick_object(delta):
n = delta.n
in_reso = delta._reso
if in_reso == reso:
return n
else:
td = Timedelta._from_value_and_reso(delta.n, reso=in_reso)

elif isinstance(delta, _Timedelta):
td = delta
n = delta.value
in_reso = delta._reso
if in_reso == reso:
return n

elif is_timedelta64_object(delta):
in_reso = get_datetime64_unit(delta)
n = get_timedelta64_value(delta)
if in_reso == reso:
return n
else:
# _from_value_and_reso does not support Year, Month, or unit-less,
# so we have special handling if specified
try:
td = Timedelta._from_value_and_reso(n, reso=in_reso)
except NotImplementedError:
if allow_year_month:
td64 = ensure_td64ns(delta)
return delta_to_nanoseconds(td64, reso=reso)
else:
raise

elif PyDelta_Check(delta):
in_reso = NPY_DATETIMEUNIT.NPY_FR_us
Expand All @@ -256,21 +236,31 @@ cpdef int64_t delta_to_nanoseconds(
except OverflowError as err:
raise OutOfBoundsTimedelta(*err.args) from err

if in_reso == reso:
return n
else:
td = Timedelta._from_value_and_reso(n, reso=in_reso)

else:
raise TypeError(type(delta))

try:
return td._as_reso(reso, round_ok=round_ok).value
except OverflowError as err:
unit_str = npy_unit_to_abbrev(reso)
raise OutOfBoundsTimedelta(
f"Cannot cast {str(delta)} to unit={unit_str} without overflow."
) from err
if reso < in_reso:
# e.g. ns -> us
factor = get_conversion_factor(reso, in_reso)
div, mod = divmod(n, factor)
if mod > 0 and not round_ok:
raise ValueError("Cannot losslessly convert units")

# Note that when mod > 0, we follow np.timedelta64 in always
# rounding down.
value = div
else:
factor = get_conversion_factor(in_reso, reso)
try:
with cython.overflowcheck(True):
value = n * factor
except OverflowError as err:
unit_str = npy_unit_to_abbrev(reso)
raise OutOfBoundsTimedelta(
f"Cannot cast {str(delta)} to unit={unit_str} without overflow."
) from err

return value


@cython.overflowcheck(True)
Expand Down
28 changes: 19 additions & 9 deletions pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ from pandas._libs.tslibs.offsets cimport (
)
from pandas._libs.tslibs.timedeltas cimport (
delta_to_nanoseconds,
ensure_td64ns,
is_any_td_scalar,
)

Expand Down Expand Up @@ -353,16 +354,25 @@ cdef class _Timestamp(ABCTimestamp):
raise NotImplementedError(self._reso)

if is_any_td_scalar(other):
if (
is_timedelta64_object(other)
and get_datetime64_unit(other) == NPY_DATETIMEUNIT.NPY_FR_GENERIC
):
# TODO: deprecate allowing this? We only get here
# with test_timedelta_add_timestamp_interval
other = np.timedelta64(other.view("i8"), "ns")
# TODO: disallow round_ok, allow_year_month?
if is_timedelta64_object(other):
other_reso = get_datetime64_unit(other)
if (
other_reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC
):
# TODO: deprecate allowing this? We only get here
# with test_timedelta_add_timestamp_interval
other = np.timedelta64(other.view("i8"), "ns")
elif (
other_reso == NPY_DATETIMEUNIT.NPY_FR_Y or other_reso == NPY_DATETIMEUNIT.NPY_FR_M
):
# TODO: deprecate allowing these? or handle more like the
# corresponding DateOffsets?
# TODO: no tests get here
other = ensure_td64ns(other)

# TODO: disallow round_ok
nanos = delta_to_nanoseconds(
other, reso=self._reso, round_ok=True, allow_year_month=True
other, reso=self._reso, round_ok=True
)
try:
result = type(self)(self.value + nanos, tz=self.tzinfo)
Expand Down
11 changes: 3 additions & 8 deletions pandas/tests/scalar/timestamp/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,6 @@ def test_overflow_offset_raises(self):
r"\<-?\d+ \* Days\> and \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} "
"will overflow"
)
lmsg = "|".join(
[
"Python int too large to convert to C (long|int)",
"int too big to convert",
]
)
lmsg2 = r"Cannot cast <-?20169940 \* Days> to unit=ns without overflow"

with pytest.raises(OutOfBoundsTimedelta, match=lmsg2):
Expand All @@ -68,13 +62,14 @@ def test_overflow_offset_raises(self):
stamp = Timestamp("2000/1/1")
offset_overflow = to_offset("D") * 100**5

with pytest.raises(OverflowError, match=lmsg):
lmsg3 = r"Cannot cast <-?10000000000 \* Days> to unit=ns without overflow"
with pytest.raises(OutOfBoundsTimedelta, match=lmsg3):
stamp + offset_overflow

with pytest.raises(OverflowError, match=msg):
offset_overflow + stamp

with pytest.raises(OverflowError, match=lmsg):
with pytest.raises(OutOfBoundsTimedelta, match=lmsg3):
stamp - offset_overflow

def test_overflow_timestamp_raises(self):
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/tslibs/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,18 @@ def test_delta_to_nanoseconds_error():
delta_to_nanoseconds(np.int32(3))


def test_delta_to_nanoseconds_td64_MY_raises():
    """
    delta_to_nanoseconds must raise ValueError for timedelta64 scalars
    carrying a year ("Y") or month ("M") unit.
    """
    # Each case pairs a numpy unit with the fragment expected in the error
    # message.  NOTE(review): the fragments look like the numeric
    # (from_unit, to_unit) codes reported by the conversion helper -- confirm.
    cases = (
        ("Y", "0, 10"),
        ("M", "1, 10"),
    )

    for unit, expected_msg in cases:
        td = np.timedelta64(1234, unit)

        with pytest.raises(ValueError, match=expected_msg):
            delta_to_nanoseconds(td)


def test_huge_nanoseconds_overflow():
# GH 32402
assert delta_to_nanoseconds(Timedelta(1e10)) == 1e10
Expand Down