Skip to content

Commit 468bd03

Browse files
jbrockmendel authored and yehoshuadimarsky committed
PERF: delta_to_nanoseconds (pandas-dev#47254)
1 parent 5229259 commit 468bd03

File tree

8 files changed: 63 additions (+63) and 56 deletions (-56)

pandas/_libs/tslibs/dtypes.pxd

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit)
77
cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) nogil
88
cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=*) except? -1
99
cdef int64_t periods_per_second(NPY_DATETIMEUNIT reso) except? -1
10-
cdef int64_t get_conversion_factor(NPY_DATETIMEUNIT from_unit, NPY_DATETIMEUNIT to_unit)
10+
cdef int64_t get_conversion_factor(NPY_DATETIMEUNIT from_unit, NPY_DATETIMEUNIT to_unit) except? -1
1111

1212
cdef dict attrname_to_abbrevs
1313

pandas/_libs/tslibs/dtypes.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -384,7 +384,7 @@ cdef int64_t periods_per_second(NPY_DATETIMEUNIT reso) except? -1:
384384

385385

386386
@cython.overflowcheck(True)
387-
cdef int64_t get_conversion_factor(NPY_DATETIMEUNIT from_unit, NPY_DATETIMEUNIT to_unit):
387+
cdef int64_t get_conversion_factor(NPY_DATETIMEUNIT from_unit, NPY_DATETIMEUNIT to_unit) except? -1:
388388
"""
389389
Find the factor by which we need to multiply to convert from from_unit to to_unit.
390390
"""

pandas/_libs/tslibs/timedeltas.pxd

+2-1
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,11 @@ from .np_datetime cimport NPY_DATETIMEUNIT
66

77
# Exposed for tslib, not intended for outside use.
88
cpdef int64_t delta_to_nanoseconds(
9-
delta, NPY_DATETIMEUNIT reso=*, bint round_ok=*, bint allow_year_month=*
9+
delta, NPY_DATETIMEUNIT reso=*, bint round_ok=*
1010
) except? -1
1111
cdef convert_to_timedelta64(object ts, str unit)
1212
cdef bint is_any_td_scalar(object obj)
13+
cdef object ensure_td64ns(object ts)
1314

1415

1516
cdef class _Timedelta(timedelta):

pandas/_libs/tslibs/timedeltas.pyi

-1
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,6 @@ def delta_to_nanoseconds(
7676
delta: np.timedelta64 | timedelta | Tick,
7777
reso: int = ..., # NPY_DATETIMEUNIT
7878
round_ok: bool = ...,
79-
allow_year_month: bool = ...,
8079
) -> int: ...
8180

8281
class Timedelta(timedelta):

pandas/_libs/tslibs/timedeltas.pyx

+25-35
Original file line numberDiff line numberDiff line change
@@ -206,44 +206,24 @@ cpdef int64_t delta_to_nanoseconds(
206206
delta,
207207
NPY_DATETIMEUNIT reso=NPY_FR_ns,
208208
bint round_ok=True,
209-
bint allow_year_month=False,
210209
) except? -1:
210+
# Note: this will raise on timedelta64 with Y or M unit
211+
211212
cdef:
212-
_Timedelta td
213213
NPY_DATETIMEUNIT in_reso
214-
int64_t n
214+
int64_t n, value, factor
215215

216216
if is_tick_object(delta):
217217
n = delta.n
218218
in_reso = delta._reso
219-
if in_reso == reso:
220-
return n
221-
else:
222-
td = Timedelta._from_value_and_reso(delta.n, reso=in_reso)
223219

224220
elif isinstance(delta, _Timedelta):
225-
td = delta
226221
n = delta.value
227222
in_reso = delta._reso
228-
if in_reso == reso:
229-
return n
230223

231224
elif is_timedelta64_object(delta):
232225
in_reso = get_datetime64_unit(delta)
233226
n = get_timedelta64_value(delta)
234-
if in_reso == reso:
235-
return n
236-
else:
237-
# _from_value_and_reso does not support Year, Month, or unit-less,
238-
# so we have special handling if speciifed
239-
try:
240-
td = Timedelta._from_value_and_reso(n, reso=in_reso)
241-
except NotImplementedError:
242-
if allow_year_month:
243-
td64 = ensure_td64ns(delta)
244-
return delta_to_nanoseconds(td64, reso=reso)
245-
else:
246-
raise
247227

248228
elif PyDelta_Check(delta):
249229
in_reso = NPY_DATETIMEUNIT.NPY_FR_us
@@ -256,21 +236,31 @@ cpdef int64_t delta_to_nanoseconds(
256236
except OverflowError as err:
257237
raise OutOfBoundsTimedelta(*err.args) from err
258238

259-
if in_reso == reso:
260-
return n
261-
else:
262-
td = Timedelta._from_value_and_reso(n, reso=in_reso)
263-
264239
else:
265240
raise TypeError(type(delta))
266241

267-
try:
268-
return td._as_reso(reso, round_ok=round_ok).value
269-
except OverflowError as err:
270-
unit_str = npy_unit_to_abbrev(reso)
271-
raise OutOfBoundsTimedelta(
272-
f"Cannot cast {str(delta)} to unit={unit_str} without overflow."
273-
) from err
242+
if reso < in_reso:
243+
# e.g. ns -> us
244+
factor = get_conversion_factor(reso, in_reso)
245+
div, mod = divmod(n, factor)
246+
if mod > 0 and not round_ok:
247+
raise ValueError("Cannot losslessly convert units")
248+
249+
# Note that when mod > 0, we follow np.timedelta64 in always
250+
# rounding down.
251+
value = div
252+
else:
253+
factor = get_conversion_factor(in_reso, reso)
254+
try:
255+
with cython.overflowcheck(True):
256+
value = n * factor
257+
except OverflowError as err:
258+
unit_str = npy_unit_to_abbrev(reso)
259+
raise OutOfBoundsTimedelta(
260+
f"Cannot cast {str(delta)} to unit={unit_str} without overflow."
261+
) from err
262+
263+
return value
274264

275265

276266
@cython.overflowcheck(True)

pandas/_libs/tslibs/timestamps.pyx

+19-9
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ from pandas._libs.tslibs.offsets cimport (
9999
)
100100
from pandas._libs.tslibs.timedeltas cimport (
101101
delta_to_nanoseconds,
102+
ensure_td64ns,
102103
is_any_td_scalar,
103104
)
104105

@@ -353,16 +354,25 @@ cdef class _Timestamp(ABCTimestamp):
353354
raise NotImplementedError(self._reso)
354355

355356
if is_any_td_scalar(other):
356-
if (
357-
is_timedelta64_object(other)
358-
and get_datetime64_unit(other) == NPY_DATETIMEUNIT.NPY_FR_GENERIC
359-
):
360-
# TODO: deprecate allowing this? We only get here
361-
# with test_timedelta_add_timestamp_interval
362-
other = np.timedelta64(other.view("i8"), "ns")
363-
# TODO: disallow round_ok, allow_year_month?
357+
if is_timedelta64_object(other):
358+
other_reso = get_datetime64_unit(other)
359+
if (
360+
other_reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC
361+
):
362+
# TODO: deprecate allowing this? We only get here
363+
# with test_timedelta_add_timestamp_interval
364+
other = np.timedelta64(other.view("i8"), "ns")
365+
elif (
366+
other_reso == NPY_DATETIMEUNIT.NPY_FR_Y or other_reso == NPY_DATETIMEUNIT.NPY_FR_M
367+
):
368+
# TODO: deprecate allowing these? or handle more like the
369+
# corresponding DateOffsets?
370+
# TODO: no tests get here
371+
other = ensure_td64ns(other)
372+
373+
# TODO: disallow round_ok
364374
nanos = delta_to_nanoseconds(
365-
other, reso=self._reso, round_ok=True, allow_year_month=True
375+
other, reso=self._reso, round_ok=True
366376
)
367377
try:
368378
result = type(self)(self.value + nanos, tz=self.tzinfo)

pandas/tests/scalar/timestamp/test_arithmetic.py

+3-8
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,6 @@ def test_overflow_offset_raises(self):
4545
r"\<-?\d+ \* Days\> and \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} "
4646
"will overflow"
4747
)
48-
lmsg = "|".join(
49-
[
50-
"Python int too large to convert to C (long|int)",
51-
"int too big to convert",
52-
]
53-
)
5448
lmsg2 = r"Cannot cast <-?20169940 \* Days> to unit=ns without overflow"
5549

5650
with pytest.raises(OutOfBoundsTimedelta, match=lmsg2):
@@ -68,13 +62,14 @@ def test_overflow_offset_raises(self):
6862
stamp = Timestamp("2000/1/1")
6963
offset_overflow = to_offset("D") * 100**5
7064

71-
with pytest.raises(OverflowError, match=lmsg):
65+
lmsg3 = r"Cannot cast <-?10000000000 \* Days> to unit=ns without overflow"
66+
with pytest.raises(OutOfBoundsTimedelta, match=lmsg3):
7267
stamp + offset_overflow
7368

7469
with pytest.raises(OverflowError, match=msg):
7570
offset_overflow + stamp
7671

77-
with pytest.raises(OverflowError, match=lmsg):
72+
with pytest.raises(OutOfBoundsTimedelta, match=lmsg3):
7873
stamp - offset_overflow
7974

8075
def test_overflow_timestamp_raises(self):

pandas/tests/tslibs/test_timedeltas.py

+12
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,18 @@ def test_delta_to_nanoseconds_error():
5555
delta_to_nanoseconds(np.int32(3))
5656

5757

58+
def test_delta_to_nanoseconds_td64_MY_raises():
59+
td = np.timedelta64(1234, "Y")
60+
61+
with pytest.raises(ValueError, match="0, 10"):
62+
delta_to_nanoseconds(td)
63+
64+
td = np.timedelta64(1234, "M")
65+
66+
with pytest.raises(ValueError, match="1, 10"):
67+
delta_to_nanoseconds(td)
68+
69+
5870
def test_huge_nanoseconds_overflow():
5971
# GH 32402
6072
assert delta_to_nanoseconds(Timedelta(1e10)) == 1e10

0 commit comments

Comments
 (0)