Skip to content

Commit cd80f08

Browse files
WillAydjreback
authored andcommitted
PERF: Vectorized Timedelta property access (#18092) (#18225)
1 parent 40fd6b4 commit cd80f08

File tree

9 files changed

+341
-80
lines changed

9 files changed

+341
-80
lines changed

asv_bench/benchmarks/timedelta.py

+43
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,46 @@ def setup(self):
4040

4141
def test_add_td_ts(self):
4242
self.td + self.ts
43+
44+
45+
class TimedeltaProperties(object):
    """Benchmarks for property access on a single ``Timedelta`` scalar."""
    # NOTE(review): presumably the asv per-benchmark target time in
    # seconds -- confirm against the asv documentation.
    goal_time = 0.2

    def setup(self):
        # One fixed Timedelta that every timing method below reads from.
        self.td = Timedelta(days=365, minutes=35, seconds=25, milliseconds=35)

    def time_timedelta_days(self):
        # Times access to the ``days`` property.
        self.td.days

    def time_timedelta_seconds(self):
        # Times access to the ``seconds`` property.
        self.td.seconds

    def time_timedelta_microseconds(self):
        # Times access to the ``microseconds`` property.
        self.td.microseconds

    def time_timedelta_nanoseconds(self):
        # Times access to the ``nanoseconds`` property.
        self.td.nanoseconds
62+
63+
64+
class DatetimeAccessor(object):
    """Benchmarks for the ``.dt`` accessor on a timedelta-valued ``Series``."""
    # NOTE(review): presumably the asv per-benchmark target time in
    # seconds -- confirm against the asv documentation.
    goal_time = 0.2

    def setup(self):
        # Series of N timedeltas built from a range starting at '1 days'
        # with freq='h'.
        self.N = 100000
        self.series = pd.Series(
            pd.timedelta_range('1 days', periods=self.N, freq='h')
        )

    def time_dt_accessor(self):
        # Times obtaining the accessor object itself, without reading a field.
        self.series.dt

    def time_timedelta_dt_accessor_days(self):
        # Times ``.dt.days`` over the whole Series.
        self.series.dt.days

    def time_timedelta_dt_accessor_seconds(self):
        # Times ``.dt.seconds`` over the whole Series.
        self.series.dt.seconds

    def time_timedelta_dt_accessor_microseconds(self):
        # Times ``.dt.microseconds`` over the whole Series.
        self.series.dt.microseconds

    def time_timedelta_dt_accessor_nanoseconds(self):
        # Times ``.dt.nanoseconds`` over the whole Series.
        self.series.dt.nanoseconds

doc/source/whatsnew/v0.22.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ Performance Improvements
7272
- Indexers on ``Series`` or ``DataFrame`` no longer create a reference cycle (:issue:`17956`)
7373
- Added a keyword argument, ``cache``, to :func:`to_datetime` that improved the performance of converting duplicate datetime arguments (:issue:`11665`)
7474
- :class:`DateOffset` arithmetic performance is improved (:issue:`18218`)
75+
- Converting a ``Series`` of ``Timedelta`` objects to days, seconds, etc. is now faster thanks to vectorization of the underlying methods (:issue:`18092`)
7576
-
7677

7778
.. _whatsnew_0220.docs:

pandas/_libs/src/datetime/np_datetime.c

+115
Original file line numberDiff line numberDiff line change
@@ -562,6 +562,17 @@ void pandas_datetime_to_datetimestruct(npy_datetime val, PANDAS_DATETIMEUNIT fr,
562562
convert_datetime_to_datetimestruct(&meta, val, result);
563563
}
564564

565+
void pandas_timedelta_to_timedeltastruct(npy_timedelta val,
566+
PANDAS_DATETIMEUNIT fr,
567+
pandas_timedeltastruct *result) {
568+
pandas_datetime_metadata meta;
569+
570+
meta.base = fr;
571+
meta.num - 1;
572+
573+
convert_timedelta_to_timedeltastruct(&meta, val, result);
574+
}
575+
565576
PANDAS_DATETIMEUNIT get_datetime64_unit(PyObject *obj) {
566577
return (PANDAS_DATETIMEUNIT)((PyDatetimeScalarObject *)obj)->obmeta.base;
567578
}
@@ -980,3 +991,107 @@ int convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta,
980991

981992
return 0;
982993
}
994+
995+
/*
996+
* Converts a timedelta from a timedeltastruct to a timedelta based
997+
* on some metadata. The timedelta is assumed to be valid.
998+
*
999+
* Returns 0 on success, -1 on failure.
1000+
*/
1001+
int convert_timedelta_to_timedeltastruct(pandas_timedelta_metadata *meta,
1002+
npy_timedelta td,
1003+
pandas_timedeltastruct *out) {
1004+
npy_int64 perday;
1005+
npy_int64 frac;
1006+
npy_int64 sfrac;
1007+
npy_int64 ifrac;
1008+
int sign;
1009+
npy_int64 DAY_NS = 86400000000000LL;
1010+
1011+
/* Initialize the output to all zeros */
1012+
memset(out, 0, sizeof(pandas_timedeltastruct));
1013+
1014+
switch (meta->base) {
1015+
case PANDAS_FR_ns:
1016+
1017+
// put frac in seconds
1018+
if (td < 0 && td % (1000LL * 1000LL * 1000LL) != 0)
1019+
frac = td / (1000LL * 1000LL * 1000LL) - 1;
1020+
else
1021+
frac = td / (1000LL * 1000LL * 1000LL);
1022+
1023+
if (frac < 0) {
1024+
sign = -1;
1025+
1026+
// even fraction
1027+
if ((-frac % 86400LL) != 0) {
1028+
out->days = -frac / 86400LL + 1;
1029+
frac += 86400LL * out->days;
1030+
} else {
1031+
frac = -frac;
1032+
}
1033+
} else {
1034+
sign = 1;
1035+
out->days = 0;
1036+
}
1037+
1038+
if (frac >= 86400) {
1039+
out->days += frac / 86400LL;
1040+
frac -= out->days * 86400LL;
1041+
}
1042+
1043+
if (frac >= 3600) {
1044+
out->hrs = frac / 3600LL;
1045+
frac -= out->hrs * 3600LL;
1046+
} else {
1047+
out->hrs = 0;
1048+
}
1049+
1050+
if (frac >= 60) {
1051+
out->min = frac / 60LL;
1052+
frac -= out->min * 60LL;
1053+
} else {
1054+
out->min = 0;
1055+
}
1056+
1057+
if (frac >= 0) {
1058+
out->sec = frac;
1059+
frac -= out->sec;
1060+
} else {
1061+
out->sec = 0;
1062+
}
1063+
1064+
sfrac = (out->hrs * 3600LL + out->min * 60LL
1065+
+ out->sec) * (1000LL * 1000LL * 1000LL);
1066+
1067+
if (sign < 0)
1068+
out->days = -out->days;
1069+
1070+
ifrac = td - (out->days * DAY_NS + sfrac);
1071+
1072+
if (ifrac != 0) {
1073+
out->ms = ifrac / (1000LL * 1000LL);
1074+
ifrac -= out->ms * 1000LL * 1000LL;
1075+
out->us = ifrac / 1000LL;
1076+
ifrac -= out->us * 1000LL;
1077+
out->ns = ifrac;
1078+
} else {
1079+
out->ms = 0;
1080+
out->us = 0;
1081+
out->ns = 0;
1082+
}
1083+
1084+
out->seconds = out->hrs * 3600 + out->min * 60 + out->sec;
1085+
out->microseconds = out->ms * 1000 + out->us;
1086+
out->nanoseconds = out->ns;
1087+
break;
1088+
1089+
default:
1090+
PyErr_SetString(PyExc_RuntimeError,
1091+
"NumPy datetime metadata is corrupted with invalid "
1092+
"base unit");
1093+
return -1;
1094+
}
1095+
1096+
return 0;
1097+
}

pandas/_libs/src/datetime/np_datetime.h

+16
Original file line numberDiff line numberDiff line change
@@ -49,11 +49,18 @@ typedef struct {
4949
npy_int32 month, day, hour, min, sec, us, ps, as;
5050
} pandas_datetimestruct;
5151

52+
/*
 * Broken-down form of a timedelta, as filled in by
 * convert_timedelta_to_timedeltastruct().
 *
 *   days                      whole days (negative for negative timedeltas)
 *   hrs, min, sec             time-of-day components
 *   ms, us, ns                sub-second components
 *   seconds                   hrs * 3600 + min * 60 + sec
 *   microseconds              ms * 1000 + us
 *   nanoseconds               same value as ns
 */
typedef struct {
    npy_int64 days;
    npy_int32 hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds;
} pandas_timedeltastruct;
56+
5257
typedef struct {
5358
PANDAS_DATETIMEUNIT base;
5459
int num;
5560
} pandas_datetime_metadata;
5661

62+
typedef pandas_datetime_metadata pandas_timedelta_metadata;
63+
5764
extern const pandas_datetimestruct _NS_MIN_DTS;
5865
extern const pandas_datetimestruct _NS_MAX_DTS;
5966

@@ -71,6 +78,10 @@ npy_datetime pandas_datetimestruct_to_datetime(PANDAS_DATETIMEUNIT fr,
7178
void pandas_datetime_to_datetimestruct(npy_datetime val, PANDAS_DATETIMEUNIT fr,
7279
pandas_datetimestruct *result);
7380

81+
void pandas_timedelta_to_timedeltastruct(npy_timedelta val,
82+
PANDAS_DATETIMEUNIT fr,
83+
pandas_timedeltastruct *result);
84+
7485
int dayofweek(int y, int m, int d);
7586

7687
extern const int days_per_month_table[2][12];
@@ -131,6 +142,11 @@ convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta,
131142
npy_datetime dt,
132143
pandas_datetimestruct *out);
133144

145+
int
146+
convert_timedelta_to_timedeltastruct(pandas_timedelta_metadata *meta,
147+
npy_timedelta td,
148+
pandas_timedeltastruct *out);
149+
134150

135151
PANDAS_DATETIMEUNIT get_datetime64_unit(PyObject *obj);
136152

pandas/_libs/tslibs/fields.pyx

+119-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ from numpy cimport ndarray, int64_t, int32_t, int8_t
1717
np.import_array()
1818

1919

20-
from np_datetime cimport pandas_datetimestruct, dt64_to_dtstruct
20+
from np_datetime cimport (pandas_datetimestruct, pandas_timedeltastruct,
21+
dt64_to_dtstruct, td64_to_tdstruct)
2122

2223
from datetime cimport (
2324
days_per_month_table,
@@ -545,6 +546,123 @@ def get_date_field(ndarray[int64_t] dtindex, object field):
545546
raise ValueError("Field %s not supported" % field)
546547

547548

549+
@cython.wraparound(False)
@cython.boundscheck(False)
def get_timedelta_field(ndarray[int64_t] tdindex, object field):
    """
    Given an int64-based timedelta index, extract a single field from each
    element and return the values as an int32 array.

    Parameters
    ----------
    tdindex : ndarray[int64_t]
        Timedelta values; each one is passed to ``td64_to_tdstruct``.
    field : object
        One of 'days', 'h', 's', 'seconds', 'ms', 'microseconds', 'us',
        'ns' or 'nanoseconds'.

    Returns
    -------
    ndarray[int32_t]
        Same length as ``tdindex``. Entries equal to ``NPY_NAT`` are
        reported as -1.

    Raises
    ------
    ValueError
        If ``field`` is not one of the supported names.
    """
    cdef:
        Py_ssize_t i, count = 0
        ndarray[int32_t] out
        pandas_timedeltastruct tds

    count = len(tdindex)
    out = np.empty(count, dtype='i4')

    # NOTE(review): the loop is repeated once per field, presumably so the
    # Python-level string comparison happens a single time outside the
    # nogil section -- confirm before refactoring into one shared loop.
    if field == 'days':
        with nogil:
            for i in range(count):
                if tdindex[i] == NPY_NAT:
                    out[i] = -1
                    continue

                td64_to_tdstruct(tdindex[i], &tds)
                # tds.days is declared int64; it is narrowed to the int32
                # output here.
                out[i] = tds.days
        return out

    elif field == 'h':
        with nogil:
            for i in range(count):
                if tdindex[i] == NPY_NAT:
                    out[i] = -1
                    continue

                td64_to_tdstruct(tdindex[i], &tds)
                out[i] = tds.hrs
        return out

    elif field == 's':
        with nogil:
            for i in range(count):
                if tdindex[i] == NPY_NAT:
                    out[i] = -1
                    continue

                td64_to_tdstruct(tdindex[i], &tds)
                out[i] = tds.sec
        return out

    elif field == 'seconds':
        with nogil:
            for i in range(count):
                if tdindex[i] == NPY_NAT:
                    out[i] = -1
                    continue

                td64_to_tdstruct(tdindex[i], &tds)
                out[i] = tds.seconds
        return out

    elif field == 'ms':
        with nogil:
            for i in range(count):
                if tdindex[i] == NPY_NAT:
                    out[i] = -1
                    continue

                td64_to_tdstruct(tdindex[i], &tds)
                out[i] = tds.ms
        return out

    elif field == 'microseconds':
        with nogil:
            for i in range(count):
                if tdindex[i] == NPY_NAT:
                    out[i] = -1
                    continue

                td64_to_tdstruct(tdindex[i], &tds)
                out[i] = tds.microseconds
        return out

    elif field == 'us':
        with nogil:
            for i in range(count):
                if tdindex[i] == NPY_NAT:
                    out[i] = -1
                    continue

                td64_to_tdstruct(tdindex[i], &tds)
                out[i] = tds.us
        return out

    elif field == 'ns':
        with nogil:
            for i in range(count):
                if tdindex[i] == NPY_NAT:
                    out[i] = -1
                    continue

                td64_to_tdstruct(tdindex[i], &tds)
                out[i] = tds.ns
        return out

    elif field == 'nanoseconds':
        with nogil:
            for i in range(count):
                if tdindex[i] == NPY_NAT:
                    out[i] = -1
                    continue

                td64_to_tdstruct(tdindex[i], &tds)
                out[i] = tds.nanoseconds
        return out

    # No branch matched: the requested field name is unknown.
    raise ValueError("Field %s not supported" % field)
664+
665+
548666
cdef inline int days_in_month(pandas_datetimestruct dts) nogil:
549667
return days_per_month_table[is_leapyear(dts.year)][dts.month - 1]
550668

pandas/_libs/tslibs/np_datetime.pxd

+5
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@ cdef extern from "../src/datetime/np_datetime.h":
3030
int64_t year
3131
int32_t month, day, hour, min, sec, us, ps, as
3232

33+
ctypedef struct pandas_timedeltastruct:
34+
int64_t days
35+
int32_t hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds
36+
3337
ctypedef enum PANDAS_DATETIMEUNIT:
3438
PANDAS_FR_Y
3539
PANDAS_FR_M
@@ -54,6 +58,7 @@ cdef check_dts_bounds(pandas_datetimestruct *dts)
5458

5559
cdef int64_t dtstruct_to_dt64(pandas_datetimestruct* dts) nogil
5660
cdef void dt64_to_dtstruct(int64_t dt64, pandas_datetimestruct* out) nogil
61+
cdef void td64_to_tdstruct(int64_t td64, pandas_timedeltastruct* out) nogil
5762

5863
cdef int64_t pydatetime_to_dt64(datetime val, pandas_datetimestruct *dts)
5964
cdef int64_t pydate_to_dt64(date val, pandas_datetimestruct *dts)

0 commit comments

Comments
 (0)