Skip to content

Commit 9003258

Browse files
committed
PERF: Vectorized Timedelta property access (#18092)
1 parent f68bf25 commit 9003258

File tree

10 files changed

+335
-82
lines changed

10 files changed

+335
-82
lines changed

asv_bench/benchmarks/timedelta.py

+41
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,44 @@ def setup(self):
4040

4141
def test_add_td_ts(self):
4242
self.td + self.ts
43+
44+
class TimedeltaProperties(object):
45+
goal_time = 0.2
46+
47+
def setup(self):
48+
self.td = Timedelta(days=365, minutes=35, seconds=25, milliseconds=35)
49+
50+
def timedelta_days(self):
51+
self.td.days
52+
53+
def timedelta_seconds(self):
54+
self.td.seconds
55+
56+
def timedelta_microseconds(self):
57+
self.td.microseconds
58+
59+
def timedelta_nanoseconds(self):
60+
self.td.nanoseconds
61+
62+
class DatetimeAccessor(object):
63+
goal_time = 0.2
64+
65+
def setup(self):
66+
self.N = 100000
67+
self.series = pd.Series(
68+
pd.timedelta_range('1 days', periods=self.N, freq='h')
69+
)
70+
def time_dt_accessor(self):
71+
self.series.dt
72+
73+
def timedelta_dt_accessor_days(self):
74+
self.series.dt.days
75+
76+
def timedelta_dt_accessor_seconds(self):
77+
self.series.dt.seconds
78+
79+
def timedelta_dt_accessor_microseconds(self):
80+
self.series.dt.microseconds
81+
82+
def timedelta_dt_accessor_nanoseconds(self):
83+
self.series.dt.nanoseconds

doc/source/whatsnew/v0.22.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ Performance Improvements
7070
~~~~~~~~~~~~~~~~~~~~~~~~
7171

7272
- Indexers on Series or DataFrame no longer create a reference cycle (:issue:`17956`)
73-
-
73+
- Vectorized Timedelta property access (:issue:`18092`)
7474
-
7575

7676
.. _whatsnew_0220.docs:

pandas/_libs/src/datetime/np_datetime.c

+115
Original file line numberDiff line numberDiff line change
@@ -562,6 +562,16 @@ void pandas_datetime_to_datetimestruct(npy_datetime val, PANDAS_DATETIMEUNIT fr,
562562
convert_datetime_to_datetimestruct(&meta, val, result);
563563
}
564564

565+
void pandas_timedelta_to_timedeltastruct(npy_timedelta val, PANDAS_DATETIMEUNIT fr,
566+
pandas_timedeltastruct *result) {
567+
pandas_datetime_metadata meta;
568+
569+
meta.base = fr;
570+
meta.num - 1;
571+
572+
convert_timedelta_to_timedeltastruct(&meta, val, result);
573+
}
574+
565575
PANDAS_DATETIMEUNIT get_datetime64_unit(PyObject *obj) {
566576
return (PANDAS_DATETIMEUNIT)((PyDatetimeScalarObject *)obj)->obmeta.base;
567577
}
@@ -980,3 +990,108 @@ int convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta,
980990

981991
return 0;
982992
}
993+
994+
/*
995+
* Converts a timedelta from a timedeltastruct to a timedelta based
996+
* on some metadata. The timedelta is assumed to be valid.
997+
*
998+
* Returns 0 on success, -1 on failure.
999+
*/
1000+
int convert_timedelta_to_timedeltastruct(pandas_timedelta_metadata *meta,
1001+
npy_timedelta td,
1002+
pandas_timedeltastruct *out) {
1003+
1004+
npy_int64 perday;
1005+
npy_int64 frac;
1006+
npy_int64 sfrac;
1007+
npy_int64 ifrac;
1008+
int sign;
1009+
npy_int64 DAY_NS = 86400000000000LL;
1010+
1011+
/* Initialize the output to all zeros */
1012+
memset(out, 0, sizeof(pandas_timedeltastruct));
1013+
1014+
switch (meta->base) {
1015+
case PANDAS_FR_ns:
1016+
1017+
// put frac in seconds
1018+
if (td < 0 && td % (1000LL * 1000LL * 1000LL) != 0)
1019+
frac = td / (1000LL * 1000LL * 1000LL) - 1;
1020+
else
1021+
frac = td / (1000LL * 1000LL * 1000LL);
1022+
1023+
if (frac < 0) {
1024+
sign = -1;
1025+
1026+
// even fraction
1027+
if ((-frac % 86400LL) != 0) {
1028+
out->days = -frac / 86400LL + 1;
1029+
frac += 86400LL * out->days;
1030+
} else {
1031+
frac = -frac;
1032+
}
1033+
} else {
1034+
sign = 1;
1035+
out->days = 0;
1036+
}
1037+
1038+
if (frac >= 86400) {
1039+
out->days += frac / 86400LL;
1040+
frac -= out->days * 86400LL;
1041+
}
1042+
1043+
if (frac >= 3600) {
1044+
out->hrs = frac / 3600LL;
1045+
frac -= out->hrs * 3600LL;
1046+
} else {
1047+
out->hrs = 0;
1048+
}
1049+
1050+
if (frac >= 60) {
1051+
out->min = frac / 60LL;
1052+
frac -= out->min * 60LL;
1053+
} else {
1054+
out->min = 0;
1055+
}
1056+
1057+
if (frac >= 0) {
1058+
out->sec = frac;
1059+
frac -= out->sec;
1060+
} else {
1061+
out->sec = 0;
1062+
}
1063+
1064+
sfrac = (out->hrs * 3600LL + out->min * 60LL
1065+
+ out->sec) * (1000LL * 1000LL * 1000LL);
1066+
1067+
if (sign < 0)
1068+
out->days = -out->days;
1069+
1070+
ifrac = td - (out->days * DAY_NS + sfrac);
1071+
1072+
if (ifrac != 0) {
1073+
out->ms = ifrac / (1000LL * 1000LL);
1074+
ifrac -= out->ms * 1000LL * 1000LL;
1075+
out->us = ifrac / 1000LL;
1076+
ifrac -= out->us * 1000LL;
1077+
out->ns = ifrac;
1078+
} else {
1079+
out->ms = 0;
1080+
out->us = 0;
1081+
out->ns = 0;
1082+
}
1083+
1084+
out->seconds = out->hrs * 3600 + out->min * 60 + out->sec;
1085+
out->microseconds = out->ms * 1000 + out->us;
1086+
out->nanoseconds = out->ns;
1087+
break;
1088+
1089+
default:
1090+
PyErr_SetString(PyExc_RuntimeError,
1091+
"NumPy datetime metadata is corrupted with invalid "
1092+
"base unit");
1093+
return -1;
1094+
}
1095+
1096+
return 0;
1097+
}

pandas/_libs/src/datetime/np_datetime.h

+15
Original file line numberDiff line numberDiff line change
@@ -49,11 +49,18 @@ typedef struct {
4949
npy_int32 month, day, hour, min, sec, us, ps, as;
5050
} pandas_datetimestruct;
5151

52+
typedef struct {
53+
npy_int64 days;
54+
npy_int32 hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds;
55+
} pandas_timedeltastruct;
56+
5257
typedef struct {
5358
PANDAS_DATETIMEUNIT base;
5459
int num;
5560
} pandas_datetime_metadata;
5661

62+
typedef pandas_datetime_metadata pandas_timedelta_metadata;
63+
5764
extern const pandas_datetimestruct _NS_MIN_DTS;
5865
extern const pandas_datetimestruct _NS_MAX_DTS;
5966

@@ -71,6 +78,9 @@ npy_datetime pandas_datetimestruct_to_datetime(PANDAS_DATETIMEUNIT fr,
7178
void pandas_datetime_to_datetimestruct(npy_datetime val, PANDAS_DATETIMEUNIT fr,
7279
pandas_datetimestruct *result);
7380

81+
void pandas_timedelta_to_timedeltastruct(npy_timedelta val, PANDAS_DATETIMEUNIT fr,
82+
pandas_timedeltastruct *result);
83+
7484
int dayofweek(int y, int m, int d);
7585

7686
extern const int days_per_month_table[2][12];
@@ -131,6 +141,11 @@ convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta,
131141
npy_datetime dt,
132142
pandas_datetimestruct *out);
133143

144+
int
145+
convert_timedelta_to_timedeltastruct(pandas_timedelta_metadata *meta,
146+
npy_timedelta td,
147+
pandas_timedeltastruct *out);
148+
134149

135150
PANDAS_DATETIMEUNIT get_datetime64_unit(PyObject *obj);
136151

pandas/_libs/tslib.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ from tslibs.timezones cimport (
9292
get_dst_info)
9393
from tslibs.fields import (
9494
get_date_name_field, get_start_end_field, get_date_field,
95-
build_field_sarray)
95+
build_field_sarray, get_timedelta_field)
9696
from tslibs.conversion cimport (tz_convert_single, _TSObject,
9797
convert_to_tsobject,
9898
convert_datetime_to_tsobject,

pandas/_libs/tslibs/fields.pyx

+117-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ from numpy cimport ndarray, int64_t, int32_t, int8_t
1717
np.import_array()
1818

1919

20-
from np_datetime cimport pandas_datetimestruct, dt64_to_dtstruct
20+
from np_datetime cimport pandas_datetimestruct, pandas_timedeltastruct, dt64_to_dtstruct, td64_to_tdstruct
2121

2222
from datetime cimport (
2323
days_per_month_table,
@@ -544,6 +544,122 @@ def get_date_field(ndarray[int64_t] dtindex, object field):
544544

545545
raise ValueError("Field %s not supported" % field)
546546

547+
@cython.wraparound(False)
548+
@cython.boundscheck(False)
549+
def get_timedelta_field(ndarray[int64_t] tdindex, object field):
550+
"""
551+
Given a int64-based timedelta index, extract the days, hrs, sec.,
552+
field and return an array of these values.
553+
"""
554+
cdef:
555+
Py_ssize_t i, count = 0
556+
ndarray[int32_t] out
557+
pandas_timedeltastruct tds
558+
559+
count = len(tdindex)
560+
out = np.empty(count, dtype='i4')
561+
562+
if field == 'days':
563+
with nogil:
564+
for i in range(count):
565+
if tdindex[i] == NPY_NAT:
566+
out[i] = -1
567+
continue
568+
569+
td64_to_tdstruct(tdindex[i], &tds)
570+
out[i] = tds.days
571+
return out
572+
573+
elif field == 'h':
574+
with nogil:
575+
for i in range(count):
576+
if tdindex[i] == NPY_NAT:
577+
out[i] = -1
578+
continue
579+
580+
td64_to_tdstruct(tdindex[i], &tds)
581+
out[i] = tds.hrs
582+
return out
583+
584+
elif field == 's':
585+
with nogil:
586+
for i in range(count):
587+
if tdindex[i] == NPY_NAT:
588+
out[i] = -1
589+
continue
590+
591+
td64_to_tdstruct(tdindex[i], &tds)
592+
out[i] = tds.sec
593+
return out
594+
595+
elif field == 'seconds':
596+
with nogil:
597+
for i in range(count):
598+
if tdindex[i] == NPY_NAT:
599+
out[i] = -1
600+
continue
601+
602+
td64_to_tdstruct(tdindex[i], &tds)
603+
out[i] = tds.seconds
604+
return out
605+
606+
elif field == 'ms':
607+
with nogil:
608+
for i in range(count):
609+
if tdindex[i] == NPY_NAT:
610+
out[i] = -1
611+
continue
612+
613+
td64_to_tdstruct(tdindex[i], &tds)
614+
out[i] = tds.ms
615+
return out
616+
617+
elif field == 'microseconds':
618+
with nogil:
619+
for i in range(count):
620+
if tdindex[i] == NPY_NAT:
621+
out[i] = -1
622+
continue
623+
624+
td64_to_tdstruct(tdindex[i], &tds)
625+
out[i] = tds.microseconds
626+
return out
627+
628+
elif field == 'us':
629+
with nogil:
630+
for i in range(count):
631+
if tdindex[i] == NPY_NAT:
632+
out[i] = -1
633+
continue
634+
635+
td64_to_tdstruct(tdindex[i], &tds)
636+
out[i] = tds.us
637+
return out
638+
639+
elif field == 'ns':
640+
with nogil:
641+
for i in range(count):
642+
if tdindex[i] == NPY_NAT:
643+
out[i] = -1
644+
continue
645+
646+
td64_to_tdstruct(tdindex[i], &tds)
647+
out[i] = tds.ns
648+
return out
649+
650+
elif field == 'nanoseconds':
651+
with nogil:
652+
for i in range(count):
653+
if tdindex[i] == NPY_NAT:
654+
out[i] = -1
655+
continue
656+
657+
td64_to_tdstruct(tdindex[i], &tds)
658+
out[i] = tds.nanoseconds
659+
return out
660+
661+
raise ValueError("Field %s not supported" % field)
662+
547663

548664
cdef inline int days_in_month(pandas_datetimestruct dts) nogil:
549665
return days_per_month_table[is_leapyear(dts.year)][dts.month - 1]

pandas/_libs/tslibs/np_datetime.pxd

+5
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@ cdef extern from "../src/datetime/np_datetime.h":
3030
int64_t year
3131
int32_t month, day, hour, min, sec, us, ps, as
3232

33+
ctypedef struct pandas_timedeltastruct:
34+
int64_t days
35+
int32_t hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds
36+
3337
ctypedef enum PANDAS_DATETIMEUNIT:
3438
PANDAS_FR_Y
3539
PANDAS_FR_M
@@ -54,6 +58,7 @@ cdef check_dts_bounds(pandas_datetimestruct *dts)
5458

5559
cdef int64_t dtstruct_to_dt64(pandas_datetimestruct* dts) nogil
5660
cdef void dt64_to_dtstruct(int64_t dt64, pandas_datetimestruct* out) nogil
61+
cdef void td64_to_tdstruct(int64_t td64, pandas_timedeltastruct* out) nogil
5762

5863
cdef int64_t pydatetime_to_dt64(datetime val, pandas_datetimestruct *dts)
5964
cdef int64_t pydate_to_dt64(date val, pandas_datetimestruct *dts)

0 commit comments

Comments
 (0)