Skip to content

Commit 7e5b223

Browse files
committed
Merge pull request #11263 from chris-b1/tslib-gil
PERF: Release GIL on some datetime ops
2 parents 6b204e4 + 3ecc1e7 commit 7e5b223

File tree

6 files changed

+213
-135
lines changed

6 files changed

+213
-135
lines changed

asv_bench/benchmarks/gil.py

+46
Original file line numberDiff line numberDiff line change
@@ -320,3 +320,49 @@ def time_nogil_kth_smallest(self):
320320
def run(arr):
321321
algos.kth_smallest(arr, self.k)
322322
run()
323+
324+
class nogil_datetime_fields(object):
325+
goal_time = 0.2
326+
327+
def setup(self):
328+
self.N = 100000000
329+
self.dti = pd.date_range('1900-01-01', periods=self.N, freq='D')
330+
self.period = self.dti.to_period('D')
331+
if (not have_real_test_parallel):
332+
raise NotImplementedError
333+
334+
def time_datetime_field_year(self):
335+
@test_parallel(num_threads=2)
336+
def run(dti):
337+
dti.year
338+
run(self.dti)
339+
340+
def time_datetime_field_day(self):
341+
@test_parallel(num_threads=2)
342+
def run(dti):
343+
dti.day
344+
run(self.dti)
345+
346+
def time_datetime_field_daysinmonth(self):
347+
@test_parallel(num_threads=2)
348+
def run(dti):
349+
dti.days_in_month
350+
run(self.dti)
351+
352+
def time_datetime_field_normalize(self):
353+
@test_parallel(num_threads=2)
354+
def run(dti):
355+
dti.normalize()
356+
run(self.dti)
357+
358+
def time_datetime_to_period(self):
359+
@test_parallel(num_threads=2)
360+
def run(dti):
361+
dti.to_period('S')
362+
run(self.dti)
363+
364+
def time_period_to_datetime(self):
365+
@test_parallel(num_threads=2)
366+
def run(period):
367+
period.to_timestamp()
368+
run(self.period)

doc/source/whatsnew/v0.17.1.txt

+4
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,10 @@ Performance Improvements
5858

5959
- Checking monotonicity before sorting on an index (:issue:`11080`)
6060

61+
62+
- Release the GIL on most datetime field operations (e.g. ``DatetimeIndex.year``, ``Series.dt.year``), on normalization, and on conversion to and from ``Period`` via ``DatetimeIndex.to_period`` and ``PeriodIndex.to_timestamp`` (:issue:`11263`)
63+
64+
6165
.. _whatsnew_0171.bug_fixes:
6266

6367
Bug Fixes

pandas/src/datetime.pxd

+4-4
Original file line numberDiff line numberDiff line change
@@ -95,14 +95,14 @@ cdef extern from "datetime/np_datetime.h":
9595
int apply_tzinfo)
9696

9797
npy_datetime pandas_datetimestruct_to_datetime(PANDAS_DATETIMEUNIT fr,
98-
pandas_datetimestruct *d)
98+
pandas_datetimestruct *d) nogil
9999
void pandas_datetime_to_datetimestruct(npy_datetime val,
100100
PANDAS_DATETIMEUNIT fr,
101-
pandas_datetimestruct *result)
101+
pandas_datetimestruct *result) nogil
102102
int days_per_month_table[2][12]
103103

104-
int dayofweek(int y, int m, int d)
105-
int is_leapyear(int64_t year)
104+
int dayofweek(int y, int m, int d) nogil
105+
int is_leapyear(int64_t year) nogil
106106
PANDAS_DATETIMEUNIT get_datetime64_unit(object o)
107107

108108
cdef extern from "datetime/np_datetime_strings.h":

pandas/src/period.pyx

+18-16
Original file line numberDiff line numberDiff line change
@@ -76,11 +76,11 @@ cdef extern from "period_helper.h":
7676

7777
int64_t get_period_ordinal(int year, int month, int day,
7878
int hour, int minute, int second, int microseconds, int picoseconds,
79-
int freq) except INT32_MIN
79+
int freq) nogil except INT32_MIN
8080

8181
int64_t get_python_ordinal(int64_t period_ordinal, int freq) except INT32_MIN
8282

83-
int get_date_info(int64_t ordinal, int freq, date_info *dinfo) except INT32_MIN
83+
int get_date_info(int64_t ordinal, int freq, date_info *dinfo) nogil except INT32_MIN
8484
double getAbsTime(int, int64_t, int64_t)
8585

8686
int pyear(int64_t ordinal, int freq) except INT32_MIN
@@ -139,13 +139,14 @@ def dt64arr_to_periodarr(ndarray[int64_t] dtarr, int freq, tz=None):
139139
out = np.empty(l, dtype='i8')
140140

141141
if tz is None:
142-
for i in range(l):
143-
if dtarr[i] == iNaT:
144-
out[i] = iNaT
145-
continue
146-
pandas_datetime_to_datetimestruct(dtarr[i], PANDAS_FR_ns, &dts)
147-
out[i] = get_period_ordinal(dts.year, dts.month, dts.day,
148-
dts.hour, dts.min, dts.sec, dts.us, dts.ps, freq)
142+
with nogil:
143+
for i in range(l):
144+
if dtarr[i] == NPY_NAT:
145+
out[i] = NPY_NAT
146+
continue
147+
pandas_datetime_to_datetimestruct(dtarr[i], PANDAS_FR_ns, &dts)
148+
out[i] = get_period_ordinal(dts.year, dts.month, dts.day,
149+
dts.hour, dts.min, dts.sec, dts.us, dts.ps, freq)
149150
else:
150151
out = localize_dt64arr_to_period(dtarr, freq, tz)
151152
return out
@@ -163,11 +164,12 @@ def periodarr_to_dt64arr(ndarray[int64_t] periodarr, int freq):
163164

164165
out = np.empty(l, dtype='i8')
165166

166-
for i in range(l):
167-
if periodarr[i] == iNaT:
168-
out[i] = iNaT
169-
continue
170-
out[i] = period_ordinal_to_dt64(periodarr[i], freq)
167+
with nogil:
168+
for i in range(l):
169+
if periodarr[i] == NPY_NAT:
170+
out[i] = NPY_NAT
171+
continue
172+
out[i] = period_ordinal_to_dt64(periodarr[i], freq)
171173

172174
return out
173175

@@ -245,13 +247,13 @@ def period_ordinal(int y, int m, int d, int h, int min, int s, int us, int ps, i
245247
return get_period_ordinal(y, m, d, h, min, s, us, ps, freq)
246248

247249

248-
cpdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq):
250+
cpdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) nogil:
249251
cdef:
250252
pandas_datetimestruct dts
251253
date_info dinfo
252254
float subsecond_fraction
253255

254-
if ordinal == iNaT:
256+
if ordinal == NPY_NAT:
255257
return NPY_NAT
256258

257259
get_date_info(ordinal, freq, &dinfo)

pandas/src/period_helper.c

+5-6
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ static int dInfoCalc_SetFromDateAndTime(struct date_info *dinfo,
113113
int yearoffset;
114114

115115
/* Range check */
116-
Py_AssertWithArg(year > -(INT_MAX / 366) && year < (INT_MAX / 366),
116+
Py_AssertWithArg(year > -(INT_MAX / 366) && year < (INT_MAX / 366),
117117
PyExc_ValueError,
118118
"year out of range: %i",
119119
year);
@@ -136,7 +136,7 @@ static int dInfoCalc_SetFromDateAndTime(struct date_info *dinfo,
136136
day);
137137

138138
yearoffset = dInfoCalc_YearOffset(year, calendar);
139-
if (PyErr_Occurred()) goto onError;
139+
if (yearoffset == INT_ERR_CODE) goto onError;
140140

141141
absdate = day + month_offset[leap][month - 1] + yearoffset;
142142

@@ -155,7 +155,7 @@ static int dInfoCalc_SetFromDateAndTime(struct date_info *dinfo,
155155

156156
/* Calculate the absolute time */
157157
{
158-
Py_AssertWithArg(hour >= 0 && hour <= 23,
158+
Py_AssertWithArg(hour >= 0 && hour <= 23,
159159
PyExc_ValueError,
160160
"hour out of range (0-23): %i",
161161
hour);
@@ -212,8 +212,7 @@ int dInfoCalc_SetFromAbsDate(register struct date_info *dinfo,
212212
while (1) {
213213
/* Calculate the year offset */
214214
yearoffset = dInfoCalc_YearOffset(year, calendar);
215-
if (PyErr_Occurred())
216-
goto onError;
215+
if (yearoffset == INT_ERR_CODE) goto onError;
217216

218217
/* Backward correction: absdate must be greater than the
219218
yearoffset */
@@ -310,7 +309,7 @@ static int calc_conversion_factors_matrix_size() {
310309
}
311310
matrix_size = max_value(matrix_size, period_value);
312311
}
313-
return matrix_size + 1;
312+
return matrix_size + 1;
314313
}
315314

316315
static void alloc_conversion_factors_matrix(int matrix_size) {

0 commit comments

Comments
 (0)