diff --git a/asv_bench/benchmarks/gil.py b/asv_bench/benchmarks/gil.py
index 4b82781fc39d9..eeca2d54381b2 100644
--- a/asv_bench/benchmarks/gil.py
+++ b/asv_bench/benchmarks/gil.py
@@ -320,3 +320,49 @@ def time_nogil_kth_smallest(self):
         def run(arr):
             algos.kth_smallest(arr, self.k)
         run()
+
+class nogil_datetime_fields(object):
+    goal_time = 0.2
+
+    def setup(self):
+        self.N = 100000000
+        self.dti = pd.date_range('1900-01-01', periods=self.N, freq='T')
+        self.period = self.dti.to_period('D')
+        if (not have_real_test_parallel):
+            raise NotImplementedError
+
+    def time_datetime_field_year(self):
+        @test_parallel(num_threads=2)
+        def run(dti):
+            dti.year
+        run(self.dti)
+
+    def time_datetime_field_day(self):
+        @test_parallel(num_threads=2)
+        def run(dti):
+            dti.day
+        run(self.dti)
+
+    def time_datetime_field_daysinmonth(self):
+        @test_parallel(num_threads=2)
+        def run(dti):
+            dti.days_in_month
+        run(self.dti)
+
+    def time_datetime_field_normalize(self):
+        @test_parallel(num_threads=2)
+        def run(dti):
+            dti.normalize()
+        run(self.dti)
+
+    def time_datetime_to_period(self):
+        @test_parallel(num_threads=2)
+        def run(dti):
+            dti.to_period('S')
+        run(self.dti)
+
+    def time_period_to_datetime(self):
+        @test_parallel(num_threads=2)
+        def run(period):
+            period.to_timestamp()
+        run(self.period)
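The new `nogil_datetime_fields` benchmarks only measure scaling under `test_parallel`. As a rough standalone illustration of the same workload (a minimal sketch with arbitrary, much smaller sizes; not part of the patch), two threads can now overlap on field extraction because the loops changed below release the GIL:

```python
# Sketch only: models the benchmark workload outside ASV.
from concurrent.futures import ThreadPoolExecutor

import pandas as pd

dti = pd.date_range('1900-01-01', periods=1000000, freq='T')

def extract_year(index):
    # DatetimeIndex.year runs its inner loop without holding the GIL after
    # this change, so two of these calls can make progress concurrently.
    return index.year

with ThreadPoolExecutor(max_workers=2) as pool:
    years = list(pool.map(extract_year, [dti, dti]))
```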
diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt
index 14c4276e74a3a..035e3ae2ac2f0 100755
--- a/doc/source/whatsnew/v0.17.1.txt
+++ b/doc/source/whatsnew/v0.17.1.txt
@@ -55,6 +55,10 @@ Performance Improvements

 - Checking monotonic-ness before sorting on an index (:issue:`11080`)

+
+- Release the GIL on most datetime field operations (e.g. ``DatetimeIndex.year``, ``Series.dt.year``), normalization, and conversion to and from ``Period`` (``DatetimeIndex.to_period`` and ``PeriodIndex.to_timestamp``) (:issue:`11263`)
+
+
 .. _whatsnew_0171.bug_fixes:

 Bug Fixes
diff --git a/pandas/src/datetime.pxd b/pandas/src/datetime.pxd
index 0896965162698..f2f764c785894 100644
--- a/pandas/src/datetime.pxd
+++ b/pandas/src/datetime.pxd
@@ -95,14 +95,14 @@ cdef extern from "datetime/np_datetime.h":
                                 int apply_tzinfo)

     npy_datetime pandas_datetimestruct_to_datetime(PANDAS_DATETIMEUNIT fr,
-                                                   pandas_datetimestruct *d)
+                                                   pandas_datetimestruct *d) nogil
     void pandas_datetime_to_datetimestruct(npy_datetime val,
                                            PANDAS_DATETIMEUNIT fr,
-                                           pandas_datetimestruct *result)
+                                           pandas_datetimestruct *result) nogil
     int days_per_month_table[2][12]

-    int dayofweek(int y, int m, int d)
-    int is_leapyear(int64_t year)
+    int dayofweek(int y, int m, int d) nogil
+    int is_leapyear(int64_t year) nogil
     PANDAS_DATETIMEUNIT get_datetime64_unit(object o)

 cdef extern from "datetime/np_datetime_strings.h":
diff --git a/pandas/src/period.pyx b/pandas/src/period.pyx
index 2a7c2135f8045..b431bb58bc991 100644
--- a/pandas/src/period.pyx
+++ b/pandas/src/period.pyx
@@ -76,11 +76,11 @@ cdef extern from "period_helper.h":
     int64_t get_period_ordinal(int year, int month, int day,
                           int hour, int minute, int second, int microseconds,
                           int picoseconds,
-                          int freq) except INT32_MIN
+                          int freq) nogil except INT32_MIN

     int64_t get_python_ordinal(int64_t period_ordinal, int freq) except INT32_MIN

-    int get_date_info(int64_t ordinal, int freq, date_info *dinfo) except INT32_MIN
+    int get_date_info(int64_t ordinal, int freq, date_info *dinfo) nogil except INT32_MIN
     double getAbsTime(int, int64_t, int64_t)

     int pyear(int64_t ordinal, int freq) except INT32_MIN
@@ -139,13 +139,14 @@ def dt64arr_to_periodarr(ndarray[int64_t] dtarr, int freq, tz=None):
     out = np.empty(l, dtype='i8')

     if tz is None:
-        for i in range(l):
-            if dtarr[i] == iNaT:
-                out[i] = iNaT
-                continue
-            pandas_datetime_to_datetimestruct(dtarr[i], PANDAS_FR_ns, &dts)
-            out[i] = get_period_ordinal(dts.year, dts.month, dts.day,
-                                        dts.hour, dts.min, dts.sec, dts.us, dts.ps, freq)
+        with nogil:
+            for i in range(l):
+                if dtarr[i] == NPY_NAT:
+                    out[i] = NPY_NAT
+                    continue
+                pandas_datetime_to_datetimestruct(dtarr[i], PANDAS_FR_ns, &dts)
+                out[i] = get_period_ordinal(dts.year, dts.month, dts.day,
+                                            dts.hour, dts.min, dts.sec, dts.us, dts.ps, freq)
     else:
         out = localize_dt64arr_to_period(dtarr, freq, tz)
     return out
@@ -163,11 +164,12 @@ def periodarr_to_dt64arr(ndarray[int64_t] periodarr, int freq):

     out = np.empty(l, dtype='i8')

-    for i in range(l):
-        if periodarr[i] == iNaT:
-            out[i] = iNaT
-            continue
-        out[i] = period_ordinal_to_dt64(periodarr[i], freq)
+    with nogil:
+        for i in range(l):
+            if periodarr[i] == NPY_NAT:
+                out[i] = NPY_NAT
+                continue
+            out[i] = period_ordinal_to_dt64(periodarr[i], freq)

     return out
@@ -245,13 +247,13 @@ def period_ordinal(int y, int m, int d, int h, int min, int s, int us, int ps, i
     return get_period_ordinal(y, m, d, h, min, s, us, ps, freq)


-cpdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq):
+cpdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) nogil:
     cdef:
         pandas_datetimestruct dts
         date_info dinfo
         float subsecond_fraction

-    if ordinal == iNaT:
+    if ordinal == NPY_NAT:
         return NPY_NAT

     get_date_info(ordinal, freq, &dinfo)
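These two loops are the kernels behind ``DatetimeIndex.to_period`` and ``PeriodIndex.to_timestamp``. As a reminder of the user-facing round trip they implement (a minimal sketch, not part of the patch; the frequencies are arbitrary):

```python
import pandas as pd

dti = pd.date_range('2015-01-01', periods=5, freq='H')
per = dti.to_period('D')    # datetime64 values -> period ordinals (dt64arr_to_periodarr)
back = per.to_timestamp()   # period ordinals -> datetime64 values (periodarr_to_dt64arr)
# the round trip lands on the start of each period, not the original timestamps
assert (back == pd.Timestamp('2015-01-01')).all()
```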
diff --git a/pandas/src/period_helper.c b/pandas/src/period_helper.c
index 032bc44de6355..e056b1fa9a522 100644
--- a/pandas/src/period_helper.c
+++ b/pandas/src/period_helper.c
@@ -113,7 +113,7 @@ static int dInfoCalc_SetFromDateAndTime(struct date_info *dinfo,
     int yearoffset;

     /* Range check */
-    Py_AssertWithArg(year > -(INT_MAX / 366) && year < (INT_MAX / 366),
+    Py_AssertWithArg(year > -(INT_MAX / 366) && year < (INT_MAX / 366),
                      PyExc_ValueError,
                      "year out of range: %i",
                      year);
@@ -136,7 +136,7 @@ static int dInfoCalc_SetFromDateAndTime(struct date_info *dinfo,
                       day);

     yearoffset = dInfoCalc_YearOffset(year, calendar);
-    if (PyErr_Occurred()) goto onError;
+    if (yearoffset == INT_ERR_CODE) goto onError;

     absdate = day + month_offset[leap][month - 1] + yearoffset;

@@ -155,7 +155,7 @@ static int dInfoCalc_SetFromDateAndTime(struct date_info *dinfo,

     /* Calculate the absolute time */
     {
-        Py_AssertWithArg(hour >= 0 && hour <= 23,
+        Py_AssertWithArg(hour >= 0 && hour <= 23,
                          PyExc_ValueError,
                          "hour out of range (0-23): %i",
                          hour);
@@ -212,8 +212,7 @@ int dInfoCalc_SetFromAbsDate(register struct date_info *dinfo,
     while (1) {
         /* Calculate the year offset */
         yearoffset = dInfoCalc_YearOffset(year, calendar);
-        if (PyErr_Occurred())
-            goto onError;
+        if (yearoffset == INT_ERR_CODE) goto onError;

         /* Backward correction: absdate must be greater than the
            yearoffset */
@@ -310,7 +309,7 @@ static int calc_conversion_factors_matrix_size() {
         }
         matrix_size = max_value(matrix_size, period_value);
     }
-    return matrix_size + 1;
+    return matrix_size + 1;
 }

 static void alloc_conversion_factors_matrix(int matrix_size) {
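The tslib.pyx changes that follow swap ``convert_to_tsobject``/``ts_dayofweek`` for the plain C ``dayofweek()`` helper in the ``'dow'`` branch. A quick way to convince yourself the convention is unchanged (a sketch, not part of the patch): pandas keeps Monday=0, the same as ``datetime.date.weekday()``.

```python
from datetime import date

import pandas as pd

dti = pd.date_range('2015-10-01', periods=14, freq='D')
expected = [date(ts.year, ts.month, ts.day).weekday() for ts in dti]
assert list(dti.dayofweek) == expected   # Monday=0 ... Sunday=6
```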
diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx
index 398c5f0232de1..8e6d4019c69a3 100644
--- a/pandas/tslib.pyx
+++ b/pandas/tslib.pyx
@@ -3849,6 +3849,7 @@ def get_time_micros(ndarray[int64_t] dtindex):


 @cython.wraparound(False)
+@cython.boundscheck(False)
 def get_date_field(ndarray[int64_t] dtindex, object field):
     '''
     Given a int64-based datetime index, extract the year, month, etc.,
@@ -3872,130 +3873,142 @@ def get_date_field(ndarray[int64_t] dtindex, object field):

     out = np.empty(count, dtype='i4')

     if field == 'Y':
-        for i in range(count):
-            if dtindex[i] == NPY_NAT: out[i] = -1; continue
+        with nogil:
+            for i in range(count):
+                if dtindex[i] == NPY_NAT: out[i] = -1; continue

-            pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
-            out[i] = dts.year
+                pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
+                out[i] = dts.year
         return out

     elif field == 'M':
-        for i in range(count):
-            if dtindex[i] == NPY_NAT: out[i] = -1; continue
+        with nogil:
+            for i in range(count):
+                if dtindex[i] == NPY_NAT: out[i] = -1; continue

-            pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
-            out[i] = dts.month
+                pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
+                out[i] = dts.month
         return out

     elif field == 'D':
-        for i in range(count):
-            if dtindex[i] == NPY_NAT: out[i] = -1; continue
+        with nogil:
+            for i in range(count):
+                if dtindex[i] == NPY_NAT: out[i] = -1; continue

-            pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
-            out[i] = dts.day
+                pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
+                out[i] = dts.day
         return out

     elif field == 'h':
-        for i in range(count):
-            if dtindex[i] == NPY_NAT: out[i] = -1; continue
+        with nogil:
+            for i in range(count):
+                if dtindex[i] == NPY_NAT: out[i] = -1; continue

-            pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
-            out[i] = dts.hour
+                pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
+                out[i] = dts.hour
         return out

     elif field == 'm':
-        for i in range(count):
-            if dtindex[i] == NPY_NAT: out[i] = -1; continue
+        with nogil:
+            for i in range(count):
+                if dtindex[i] == NPY_NAT: out[i] = -1; continue

-            pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
-            out[i] = dts.min
+                pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
+                out[i] = dts.min
         return out

     elif field == 's':
-        for i in range(count):
-            if dtindex[i] == NPY_NAT: out[i] = -1; continue
+        with nogil:
+            for i in range(count):
+                if dtindex[i] == NPY_NAT: out[i] = -1; continue

-            pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
-            out[i] = dts.sec
+                pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
+                out[i] = dts.sec
         return out

     elif field == 'us':
-        for i in range(count):
-            if dtindex[i] == NPY_NAT: out[i] = -1; continue
+        with nogil:
+            for i in range(count):
+                if dtindex[i] == NPY_NAT: out[i] = -1; continue

-            pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
-            out[i] = dts.us
+                pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
+                out[i] = dts.us
         return out

     elif field == 'ns':
-        for i in range(count):
-            if dtindex[i] == NPY_NAT: out[i] = -1; continue
+        with nogil:
+            for i in range(count):
+                if dtindex[i] == NPY_NAT: out[i] = -1; continue

-            pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
-            out[i] = dts.ps / 1000
+                pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
+                out[i] = dts.ps / 1000
         return out

     elif field == 'doy':
-        for i in range(count):
-            if dtindex[i] == NPY_NAT: out[i] = -1; continue
+        with nogil:
+            for i in range(count):
+                if dtindex[i] == NPY_NAT: out[i] = -1; continue

-            pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
-            isleap = is_leapyear(dts.year)
-            out[i] = _month_offset[isleap, dts.month-1] + dts.day
+                pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
+                isleap = is_leapyear(dts.year)
+                out[i] = _month_offset[isleap, dts.month-1] + dts.day
         return out

     elif field == 'dow':
-        for i in range(count):
-            if dtindex[i] == NPY_NAT: out[i] = -1; continue
+        with nogil:
+            for i in range(count):
+                if dtindex[i] == NPY_NAT: out[i] = -1; continue

-            ts = convert_to_tsobject(dtindex[i], None, None)
-            out[i] = ts_dayofweek(ts)
+                pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
+                out[i] = dayofweek(dts.year, dts.month, dts.day)
         return out

     elif field == 'woy':
-        for i in range(count):
-            if dtindex[i] == NPY_NAT: out[i] = -1; continue
-
-            pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
-            ts = convert_to_tsobject(dtindex[i], None, None)
-            isleap = is_leapyear(dts.year)
-            isleap_prev = is_leapyear(dts.year - 1)
-            mo_off = _month_offset[isleap, dts.month - 1]
-            doy = mo_off + dts.day
-            dow = ts_dayofweek(ts)
-
-            #estimate
-            woy = (doy - 1) - dow + 3
-            if woy >= 0:
-                woy = woy / 7 + 1
-
-            # verify
-            if woy < 0:
-                if (woy > -2) or (woy == -2 and isleap_prev):
-                    woy = 53
-                else:
-                    woy = 52
-            elif woy == 53:
-                if 31 - dts.day + dow < 3:
-                    woy = 1
+        with nogil:
+            for i in range(count):
+                if dtindex[i] == NPY_NAT: out[i] = -1; continue
+
+                pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
+                isleap = is_leapyear(dts.year)
+                isleap_prev = is_leapyear(dts.year - 1)
+                mo_off = _month_offset[isleap, dts.month - 1]
+                doy = mo_off + dts.day
+                dow = dayofweek(dts.year, dts.month, dts.day)
+
+                #estimate
+                woy = (doy - 1) - dow + 3
+                if woy >= 0:
+                    woy = woy / 7 + 1
+
+                # verify
+                if woy < 0:
+                    if (woy > -2) or (woy == -2 and isleap_prev):
+                        woy = 53
+                    else:
+                        woy = 52
+                elif woy == 53:
+                    if 31 - dts.day + dow < 3:
+                        woy = 1

-            out[i] = woy
+                out[i] = woy
         return out

     elif field == 'q':
-        for i in range(count):
-            if dtindex[i] == NPY_NAT: out[i] = -1; continue
+        with nogil:
+            for i in range(count):
+                if dtindex[i] == NPY_NAT: out[i] = -1; continue

-            pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
-            out[i] = dts.month
-            out[i] = ((out[i] - 1) / 3) + 1
+                pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
+                out[i] = dts.month
+                out[i] = ((out[i] - 1) / 3) + 1
         return out

     elif field == 'dim':
-        for i in range(count):
-            if dtindex[i] == NPY_NAT: out[i] = -1; continue
+        with nogil:
+            for i in range(count):
+                if dtindex[i] == NPY_NAT: out[i] = -1; continue

-            pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
-            out[i] = monthrange(dts.year, dts.month)[1]
+                pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
+                out[i] = days_in_month(dts)
         return out

     raise ValueError("Field %s not supported" % field)
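The ``'woy'`` branch is the only non-trivial field computation above. A plain-Python transcription of its estimate/verify steps (a sketch, not part of the patch) can be checked against ``datetime.date.isocalendar()``:

```python
from datetime import date

def week_of_year(y, m, d):
    dt = date(y, m, d)
    doy = dt.timetuple().tm_yday   # 1-based day of year (mo_off + dts.day above)
    dow = dt.weekday()             # Monday=0, same convention as dayofweek()
    yp = y - 1
    isleap_prev = yp % 4 == 0 and (yp % 100 != 0 or yp % 400 == 0)

    woy = (doy - 1) - dow + 3      # estimate
    if woy >= 0:
        woy = woy // 7 + 1

    # verify
    if woy < 0:
        woy = 53 if (woy > -2) or (woy == -2 and isleap_prev) else 52
    elif woy == 53:
        if 31 - d + dow < 3:
            woy = 1
    return woy

for y, m, d in [(2015, 1, 1), (2016, 1, 1), (2015, 12, 31)]:
    assert week_of_year(y, m, d) == date(y, m, d).isocalendar()[1]
```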
@@ -4239,12 +4252,13 @@ def date_normalize(ndarray[int64_t] stamps, tz=None):
         tz = maybe_get_tz(tz)
         result = _normalize_local(stamps, tz)
     else:
-        for i in range(n):
-            if stamps[i] == NPY_NAT:
-                result[i] = NPY_NAT
-                continue
-            pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts)
-            result[i] = _normalized_stamp(&dts)
+        with nogil:
+            for i in range(n):
+                if stamps[i] == NPY_NAT:
+                    result[i] = NPY_NAT
+                    continue
+                pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts)
+                result[i] = _normalized_stamp(&dts)

     return result

@@ -4256,12 +4270,13 @@ cdef _normalize_local(ndarray[int64_t] stamps, object tz):
         pandas_datetimestruct dts

     if _is_utc(tz):
-        for i in range(n):
-            if stamps[i] == NPY_NAT:
-                result[i] = NPY_NAT
-                continue
-            pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts)
-            result[i] = _normalized_stamp(&dts)
+        with nogil:
+            for i in range(n):
+                if stamps[i] == NPY_NAT:
+                    result[i] = NPY_NAT
+                    continue
+                pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts)
+                result[i] = _normalized_stamp(&dts)
     elif _is_tzlocal(tz):
         for i in range(n):
             if stamps[i] == NPY_NAT:
@@ -4304,7 +4319,7 @@ cdef _normalize_local(ndarray[int64_t] stamps, object tz):

     return result

-cdef inline int64_t _normalized_stamp(pandas_datetimestruct *dts):
+cdef inline int64_t _normalized_stamp(pandas_datetimestruct *dts) nogil:
     dts.hour = 0
     dts.min = 0
     dts.sec = 0
@@ -4369,6 +4384,8 @@ def monthrange(int64_t year, int64_t month):
 cdef inline int64_t ts_dayofweek(_TSObject ts):
     return dayofweek(ts.dts.year, ts.dts.month, ts.dts.day)

+cdef inline int days_in_month(pandas_datetimestruct dts) nogil:
+    return days_per_month_table[is_leapyear(dts.year)][dts.month-1]

 cpdef normalize_date(object dt):
     '''
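At the API level, ``_normalized_stamp`` is what ``DatetimeIndex.normalize()`` and ``Series.dt.normalize()`` apply element-wise: keep the date, zero out the time of day. A minimal sketch (not part of the patch):

```python
import pandas as pd

dti = pd.date_range('2015-11-02 09:30', periods=3, freq='H')
assert (dti.normalize() == pd.DatetimeIndex(['2015-11-02'] * 3)).all()
assert (pd.Series(dti).dt.normalize() == pd.Timestamp('2015-11-02')).all()
```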
@@ -4388,17 +4405,18 @@ cpdef normalize_date(object dt):


 cdef inline int _year_add_months(pandas_datetimestruct dts,
-                                 int months):
+                                 int months) nogil:
     '''new year number after shifting pandas_datetimestruct number of months'''
     return dts.year + (dts.month + months - 1) / 12

 cdef inline int _month_add_months(pandas_datetimestruct dts,
-                                  int months):
+                                  int months) nogil:
     '''new month number after shifting pandas_datetimestruct number of months'''
     cdef int new_month = (dts.month + months) % 12
     return 12 if new_month == 0 else new_month

 @cython.wraparound(False)
+@cython.boundscheck(False)
 def shift_months(int64_t[:] dtindex, int months, object day=None):
     '''
     Given an int64-based datetime index, shift all elements
@@ -4411,24 +4429,26 @@ def shift_months(int64_t[:] dtindex, int months, object day=None):
     '''
     cdef:
         Py_ssize_t i
-        int days_in_month
         pandas_datetimestruct dts
         int count = len(dtindex)
+        cdef int days_in_current_month
         int64_t[:] out = np.empty(count, dtype='int64')

-    for i in range(count):
-        if dtindex[i] == NPY_NAT:
-            out[i] = NPY_NAT
-        else:
-            pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
-
-            if day is None:
+    if day is None:
+        with nogil:
+            for i in range(count):
+                if dtindex[i] == NPY_NAT: out[i] = NPY_NAT; continue
+                pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
                 dts.year = _year_add_months(dts, months)
                 dts.month = _month_add_months(dts, months)

-                #prevent day from wrapping around month end
-                days_in_month = days_per_month_table[is_leapyear(dts.year)][dts.month-1]
-                dts.day = min(dts.day, days_in_month)
-            elif day == 'start':
+
+                dts.day = min(dts.day, days_in_month(dts))
+                out[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
+    elif day == 'start':
+        with nogil:
+            for i in range(count):
+                if dtindex[i] == NPY_NAT: out[i] = NPY_NAT; continue
+                pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
                 dts.year = _year_add_months(dts, months)
                 dts.month = _month_add_months(dts, months)
@@ -4439,21 +4459,28 @@ def shift_months(int64_t[:] dtindex, int months, object day=None):
                     dts.day = 1
-            elif day == 'end':
-                days_in_month = days_per_month_table[is_leapyear(dts.year)][dts.month-1]
+                out[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
+    elif day == 'end':
+        with nogil:
+            for i in range(count):
+                if dtindex[i] == NPY_NAT: out[i] = NPY_NAT; continue
+                pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
+                days_in_current_month = days_in_month(dts)
+
                 dts.year = _year_add_months(dts, months)
                 dts.month = _month_add_months(dts, months)

                 # similar semantics - when adding shift forward by one
                 # month if already at an end of month
-                if months >= 0 and dts.day == days_in_month:
+                if months >= 0 and dts.day == days_in_current_month:
                     dts.year = _year_add_months(dts, 1)
                     dts.month = _month_add_months(dts, 1)

-                days_in_month = days_per_month_table[is_leapyear(dts.year)][dts.month-1]
-                dts.day = days_in_month
+                dts.day = days_in_month(dts)
+                out[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
+    else:
+        raise ValueError("day must be None, 'start' or 'end'")

-            out[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
     return np.asarray(out)

 #----------------------------------------------------------------------
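For reference, the ``day is None`` branch of ``shift_months`` encodes the usual end-of-month clamping rule for month arithmetic: ``_month_add_months`` wraps the month, then ``min(dts.day, days_in_month(dts))`` clamps the day. A sketch of the observable behaviour (not tied to any particular internal code path, and not part of the patch):

```python
import pandas as pd

idx = pd.DatetimeIndex(['2015-01-31', '2016-01-31'])
shifted = idx + pd.DateOffset(months=1)
# (1 + 1) % 12 == 2 -> February; the day is clamped to the month length
assert list(shifted) == [pd.Timestamp('2015-02-28'), pd.Timestamp('2016-02-29')]
```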