From babd47e15532e5580215a18c0c53dc858f792906 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 2 Jun 2022 08:15:01 -0700 Subject: [PATCH] REF: avoid ravel in DTA._add_offset --- pandas/_libs/tslibs/offsets.pyx | 330 +++++++++++++++++--------------- pandas/core/arrays/datetimes.py | 2 - 2 files changed, 172 insertions(+), 160 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 5a21b1a3abb1d..d37c287be4cfd 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -2370,59 +2370,67 @@ cdef class SemiMonthOffset(SingleConstructorOffset): @cython.boundscheck(False) def _apply_array(self, dtarr): cdef: - int64_t[:] i8other = dtarr.view("i8") - Py_ssize_t i, count = len(i8other) - int64_t val - int64_t[::1] out = np.empty(count, dtype="i8") + ndarray i8other = dtarr.view("i8") + Py_ssize_t i, count = dtarr.size + int64_t val, res_val + ndarray out = cnp.PyArray_EMPTY(i8other.ndim, i8other.shape, cnp.NPY_INT64, 0) npy_datetimestruct dts int months, to_day, nadj, n = self.n int days_in_month, day, anchor_dom = self.day_of_month bint is_start = isinstance(self, SemiMonthBegin) NPY_DATETIMEUNIT reso = get_unit_from_dtype(dtarr.dtype) + cnp.broadcast mi = cnp.PyArray_MultiIterNew2(out, i8other) with nogil: for i in range(count): - val = i8other[i] - if val == NPY_NAT: - out[i] = NPY_NAT - continue + # Analogous to: val = i8other[i] + val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] - pandas_datetime_to_datetimestruct(val, reso, &dts) - day = dts.day - - # Adjust so that we are always looking at self.day_of_month, - # incrementing/decrementing n if necessary. - nadj = roll_convention(day, n, anchor_dom) - - days_in_month = get_days_in_month(dts.year, dts.month) - # For SemiMonthBegin on other.day == 1 and - # SemiMonthEnd on other.day == days_in_month, - # shifting `other` to `self.day_of_month` _always_ requires - # incrementing/decrementing `n`, regardless of whether it is - # initially positive. - if is_start and (n <= 0 and day == 1): - nadj -= 1 - elif (not is_start) and (n > 0 and day == days_in_month): - nadj += 1 - - if is_start: - # See also: SemiMonthBegin._apply - months = nadj // 2 + nadj % 2 - to_day = 1 if nadj % 2 else anchor_dom + if val == NPY_NAT: + res_val = NPY_NAT else: - # See also: SemiMonthEnd._apply - months = nadj // 2 - to_day = 31 if nadj % 2 else anchor_dom - - dts.year = year_add_months(dts, months) - dts.month = month_add_months(dts, months) - days_in_month = get_days_in_month(dts.year, dts.month) - dts.day = min(to_day, days_in_month) - - out[i] = npy_datetimestruct_to_datetime(reso, &dts) + pandas_datetime_to_datetimestruct(val, reso, &dts) + day = dts.day + + # Adjust so that we are always looking at self.day_of_month, + # incrementing/decrementing n if necessary. + nadj = roll_convention(day, n, anchor_dom) + + days_in_month = get_days_in_month(dts.year, dts.month) + # For SemiMonthBegin on other.day == 1 and + # SemiMonthEnd on other.day == days_in_month, + # shifting `other` to `self.day_of_month` _always_ requires + # incrementing/decrementing `n`, regardless of whether it is + # initially positive. + if is_start and (n <= 0 and day == 1): + nadj -= 1 + elif (not is_start) and (n > 0 and day == days_in_month): + nadj += 1 + + if is_start: + # See also: SemiMonthBegin._apply + months = nadj // 2 + nadj % 2 + to_day = 1 if nadj % 2 else anchor_dom + + else: + # See also: SemiMonthEnd._apply + months = nadj // 2 + to_day = 31 if nadj % 2 else anchor_dom + + dts.year = year_add_months(dts, months) + dts.month = month_add_months(dts, months) + days_in_month = get_days_in_month(dts.year, dts.month) + dts.day = min(to_day, days_in_month) + + res_val = npy_datetimestruct_to_datetime(reso, &dts) + + # Analogous to: out[i] = res_val + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val + + cnp.PyArray_MultiIter_NEXT(mi) - return out.base + return out cdef class SemiMonthEnd(SemiMonthOffset): @@ -2540,7 +2548,7 @@ cdef class Week(SingleConstructorOffset): @cython.wraparound(False) @cython.boundscheck(False) - cdef _end_apply_index(self, const int64_t[:] i8other, NPY_DATETIMEUNIT reso): + cdef ndarray _end_apply_index(self, ndarray i8other, NPY_DATETIMEUNIT reso): """ Add self to the given DatetimeIndex, specialized for case where self.weekday is non-null. @@ -2555,34 +2563,41 @@ cdef class Week(SingleConstructorOffset): ndarray[int64_t] """ cdef: - Py_ssize_t i, count = len(i8other) - int64_t val - int64_t[::1] out = np.empty(count, dtype="i8") + Py_ssize_t i, count = i8other.size + int64_t val, res_val + ndarray out = cnp.PyArray_EMPTY(i8other.ndim, i8other.shape, cnp.NPY_INT64, 0) npy_datetimestruct dts int wday, days, weeks, n = self.n int anchor_weekday = self.weekday int64_t DAY_PERIODS = periods_per_day(reso) + cnp.broadcast mi = cnp.PyArray_MultiIterNew2(out, i8other) with nogil: for i in range(count): - val = i8other[i] + # Analogous to: val = i8other[i] + val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + if val == NPY_NAT: - out[i] = NPY_NAT - continue + res_val = NPY_NAT + else: + pandas_datetime_to_datetimestruct(val, reso, &dts) + wday = dayofweek(dts.year, dts.month, dts.day) - pandas_datetime_to_datetimestruct(val, reso, &dts) - wday = dayofweek(dts.year, dts.month, dts.day) + days = 0 + weeks = n + if wday != anchor_weekday: + days = (anchor_weekday - wday) % 7 + if weeks > 0: + weeks -= 1 - days = 0 - weeks = n - if wday != anchor_weekday: - days = (anchor_weekday - wday) % 7 - if weeks > 0: - weeks -= 1 + res_val = val + (7 * weeks + days) * DAY_PERIODS - out[i] = val + (7 * weeks + days) * DAY_PERIODS + # Analogous to: out[i] = res_val + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val - return out.base + cnp.PyArray_MultiIter_NEXT(mi) + + return out def is_on_offset(self, dt: datetime) -> bool: if self.normalize and not _is_normalized(dt): @@ -3726,11 +3741,11 @@ cdef inline int month_add_months(npy_datetimestruct dts, int months) nogil: @cython.wraparound(False) @cython.boundscheck(False) -cdef shift_quarters( - const int64_t[:] dtindex, +cdef ndarray shift_quarters( + ndarray dtindex, int quarters, int q1start_month, - object day_opt, + str day_opt, int modby=3, NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns, ): @@ -3751,26 +3766,53 @@ cdef shift_quarters( ------- out : ndarray[int64_t] """ - cdef: - Py_ssize_t count = len(dtindex) - int64_t[::1] out = np.empty(count, dtype="int64") - if day_opt not in ["start", "end", "business_start", "business_end"]: raise ValueError("day must be None, 'start', 'end', " "'business_start', or 'business_end'") - _shift_quarters( - dtindex, out, count, quarters, q1start_month, day_opt, modby, reso=reso - ) - return np.asarray(out) + cdef: + Py_ssize_t count = dtindex.size + ndarray out = cnp.PyArray_EMPTY(dtindex.ndim, dtindex.shape, cnp.NPY_INT64, 0) + Py_ssize_t i + int64_t val, res_val + int months_since, n + npy_datetimestruct dts + cnp.broadcast mi = cnp.PyArray_MultiIterNew2(out, dtindex) + + with nogil: + for i in range(count): + # Analogous to: val = dtindex[i] + val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + + if val == NPY_NAT: + res_val = NPY_NAT + else: + pandas_datetime_to_datetimestruct(val, reso, &dts) + n = quarters + + months_since = (dts.month - q1start_month) % modby + n = _roll_qtrday(&dts, n, months_since, day_opt) + + dts.year = year_add_months(dts, modby * n - months_since) + dts.month = month_add_months(dts, modby * n - months_since) + dts.day = get_day_of_month(&dts, day_opt) + + res_val = npy_datetimestruct_to_datetime(reso, &dts) + + # Analogous to: out[i] = res_val + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val + + cnp.PyArray_MultiIter_NEXT(mi) + + return out @cython.wraparound(False) @cython.boundscheck(False) def shift_months( - const int64_t[:] dtindex, + ndarray dtindex, # int64_t, arbitrary ndim int months, - object day_opt=None, + str day_opt=None, NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns, ): """ @@ -3785,107 +3827,72 @@ def shift_months( cdef: Py_ssize_t i npy_datetimestruct dts - int count = len(dtindex) - int64_t[::1] out = np.empty(count, dtype="int64") - - if day_opt is None: - with nogil: - for i in range(count): - if dtindex[i] == NPY_NAT: - out[i] = NPY_NAT - continue - - pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) - dts.year = year_add_months(dts, months) - dts.month = month_add_months(dts, months) + int count = dtindex.size + ndarray out = cnp.PyArray_EMPTY(dtindex.ndim, dtindex.shape, cnp.NPY_INT64, 0) + int months_to_roll + int64_t val, res_val - dts.day = min(dts.day, get_days_in_month(dts.year, dts.month)) - out[i] = npy_datetimestruct_to_datetime(reso, &dts) + cnp.broadcast mi = cnp.PyArray_MultiIterNew2(out, dtindex) - elif day_opt in ["start", "end", "business_start", "business_end"]: - _shift_months(dtindex, out, count, months, day_opt, reso=reso) - else: + if day_opt not in [None, "start", "end", "business_start", "business_end"]: raise ValueError("day must be None, 'start', 'end', " "'business_start', or 'business_end'") - return np.asarray(out) - + if day_opt is None: + # TODO: can we combine this with the non-None case? + with nogil: + for i in range(count): + # Analogous to: val = i8other[i] + val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] -@cython.wraparound(False) -@cython.boundscheck(False) -cdef inline void _shift_months(const int64_t[:] dtindex, - int64_t[::1] out, - Py_ssize_t count, - int months, - str day_opt, - NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns, - ) nogil: - """ - See shift_months.__doc__ - """ - cdef: - Py_ssize_t i - int months_to_roll - npy_datetimestruct dts + if val == NPY_NAT: + res_val = NPY_NAT + else: + pandas_datetime_to_datetimestruct(val, reso, &dts) + dts.year = year_add_months(dts, months) + dts.month = month_add_months(dts, months) - for i in range(count): - if dtindex[i] == NPY_NAT: - out[i] = NPY_NAT - continue + dts.day = min(dts.day, get_days_in_month(dts.year, dts.month)) + res_val = npy_datetimestruct_to_datetime(reso, &dts) - pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) - months_to_roll = months + # Analogous to: out[i] = res_val + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val - months_to_roll = _roll_qtrday(&dts, months_to_roll, 0, day_opt) + cnp.PyArray_MultiIter_NEXT(mi) - dts.year = year_add_months(dts, months_to_roll) - dts.month = month_add_months(dts, months_to_roll) - dts.day = get_day_of_month(&dts, day_opt) + else: + with nogil: + for i in range(count): - out[i] = npy_datetimestruct_to_datetime(reso, &dts) + # Analogous to: val = i8other[i] + val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + if val == NPY_NAT: + res_val = NPY_NAT + else: + pandas_datetime_to_datetimestruct(val, reso, &dts) + months_to_roll = months -@cython.wraparound(False) -@cython.boundscheck(False) -cdef inline void _shift_quarters(const int64_t[:] dtindex, - int64_t[::1] out, - Py_ssize_t count, - int quarters, - int q1start_month, - str day_opt, - int modby, - NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns, - ) nogil: - """ - See shift_quarters.__doc__ - """ - cdef: - Py_ssize_t i - int months_since, n - npy_datetimestruct dts + months_to_roll = _roll_qtrday(&dts, months_to_roll, 0, day_opt) - for i in range(count): - if dtindex[i] == NPY_NAT: - out[i] = NPY_NAT - continue + dts.year = year_add_months(dts, months_to_roll) + dts.month = month_add_months(dts, months_to_roll) + dts.day = get_day_of_month(&dts, day_opt) - pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) - n = quarters + res_val = npy_datetimestruct_to_datetime(reso, &dts) - months_since = (dts.month - q1start_month) % modby - n = _roll_qtrday(&dts, n, months_since, day_opt) + # Analogous to: out[i] = res_val + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val - dts.year = year_add_months(dts, modby * n - months_since) - dts.month = month_add_months(dts, modby * n - months_since) - dts.day = get_day_of_month(&dts, day_opt) + cnp.PyArray_MultiIter_NEXT(mi) - out[i] = npy_datetimestruct_to_datetime(reso, &dts) + return out @cython.wraparound(False) @cython.boundscheck(False) -cdef ndarray[int64_t] _shift_bdays( - const int64_t[:] i8other, +cdef ndarray _shift_bdays( + ndarray i8other, int periods, NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns, ): @@ -3903,17 +3910,20 @@ cdef ndarray[int64_t] _shift_bdays( ndarray[int64_t] """ cdef: - Py_ssize_t i, n = len(i8other) - int64_t[::1] result = np.empty(n, dtype="i8") - int64_t val, res + Py_ssize_t i, n = i8other.size + ndarray result = cnp.PyArray_EMPTY(i8other.ndim, i8other.shape, cnp.NPY_INT64, 0) + int64_t val, res_val int wday, nadj, days npy_datetimestruct dts int64_t DAY_PERIODS = periods_per_day(reso) + cnp.broadcast mi = cnp.PyArray_MultiIterNew2(result, i8other) for i in range(n): - val = i8other[i] + # Analogous to: val = i8other[i] + val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] + if val == NPY_NAT: - result[i] = NPY_NAT + res_val = NPY_NAT else: # The rest of this is effectively a copy of BusinessDay.apply nadj = periods @@ -3941,10 +3951,14 @@ cdef ndarray[int64_t] _shift_bdays( # shift by nadj days plus 2 to get past the weekend days = nadj + 2 - res = val + (7 * weeks + days) * DAY_PERIODS - result[i] = res + res_val = val + (7 * weeks + days) * DAY_PERIODS + + # Analogous to: out[i] = res_val + (cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val - return result.base + cnp.PyArray_MultiIter_NEXT(mi) + + return result def shift_month(stamp: datetime, months: int, day_opt: object = None) -> datetime: diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 9ae3eecd9b1ca..7362e029c3d53 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -757,8 +757,6 @@ def _sub_datetime_arraylike(self, other): return new_values.view("timedelta64[ns]") def _add_offset(self, offset) -> DatetimeArray: - if self.ndim == 2: - return self.ravel()._add_offset(offset).reshape(self.shape) assert not isinstance(offset, Tick) try: