diff --git a/asv_bench/benchmarks/offset.py b/asv_bench/benchmarks/offset.py index ea826e8270ace..849776bf9a591 100644 --- a/asv_bench/benchmarks/offset.py +++ b/asv_bench/benchmarks/offset.py @@ -38,6 +38,25 @@ def time_apply_series(self, param): self.ser + self.offset +class OnOffset(object): + goal_time = 0.2 + + params = [pd.offsets.QuarterBegin(), pd.offsets.QuarterEnd(), + pd.offsets.BQuarterBegin(), pd.offsets.BQuarterEnd()] + param_names = ['offset'] + + def setup(self, offset): + self.offset = offset + self.dates = [datetime(2016, m, d) + for m in [10, 11, 12] + for d in [1, 2, 3, 28, 29, 30, 31] + if not (m == 11 and d == 31)] + + def time_on_offset(self, offset): + for date in self.dates: + self.offset.onOffset(date) + + class DatetimeIndexArithmetic(object): goal_time = 0.2 diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 78b8ca8d5a480..d0b27e1d22a89 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -116,6 +116,7 @@ Performance Improvements - ``Series`` construction will reduce the number of copies made of the input data in certain cases (:issue:`17449`) - Improved performance of :func:`Series.dt.date` and :func:`DatetimeIndex.date` (:issue:`18058`) - Improved performance of ``IntervalIndex.symmetric_difference()`` (:issue:`18475`) +- Improved performance of ``DatetimeIndex`` and ``Series`` arithmetic operations with Business-Month and Business-Quarter frequencies (:issue:`18489`) .. _whatsnew_0220.docs: diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 4ed4d4a9b7b99..654c51f0ca842 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -554,8 +554,58 @@ def shift_months(int64_t[:] dtindex, int months, object day=None): dts.day = get_days_in_month(dts.year, dts.month) out[i] = dtstruct_to_dt64(&dts) + + elif day == 'business_start': + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = NPY_NAT + continue + + dt64_to_dtstruct(dtindex[i], &dts) + months_to_roll = months + wkday, days_in_month = monthrange(dts.year, dts.month) + compare_day = get_firstbday(wkday, days_in_month) + + if months_to_roll > 0 and dts.day < compare_day: + months_to_roll -= 1 + elif months_to_roll <= 0 and dts.day > compare_day: + # as if rolled forward already + months_to_roll += 1 + + dts.year = year_add_months(dts, months_to_roll) + dts.month = month_add_months(dts, months_to_roll) + + wkday, days_in_month = monthrange(dts.year, dts.month) + dts.day = get_firstbday(wkday, days_in_month) + out[i] = dtstruct_to_dt64(&dts) + + elif day == 'business_end': + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = NPY_NAT + continue + + dt64_to_dtstruct(dtindex[i], &dts) + months_to_roll = months + wkday, days_in_month = monthrange(dts.year, dts.month) + compare_day = get_lastbday(wkday, days_in_month) + + if months_to_roll > 0 and dts.day < compare_day: + months_to_roll -= 1 + elif months_to_roll <= 0 and dts.day > compare_day: + # as if rolled forward already + months_to_roll += 1 + + dts.year = year_add_months(dts, months_to_roll) + dts.month = month_add_months(dts, months_to_roll) + + wkday, days_in_month = monthrange(dts.year, dts.month) + dts.day = get_lastbday(wkday, days_in_month) + out[i] = dtstruct_to_dt64(&dts) + else: - raise ValueError("day must be None, 'start' or 'end'") + raise ValueError("day must be None, 'start', 'end', " + "'business_start', or 'business_end'") return np.asarray(out) diff --git a/pandas/tests/tseries/offsets/test_yqm_offsets.py b/pandas/tests/tseries/offsets/test_yqm_offsets.py index 1d47cf67c6e55..292dd5eba938e 100644 --- a/pandas/tests/tseries/offsets/test_yqm_offsets.py +++ b/pandas/tests/tseries/offsets/test_yqm_offsets.py @@ -6,6 +6,7 @@ import pytest +import pandas as pd from pandas import Timestamp from pandas import compat @@ -32,6 +33,35 @@ def test_quarterly_dont_normalize(): assert (result.time() == date.time()) +@pytest.mark.parametrize('offset', [MonthBegin(), MonthEnd(), + BMonthBegin(), BMonthEnd()]) +def test_apply_index(offset): + rng = pd.date_range(start='1/1/2000', periods=100000, freq='T') + ser = pd.Series(rng) + + res = rng + offset + res_v2 = offset.apply_index(rng) + assert (res == res_v2).all() + assert res[0] == rng[0] + offset + assert res[-1] == rng[-1] + offset + res2 = ser + offset + # apply_index is only for indexes, not series, so no res2_v2 + assert res2.iloc[0] == ser.iloc[0] + offset + assert res2.iloc[-1] == ser.iloc[-1] + offset + + +@pytest.mark.parametrize('offset', [QuarterBegin(), QuarterEnd(), + BQuarterBegin(), BQuarterEnd()]) +def test_on_offset(offset): + dates = [datetime(2016, m, d) + for m in [10, 11, 12] + for d in [1, 2, 3, 28, 29, 30, 31] if not (m == 11 and d == 31)] + for date in dates: + res = offset.onOffset(date) + slow_version = date == (date + offset) - offset + assert res == slow_version + + # -------------------------------------------------------------------- # Months diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index a307b7e5817a8..8e1ead5dfbe9e 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -929,8 +929,9 @@ def name(self): if self.isAnchored: return self.rule_code else: + month = liboffsets._int_to_month[self.n] return "{code}-{month}".format(code=self.rule_code, - month=_int_to_month[self.n]) + month=month) def onOffset(self, dt): if self.normalize and not _is_normalized(dt): @@ -950,28 +951,23 @@ def apply(self, other): return shift_month(other, n, self._day_opt) + @apply_index_wraps + def apply_index(self, i): + shifted = liboffsets.shift_months(i.asi8, self.n, self._day_opt) + return i._shallow_copy(shifted) + class MonthEnd(MonthOffset): """DateOffset of one month end""" _prefix = 'M' _day_opt = 'end' - @apply_index_wraps - def apply_index(self, i): - shifted = liboffsets.shift_months(i.asi8, self.n, self._day_opt) - return i._shallow_copy(shifted) - class MonthBegin(MonthOffset): """DateOffset of one month at beginning""" _prefix = 'MS' _day_opt = 'start' - @apply_index_wraps - def apply_index(self, i): - shifted = liboffsets.shift_months(i.asi8, self.n, self._day_opt) - return i._shallow_copy(shifted) - class BusinessMonthEnd(MonthOffset): """DateOffset increments between business EOM dates""" @@ -1008,6 +1004,7 @@ class CustomBusinessMonthEnd(BusinessMixin, MonthOffset): _prefix = 'CBM' onOffset = DateOffset.onOffset # override MonthOffset method + apply_index = DateOffset.apply_index # override MonthOffset method def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', holidays=None, calendar=None, offset=timedelta(0)): @@ -1083,6 +1080,7 @@ class CustomBusinessMonthBegin(BusinessMixin, MonthOffset): _prefix = 'CBMS' onOffset = DateOffset.onOffset # override MonthOffset method + apply_index = DateOffset.apply_index # override MonthOffset method def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', holidays=None, calendar=None, offset=timedelta(0)): @@ -1603,7 +1601,7 @@ def isAnchored(self): def _from_name(cls, suffix=None): kwargs = {} if suffix: - kwargs['startingMonth'] = _month_to_int[suffix] + kwargs['startingMonth'] = liboffsets._month_to_int[suffix] else: if cls._from_name_startingMonth is not None: kwargs['startingMonth'] = cls._from_name_startingMonth @@ -1611,7 +1609,7 @@ def _from_name(cls, suffix=None): @property def rule_code(self): - month = _int_to_month[self.startingMonth] + month = liboffsets._int_to_month[self.startingMonth] return '{prefix}-{month}'.format(prefix=self._prefix, month=month) @apply_wraps @@ -1631,6 +1629,12 @@ def apply(self, other): return shift_month(other, 3 * n - months_since, self._day_opt) + def onOffset(self, dt): + if self.normalize and not _is_normalized(dt): + return False + modMonth = (dt.month - self.startingMonth) % 3 + return modMonth == 0 and dt.day == self._get_offset_day(dt) + class BQuarterEnd(QuarterOffset): """DateOffset increments between business Quarter dates @@ -1644,16 +1648,6 @@ class BQuarterEnd(QuarterOffset): _prefix = 'BQ' _day_opt = 'business_end' - def onOffset(self, dt): - if self.normalize and not _is_normalized(dt): - return False - modMonth = (dt.month - self.startingMonth) % 3 - return modMonth == 0 and dt.day == self._get_offset_day(dt) - - -_int_to_month = tslib._MONTH_ALIASES -_month_to_int = {v: k for k, v in _int_to_month.items()} - # TODO: This is basically the same as BQuarterEnd class BQuarterBegin(QuarterOffset): @@ -1680,12 +1674,6 @@ class QuarterEnd(EndMixin, QuarterOffset): def apply_index(self, i): return self._end_apply_index(i, self.freqstr) - def onOffset(self, dt): - if self.normalize and not _is_normalized(dt): - return False - modMonth = (dt.month - self.startingMonth) % 3 - return modMonth == 0 and dt.day == self._get_offset_day(dt) - class QuarterBegin(BeginMixin, QuarterOffset): _outputName = 'QuarterBegin' @@ -1697,7 +1685,8 @@ class QuarterBegin(BeginMixin, QuarterOffset): @apply_index_wraps def apply_index(self, i): freq_month = 12 if self.startingMonth == 1 else self.startingMonth - 1 - freqstr = 'Q-{month}'.format(month=_int_to_month[freq_month]) + month = liboffsets._int_to_month[freq_month] + freqstr = 'Q-{month}'.format(month=month) return self._beg_apply_index(i, freqstr) @@ -1738,12 +1727,12 @@ def __init__(self, n=1, normalize=False, month=None): def _from_name(cls, suffix=None): kwargs = {} if suffix: - kwargs['month'] = _month_to_int[suffix] + kwargs['month'] = liboffsets._month_to_int[suffix] return cls(**kwargs) @property def rule_code(self): - month = _int_to_month[self.month] + month = liboffsets._int_to_month[self.month] return '{prefix}-{month}'.format(prefix=self._prefix, month=month) @@ -1784,7 +1773,8 @@ class YearBegin(BeginMixin, YearOffset): @apply_index_wraps def apply_index(self, i): freq_month = 12 if self.month == 1 else self.month - 1 - freqstr = 'A-{month}'.format(month=_int_to_month[freq_month]) + month = liboffsets._int_to_month[freq_month] + freqstr = 'A-{month}'.format(month=month) return self._beg_apply_index(i, freqstr) @@ -1969,7 +1959,7 @@ def _get_suffix_prefix(self): def get_rule_code_suffix(self): prefix = self._get_suffix_prefix() - month = _int_to_month[self.startingMonth] + month = liboffsets._int_to_month[self.startingMonth] weekday = _int_to_weekday[self.weekday] return '{prefix}-{month}-{weekday}'.format(prefix=prefix, month=month, weekday=weekday) @@ -1984,7 +1974,7 @@ def _parse_suffix(cls, varion_code, startingMonth_code, weekday_code): raise ValueError("Unable to parse varion_code: " "{code}".format(code=varion_code)) - startingMonth = _month_to_int[startingMonth_code] + startingMonth = liboffsets._month_to_int[startingMonth_code] weekday = _weekday_to_int[weekday_code] return {"weekday": weekday,