Skip to content

Commit f745e52

Browse files
jbrockmendel authored and jreback committed
Implement business_start/end cases for shift_months (#18489)
1 parent 49ddcd5 commit f745e52

File tree

5 files changed

+126
-36
lines changed

5 files changed

+126
-36
lines changed

asv_bench/benchmarks/offset.py

+19
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,25 @@ def time_apply_series(self, param):
3838
self.ser + self.offset
3939

4040

41+
class OnOffset(object):
42+
goal_time = 0.2
43+
44+
params = [pd.offsets.QuarterBegin(), pd.offsets.QuarterEnd(),
45+
pd.offsets.BQuarterBegin(), pd.offsets.BQuarterEnd()]
46+
param_names = ['offset']
47+
48+
def setup(self, offset):
49+
self.offset = offset
50+
self.dates = [datetime(2016, m, d)
51+
for m in [10, 11, 12]
52+
for d in [1, 2, 3, 28, 29, 30, 31]
53+
if not (m == 11 and d == 31)]
54+
55+
def time_on_offset(self, offset):
56+
for date in self.dates:
57+
self.offset.onOffset(date)
58+
59+
4160
class DatetimeIndexArithmetic(object):
4261
goal_time = 0.2
4362

doc/source/whatsnew/v0.22.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -116,6 +116,7 @@ Performance Improvements
116116
- ``Series`` construction will reduce the number of copies made of the input data in certain cases (:issue:`17449`)
117117
- Improved performance of :func:`Series.dt.date` and :func:`DatetimeIndex.date` (:issue:`18058`)
118118
- Improved performance of ``IntervalIndex.symmetric_difference()`` (:issue:`18475`)
119+
- Improved performance of ``DatetimeIndex`` and ``Series`` arithmetic operations with Business-Month and Business-Quarter frequencies (:issue:`18489`)
119120

120121
.. _whatsnew_0220.docs:
121122

pandas/_libs/tslibs/offsets.pyx

+51 -1
Original file line number | Diff line number | Diff line change
@@ -554,8 +554,58 @@ def shift_months(int64_t[:] dtindex, int months, object day=None):
554554

555555
dts.day = get_days_in_month(dts.year, dts.month)
556556
out[i] = dtstruct_to_dt64(&dts)
557+
558+
elif day == 'business_start':
559+
for i in range(count):
560+
if dtindex[i] == NPY_NAT:
561+
out[i] = NPY_NAT
562+
continue
563+
564+
dt64_to_dtstruct(dtindex[i], &dts)
565+
months_to_roll = months
566+
wkday, days_in_month = monthrange(dts.year, dts.month)
567+
compare_day = get_firstbday(wkday, days_in_month)
568+
569+
if months_to_roll > 0 and dts.day < compare_day:
570+
months_to_roll -= 1
571+
elif months_to_roll <= 0 and dts.day > compare_day:
572+
# as if rolled forward already
573+
months_to_roll += 1
574+
575+
dts.year = year_add_months(dts, months_to_roll)
576+
dts.month = month_add_months(dts, months_to_roll)
577+
578+
wkday, days_in_month = monthrange(dts.year, dts.month)
579+
dts.day = get_firstbday(wkday, days_in_month)
580+
out[i] = dtstruct_to_dt64(&dts)
581+
582+
elif day == 'business_end':
583+
for i in range(count):
584+
if dtindex[i] == NPY_NAT:
585+
out[i] = NPY_NAT
586+
continue
587+
588+
dt64_to_dtstruct(dtindex[i], &dts)
589+
months_to_roll = months
590+
wkday, days_in_month = monthrange(dts.year, dts.month)
591+
compare_day = get_lastbday(wkday, days_in_month)
592+
593+
if months_to_roll > 0 and dts.day < compare_day:
594+
months_to_roll -= 1
595+
elif months_to_roll <= 0 and dts.day > compare_day:
596+
# as if rolled forward already
597+
months_to_roll += 1
598+
599+
dts.year = year_add_months(dts, months_to_roll)
600+
dts.month = month_add_months(dts, months_to_roll)
601+
602+
wkday, days_in_month = monthrange(dts.year, dts.month)
603+
dts.day = get_lastbday(wkday, days_in_month)
604+
out[i] = dtstruct_to_dt64(&dts)
605+
557606
else:
558-
raise ValueError("day must be None, 'start' or 'end'")
607+
raise ValueError("day must be None, 'start', 'end', "
608+
"'business_start', or 'business_end'")
559609

560610
return np.asarray(out)
561611

pandas/tests/tseries/offsets/test_yqm_offsets.py

+30
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66

77
import pytest
88

9+
import pandas as pd
910
from pandas import Timestamp
1011
from pandas import compat
1112

@@ -32,6 +33,35 @@ def test_quarterly_dont_normalize():
3233
assert (result.time() == date.time())
3334

3435

36+
@pytest.mark.parametrize('offset', [MonthBegin(), MonthEnd(),
37+
BMonthBegin(), BMonthEnd()])
38+
def test_apply_index(offset):
39+
rng = pd.date_range(start='1/1/2000', periods=100000, freq='T')
40+
ser = pd.Series(rng)
41+
42+
res = rng + offset
43+
res_v2 = offset.apply_index(rng)
44+
assert (res == res_v2).all()
45+
assert res[0] == rng[0] + offset
46+
assert res[-1] == rng[-1] + offset
47+
res2 = ser + offset
48+
# apply_index is only for indexes, not series, so no res2_v2
49+
assert res2.iloc[0] == ser.iloc[0] + offset
50+
assert res2.iloc[-1] == ser.iloc[-1] + offset
51+
52+
53+
@pytest.mark.parametrize('offset', [QuarterBegin(), QuarterEnd(),
54+
BQuarterBegin(), BQuarterEnd()])
55+
def test_on_offset(offset):
56+
dates = [datetime(2016, m, d)
57+
for m in [10, 11, 12]
58+
for d in [1, 2, 3, 28, 29, 30, 31] if not (m == 11 and d == 31)]
59+
for date in dates:
60+
res = offset.onOffset(date)
61+
slow_version = date == (date + offset) - offset
62+
assert res == slow_version
63+
64+
3565
# --------------------------------------------------------------------
3666
# Months
3767

pandas/tseries/offsets.py

+25 -35
Original file line number | Diff line number | Diff line change
@@ -929,8 +929,9 @@ def name(self):
929929
if self.isAnchored:
930930
return self.rule_code
931931
else:
932+
month = liboffsets._int_to_month[self.n]
932933
return "{code}-{month}".format(code=self.rule_code,
933-
month=_int_to_month[self.n])
934+
month=month)
934935

935936
def onOffset(self, dt):
936937
if self.normalize and not _is_normalized(dt):
@@ -950,28 +951,23 @@ def apply(self, other):
950951

951952
return shift_month(other, n, self._day_opt)
952953

954+
@apply_index_wraps
955+
def apply_index(self, i):
956+
shifted = liboffsets.shift_months(i.asi8, self.n, self._day_opt)
957+
return i._shallow_copy(shifted)
958+
953959

954960
class MonthEnd(MonthOffset):
955961
"""DateOffset of one month end"""
956962
_prefix = 'M'
957963
_day_opt = 'end'
958964

959-
@apply_index_wraps
960-
def apply_index(self, i):
961-
shifted = liboffsets.shift_months(i.asi8, self.n, self._day_opt)
962-
return i._shallow_copy(shifted)
963-
964965

965966
class MonthBegin(MonthOffset):
966967
"""DateOffset of one month at beginning"""
967968
_prefix = 'MS'
968969
_day_opt = 'start'
969970

970-
@apply_index_wraps
971-
def apply_index(self, i):
972-
shifted = liboffsets.shift_months(i.asi8, self.n, self._day_opt)
973-
return i._shallow_copy(shifted)
974-
975971

976972
class BusinessMonthEnd(MonthOffset):
977973
"""DateOffset increments between business EOM dates"""
@@ -1008,6 +1004,7 @@ class CustomBusinessMonthEnd(BusinessMixin, MonthOffset):
10081004
_prefix = 'CBM'
10091005

10101006
onOffset = DateOffset.onOffset # override MonthOffset method
1007+
apply_index = DateOffset.apply_index # override MonthOffset method
10111008

10121009
def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri',
10131010
holidays=None, calendar=None, offset=timedelta(0)):
@@ -1083,6 +1080,7 @@ class CustomBusinessMonthBegin(BusinessMixin, MonthOffset):
10831080
_prefix = 'CBMS'
10841081

10851082
onOffset = DateOffset.onOffset # override MonthOffset method
1083+
apply_index = DateOffset.apply_index # override MonthOffset method
10861084

10871085
def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri',
10881086
holidays=None, calendar=None, offset=timedelta(0)):
@@ -1603,15 +1601,15 @@ def isAnchored(self):
16031601
def _from_name(cls, suffix=None):
16041602
kwargs = {}
16051603
if suffix:
1606-
kwargs['startingMonth'] = _month_to_int[suffix]
1604+
kwargs['startingMonth'] = liboffsets._month_to_int[suffix]
16071605
else:
16081606
if cls._from_name_startingMonth is not None:
16091607
kwargs['startingMonth'] = cls._from_name_startingMonth
16101608
return cls(**kwargs)
16111609

16121610
@property
16131611
def rule_code(self):
1614-
month = _int_to_month[self.startingMonth]
1612+
month = liboffsets._int_to_month[self.startingMonth]
16151613
return '{prefix}-{month}'.format(prefix=self._prefix, month=month)
16161614

16171615
@apply_wraps
@@ -1631,6 +1629,12 @@ def apply(self, other):
16311629

16321630
return shift_month(other, 3 * n - months_since, self._day_opt)
16331631

1632+
def onOffset(self, dt):
1633+
if self.normalize and not _is_normalized(dt):
1634+
return False
1635+
modMonth = (dt.month - self.startingMonth) % 3
1636+
return modMonth == 0 and dt.day == self._get_offset_day(dt)
1637+
16341638

16351639
class BQuarterEnd(QuarterOffset):
16361640
"""DateOffset increments between business Quarter dates
@@ -1644,16 +1648,6 @@ class BQuarterEnd(QuarterOffset):
16441648
_prefix = 'BQ'
16451649
_day_opt = 'business_end'
16461650

1647-
def onOffset(self, dt):
1648-
if self.normalize and not _is_normalized(dt):
1649-
return False
1650-
modMonth = (dt.month - self.startingMonth) % 3
1651-
return modMonth == 0 and dt.day == self._get_offset_day(dt)
1652-
1653-
1654-
_int_to_month = tslib._MONTH_ALIASES
1655-
_month_to_int = {v: k for k, v in _int_to_month.items()}
1656-
16571651

16581652
# TODO: This is basically the same as BQuarterEnd
16591653
class BQuarterBegin(QuarterOffset):
@@ -1680,12 +1674,6 @@ class QuarterEnd(EndMixin, QuarterOffset):
16801674
def apply_index(self, i):
16811675
return self._end_apply_index(i, self.freqstr)
16821676

1683-
def onOffset(self, dt):
1684-
if self.normalize and not _is_normalized(dt):
1685-
return False
1686-
modMonth = (dt.month - self.startingMonth) % 3
1687-
return modMonth == 0 and dt.day == self._get_offset_day(dt)
1688-
16891677

16901678
class QuarterBegin(BeginMixin, QuarterOffset):
16911679
_outputName = 'QuarterBegin'
@@ -1697,7 +1685,8 @@ class QuarterBegin(BeginMixin, QuarterOffset):
16971685
@apply_index_wraps
16981686
def apply_index(self, i):
16991687
freq_month = 12 if self.startingMonth == 1 else self.startingMonth - 1
1700-
freqstr = 'Q-{month}'.format(month=_int_to_month[freq_month])
1688+
month = liboffsets._int_to_month[freq_month]
1689+
freqstr = 'Q-{month}'.format(month=month)
17011690
return self._beg_apply_index(i, freqstr)
17021691

17031692

@@ -1738,12 +1727,12 @@ def __init__(self, n=1, normalize=False, month=None):
17381727
def _from_name(cls, suffix=None):
17391728
kwargs = {}
17401729
if suffix:
1741-
kwargs['month'] = _month_to_int[suffix]
1730+
kwargs['month'] = liboffsets._month_to_int[suffix]
17421731
return cls(**kwargs)
17431732

17441733
@property
17451734
def rule_code(self):
1746-
month = _int_to_month[self.month]
1735+
month = liboffsets._int_to_month[self.month]
17471736
return '{prefix}-{month}'.format(prefix=self._prefix, month=month)
17481737

17491738

@@ -1784,7 +1773,8 @@ class YearBegin(BeginMixin, YearOffset):
17841773
@apply_index_wraps
17851774
def apply_index(self, i):
17861775
freq_month = 12 if self.month == 1 else self.month - 1
1787-
freqstr = 'A-{month}'.format(month=_int_to_month[freq_month])
1776+
month = liboffsets._int_to_month[freq_month]
1777+
freqstr = 'A-{month}'.format(month=month)
17881778
return self._beg_apply_index(i, freqstr)
17891779

17901780

@@ -1969,7 +1959,7 @@ def _get_suffix_prefix(self):
19691959

19701960
def get_rule_code_suffix(self):
19711961
prefix = self._get_suffix_prefix()
1972-
month = _int_to_month[self.startingMonth]
1962+
month = liboffsets._int_to_month[self.startingMonth]
19731963
weekday = _int_to_weekday[self.weekday]
19741964
return '{prefix}-{month}-{weekday}'.format(prefix=prefix, month=month,
19751965
weekday=weekday)
@@ -1984,7 +1974,7 @@ def _parse_suffix(cls, varion_code, startingMonth_code, weekday_code):
19841974
raise ValueError("Unable to parse varion_code: "
19851975
"{code}".format(code=varion_code))
19861976

1987-
startingMonth = _month_to_int[startingMonth_code]
1977+
startingMonth = liboffsets._month_to_int[startingMonth_code]
19881978
weekday = _weekday_to_int[weekday_code]
19891979

19901980
return {"weekday": weekday,

0 commit comments

Comments
 (0)