Skip to content

Commit f21ed4d

Browse files
jbrockmendel authored and No-Stream committed
Implement scalar shift_month mirroring tslib.shift_months (pandas-dev#18218)
1 parent d86552c commit f21ed4d

File tree

3 files changed

+89
-33
lines changed

3 files changed

+89
-33
lines changed

doc/source/whatsnew/v0.22.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -71,6 +71,7 @@ Performance Improvements
7171

7272
- Indexers on ``Series`` or ``DataFrame`` no longer create a reference cycle (:issue:`17956`)
7373
- Added a keyword argument, ``cache``, to :func:`to_datetime` that improved the performance of converting duplicate datetime arguments (:issue:`11665`)
74+
- :class:`DateOffset` arithmetic performance is improved (:issue:`18218`)
7475
-
7576

7677
.. _whatsnew_0220.docs:

pandas/_libs/tslibs/offsets.pyx

+56-3
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
cimport cython
55

66
import time
7-
from cpython.datetime cimport timedelta, time as dt_time
7+
from cpython.datetime cimport datetime, timedelta, time as dt_time
88

99
from dateutil.relativedelta import relativedelta
1010

@@ -13,9 +13,9 @@ cimport numpy as np
1313
np.import_array()
1414

1515

16-
from util cimport is_string_object
16+
from util cimport is_string_object, is_integer_object
1717

18-
from pandas._libs.tslib import pydt_to_i8
18+
from pandas._libs.tslib import pydt_to_i8, monthrange
1919

2020
from frequencies cimport get_freq_code
2121
from conversion cimport tz_convert_single
@@ -375,3 +375,56 @@ class BaseOffset(_BaseOffset):
375375
# i.e. isinstance(other, (ABCDatetimeIndex, ABCSeries))
376376
return other - self
377377
return -self + other
378+
379+
380+
# ----------------------------------------------------------------------
381+
# RelativeDelta Arithmetic
382+
383+
384+
cpdef datetime shift_month(datetime stamp, int months, object day_opt=None):
385+
"""
386+
Given a datetime (or Timestamp) `stamp`, an integer `months` and an
387+
option `day_opt`, return a new datetimelike that many months later,
388+
with day determined by `day_opt` using relativedelta semantics.
389+
390+
Scalar analogue of tslib.shift_months
391+
392+
Parameters
393+
----------
394+
stamp : datetime or Timestamp
395+
months : int
396+
day_opt : None, 'start', 'end', or an integer
397+
None: returned datetimelike has the same day as the input, or the
398+
last day of the month if the new month is too short
399+
'start': returned datetimelike has day=1
400+
'end': returned datetimelike has day on the last day of the month
401+
int: returned datetimelike has day equal to day_opt
402+
403+
Returns
404+
-------
405+
shifted : datetime or Timestamp (same as input `stamp`)
406+
"""
407+
cdef:
408+
int year, month, day
409+
int dim, dy
410+
411+
dy = (stamp.month + months) // 12
412+
month = (stamp.month + months) % 12
413+
414+
if month == 0:
415+
month = 12
416+
dy -= 1
417+
year = stamp.year + dy
418+
419+
dim = monthrange(year, month)[1]
420+
if day_opt is None:
421+
day = min(stamp.day, dim)
422+
elif day_opt == 'start':
423+
day = 1
424+
elif day_opt == 'end':
425+
day = dim
426+
elif is_integer_object(day_opt):
427+
day = min(day_opt, dim)
428+
else:
429+
raise ValueError(day_opt)
430+
return stamp.replace(year=year, month=month, day=day)

pandas/tseries/offsets.py

+32-30
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
_int_to_weekday, _weekday_to_int,
2323
_determine_offset,
2424
apply_index_wraps,
25+
shift_month,
2526
BeginMixin, EndMixin,
2627
BaseOffset)
2728

@@ -252,6 +253,8 @@ def apply_index(self, i):
252253
"applied vectorized".format(kwd=kwd))
253254

254255
def isAnchored(self):
256+
# TODO: Does this make sense for the general case? It would help
257+
# if there were a canonical docstring for what isAnchored means.
255258
return (self.n == 1)
256259

257260
def _params(self):
@@ -721,6 +724,7 @@ def apply(self, other):
721724

722725
return result
723726
else:
727+
# TODO: Figure out the end of this sentence (the message below stops after "with ")
724728
raise ApplyTypeError(
725729
'Only know how to combine business hour with ')
726730

@@ -927,10 +931,10 @@ def apply(self, other):
927931
n = self.n
928932
_, days_in_month = tslib.monthrange(other.year, other.month)
929933
if other.day != days_in_month:
930-
other = other + relativedelta(months=-1, day=31)
934+
other = shift_month(other, -1, 'end')
931935
if n <= 0:
932936
n = n + 1
933-
other = other + relativedelta(months=n, day=31)
937+
other = shift_month(other, n, 'end')
934938
return other
935939

936940
@apply_index_wraps
@@ -956,7 +960,7 @@ def apply(self, other):
956960
if other.day > 1 and n <= 0: # then roll forward if n<=0
957961
n += 1
958962

959-
return other + relativedelta(months=n, day=1)
963+
return shift_month(other, n, 'start')
960964

961965
@apply_index_wraps
962966
def apply_index(self, i):
@@ -1002,12 +1006,12 @@ def apply(self, other):
10021006
if not self.onOffset(other):
10031007
_, days_in_month = tslib.monthrange(other.year, other.month)
10041008
if 1 < other.day < self.day_of_month:
1005-
other += relativedelta(day=self.day_of_month)
1009+
other = other.replace(day=self.day_of_month)
10061010
if n > 0:
10071011
# rollforward so subtract 1
10081012
n -= 1
10091013
elif self.day_of_month < other.day < days_in_month:
1010-
other += relativedelta(day=self.day_of_month)
1014+
other = other.replace(day=self.day_of_month)
10111015
if n < 0:
10121016
# rollforward in the negative direction so add 1
10131017
n += 1
@@ -1084,19 +1088,19 @@ def onOffset(self, dt):
10841088
def _apply(self, n, other):
10851089
# if other.day is not day_of_month move to day_of_month and update n
10861090
if other.day < self.day_of_month:
1087-
other += relativedelta(day=self.day_of_month)
1091+
other = other.replace(day=self.day_of_month)
10881092
if n > 0:
10891093
n -= 1
10901094
elif other.day > self.day_of_month:
1091-
other += relativedelta(day=self.day_of_month)
1095+
other = other.replace(day=self.day_of_month)
10921096
if n == 0:
10931097
n = 1
10941098
else:
10951099
n += 1
10961100

10971101
months = n // 2
10981102
day = 31 if n % 2 else self.day_of_month
1099-
return other + relativedelta(months=months, day=day)
1103+
return shift_month(other, months, day)
11001104

11011105
def _get_roll(self, i, before_day_of_month, after_day_of_month):
11021106
n = self.n
@@ -1141,21 +1145,21 @@ def onOffset(self, dt):
11411145
def _apply(self, n, other):
11421146
# if other.day is not day_of_month move to day_of_month and update n
11431147
if other.day < self.day_of_month:
1144-
other += relativedelta(day=self.day_of_month)
1148+
other = other.replace(day=self.day_of_month)
11451149
if n == 0:
11461150
n = -1
11471151
else:
11481152
n -= 1
11491153
elif other.day > self.day_of_month:
1150-
other += relativedelta(day=self.day_of_month)
1154+
other = other.replace(day=self.day_of_month)
11511155
if n == 0:
11521156
n = 1
11531157
elif n < 0:
11541158
n += 1
11551159

11561160
months = n // 2 + n % 2
11571161
day = 1 if n % 2 else self.day_of_month
1158-
return other + relativedelta(months=months, day=day)
1162+
return shift_month(other, months, day)
11591163

11601164
def _get_roll(self, i, before_day_of_month, after_day_of_month):
11611165
n = self.n
@@ -1191,7 +1195,7 @@ def apply(self, other):
11911195
n = n - 1
11921196
elif n <= 0 and other.day > lastBDay:
11931197
n = n + 1
1194-
other = other + relativedelta(months=n, day=31)
1198+
other = shift_month(other, n, 'end')
11951199

11961200
if other.weekday() > 4:
11971201
other = other - BDay()
@@ -1215,7 +1219,7 @@ def apply(self, other):
12151219
other = other + timedelta(days=first - other.day)
12161220
n -= 1
12171221

1218-
other = other + relativedelta(months=n)
1222+
other = shift_month(other, n, None)
12191223
wkday, _ = tslib.monthrange(other.year, other.month)
12201224
first = _get_firstbday(wkday)
12211225
result = datetime(other.year, other.month, first,
@@ -1520,8 +1524,7 @@ def apply(self, other):
15201524
else:
15211525
months = self.n + 1
15221526

1523-
other = self.getOffsetOfMonth(
1524-
other + relativedelta(months=months, day=1))
1527+
other = self.getOffsetOfMonth(shift_month(other, months, 'start'))
15251528
other = datetime(other.year, other.month, other.day, base.hour,
15261529
base.minute, base.second, base.microsecond)
15271530
return other
@@ -1612,8 +1615,7 @@ def apply(self, other):
16121615
else:
16131616
months = self.n + 1
16141617

1615-
return self.getOffsetOfMonth(
1616-
other + relativedelta(months=months, day=1))
1618+
return self.getOffsetOfMonth(shift_month(other, months, 'start'))
16171619

16181620
def getOffsetOfMonth(self, dt):
16191621
m = MonthEnd()
@@ -1716,7 +1718,7 @@ def apply(self, other):
17161718
elif n <= 0 and other.day > lastBDay and monthsToGo == 0:
17171719
n = n + 1
17181720

1719-
other = other + relativedelta(months=monthsToGo + 3 * n, day=31)
1721+
other = shift_month(other, monthsToGo + 3 * n, 'end')
17201722
other = tslib._localize_pydatetime(other, base.tzinfo)
17211723
if other.weekday() > 4:
17221724
other = other - BDay()
@@ -1761,7 +1763,7 @@ def apply(self, other):
17611763
n = n - 1
17621764

17631765
# get the first bday for result
1764-
other = other + relativedelta(months=3 * n - monthsSince)
1766+
other = shift_month(other, 3 * n - monthsSince, None)
17651767
wkday, _ = tslib.monthrange(other.year, other.month)
17661768
first = _get_firstbday(wkday)
17671769
result = datetime(other.year, other.month, first,
@@ -1795,7 +1797,7 @@ def apply(self, other):
17951797
if n > 0 and not (other.day >= days_in_month and monthsToGo == 0):
17961798
n = n - 1
17971799

1798-
other = other + relativedelta(months=monthsToGo + 3 * n, day=31)
1800+
other = shift_month(other, monthsToGo + 3 * n, 'end')
17991801
return other
18001802

18011803
@apply_index_wraps
@@ -1830,7 +1832,7 @@ def apply(self, other):
18301832
# after start, so come back an extra period as if rolled forward
18311833
n = n + 1
18321834

1833-
other = other + relativedelta(months=3 * n - monthsSince, day=1)
1835+
other = shift_month(other, 3 * n - monthsSince, 'start')
18341836
return other
18351837

18361838
@apply_index_wraps
@@ -1889,7 +1891,7 @@ def apply(self, other):
18891891
(other.month == self.month and other.day > lastBDay)):
18901892
years += 1
18911893

1892-
other = other + relativedelta(years=years)
1894+
other = shift_month(other, 12 * years, None)
18931895

18941896
_, days_in_month = tslib.monthrange(other.year, self.month)
18951897
result = datetime(other.year, self.month, days_in_month,
@@ -1927,7 +1929,7 @@ def apply(self, other):
19271929
years += 1
19281930

19291931
# set first bday for result
1930-
other = other + relativedelta(years=years)
1932+
other = shift_month(other, years * 12, None)
19311933
wkday, days_in_month = tslib.monthrange(other.year, self.month)
19321934
first = _get_firstbday(wkday)
19331935
return datetime(other.year, self.month, first, other.hour,
@@ -2145,8 +2147,8 @@ def onOffset(self, dt):
21452147

21462148
if self.variation == "nearest":
21472149
# We have to check the year end of "this" cal year AND the previous
2148-
return year_end == dt or \
2149-
self.get_year_end(dt - relativedelta(months=1)) == dt
2150+
return (year_end == dt or
2151+
self.get_year_end(shift_month(dt, -1, None)) == dt)
21502152
else:
21512153
return year_end == dt
21522154

@@ -2226,8 +2228,8 @@ def get_year_end(self, dt):
22262228
def get_target_month_end(self, dt):
22272229
target_month = datetime(
22282230
dt.year, self.startingMonth, 1, tzinfo=dt.tzinfo)
2229-
next_month_first_of = target_month + relativedelta(months=+1)
2230-
return next_month_first_of + relativedelta(days=-1)
2231+
next_month_first_of = shift_month(target_month, 1, None)
2232+
return next_month_first_of + timedelta(days=-1)
22312233

22322234
def _get_year_end_nearest(self, dt):
22332235
target_date = self.get_target_month_end(dt)
@@ -2382,7 +2384,7 @@ def apply(self, other):
23822384
qtr_lens = self.get_weeks(other + self._offset)
23832385

23842386
for weeks in qtr_lens:
2385-
start += relativedelta(weeks=weeks)
2387+
start += timedelta(weeks=weeks)
23862388
if start > other:
23872389
other = start
23882390
n -= 1
@@ -2399,7 +2401,7 @@ def apply(self, other):
23992401
qtr_lens = self.get_weeks(other)
24002402

24012403
for weeks in reversed(qtr_lens):
2402-
end -= relativedelta(weeks=weeks)
2404+
end -= timedelta(weeks=weeks)
24032405
if end < other:
24042406
other = end
24052407
n -= 1
@@ -2442,7 +2444,7 @@ def onOffset(self, dt):
24422444

24432445
current = next_year_end
24442446
for qtr_len in qtr_lens[0:4]:
2445-
current += relativedelta(weeks=qtr_len)
2447+
current += timedelta(weeks=qtr_len)
24462448
if dt == current:
24472449
return True
24482450
return False

0 commit comments

Comments
 (0)