Skip to content

Commit 9fc9201

Browse files
committed
Merge pull request #11205 from chris-b1/faster-offsets
PERF: vectorized DateOffset with months
2 parents 6ab626f + e58f18c commit 9fc9201

File tree

5 files changed

+98
-16
lines changed

5 files changed

+98
-16
lines changed

doc/source/whatsnew/v0.17.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1059,6 +1059,7 @@ Performance Improvements
10591059
- 2x improvement of ``Series.value_counts`` for float dtype (:issue:`10821`)
10601060
- Enable ``infer_datetime_format`` in ``to_datetime`` when date components do not have 0 padding (:issue:`11142`)
10611061
- Regression from 0.16.1 in constructing ``DataFrame`` from nested dictionary (:issue:`11084`)
1062+
- Performance improvements in addition/subtraction operations for ``DateOffset`` with ``Series`` or ``DatetimeIndex`` (:issue:`10744`, :issue:`11205`)
10621063

10631064
.. _whatsnew_0170.bug_fixes:
10641065

pandas/tseries/offsets.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -261,16 +261,12 @@ def apply_index(self, i):
261261
# relativedelta/_offset path only valid for base DateOffset
262262
if (self._use_relativedelta and
263263
set(self.kwds).issubset(relativedelta_fast)):
264+
264265
months = ((self.kwds.get('years', 0) * 12
265266
+ self.kwds.get('months', 0)) * self.n)
266267
if months:
267-
base = (i.to_period('M') + months).to_timestamp()
268-
time = i.to_perioddelta('D')
269-
days = i.to_perioddelta('M') - time
270-
# minimum prevents month-end from wrapping
271-
day_offset = np.minimum(days,
272-
to_timedelta(base.days_in_month - 1, unit='D'))
273-
i = base + day_offset + time
268+
shifted = tslib.shift_months(i.asi8, months)
269+
i = i._shallow_copy(shifted)
274270

275271
weeks = (self.kwds.get('weeks', 0)) * self.n
276272
if weeks:
@@ -1081,7 +1077,9 @@ def apply(self, other):
10811077

10821078
@apply_index_wraps
10831079
def apply_index(self, i):
1084-
return self._end_apply_index(i, 'M')
1080+
months = self.n - 1 if self.n >= 0 else self.n
1081+
shifted = tslib.shift_months(i.asi8, months, 'end')
1082+
return i._shallow_copy(shifted)
10851083

10861084
def onOffset(self, dt):
10871085
if self.normalize and not _is_normalized(dt):
@@ -1106,7 +1104,9 @@ def apply(self, other):
11061104

11071105
@apply_index_wraps
11081106
def apply_index(self, i):
1109-
return self._beg_apply_index(i, 'M')
1107+
months = self.n + 1 if self.n < 0 else self.n
1108+
shifted = tslib.shift_months(i.asi8, months, 'start')
1109+
return i._shallow_copy(shifted)
11101110

11111111
def onOffset(self, dt):
11121112
if self.normalize and not _is_normalized(dt):

pandas/tseries/tests/test_timeseries.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -2565,32 +2565,32 @@ def test_datetime64_with_DateOffset(self):
25652565
for klass, assert_func in zip([Series, DatetimeIndex],
25662566
[self.assert_series_equal,
25672567
tm.assert_index_equal]):
2568-
s = klass(date_range('2000-01-01', '2000-01-31'))
2568+
s = klass(date_range('2000-01-01', '2000-01-31'), name='a')
25692569
result = s + pd.DateOffset(years=1)
25702570
result2 = pd.DateOffset(years=1) + s
2571-
exp = klass(date_range('2001-01-01', '2001-01-31'))
2571+
exp = klass(date_range('2001-01-01', '2001-01-31'), name='a')
25722572
assert_func(result, exp)
25732573
assert_func(result2, exp)
25742574

25752575
result = s - pd.DateOffset(years=1)
2576-
exp = klass(date_range('1999-01-01', '1999-01-31'))
2576+
exp = klass(date_range('1999-01-01', '1999-01-31'), name='a')
25772577
assert_func(result, exp)
25782578

25792579
s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'),
2580-
pd.Timestamp('2000-02-15', tz='US/Central')])
2580+
pd.Timestamp('2000-02-15', tz='US/Central')], name='a')
25812581
result = s + pd.offsets.Day()
25822582
result2 = pd.offsets.Day() + s
25832583
exp = klass([Timestamp('2000-01-16 00:15:00', tz='US/Central'),
2584-
Timestamp('2000-02-16', tz='US/Central')])
2584+
Timestamp('2000-02-16', tz='US/Central')], name='a')
25852585
assert_func(result, exp)
25862586
assert_func(result2, exp)
25872587

25882588
s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'),
2589-
pd.Timestamp('2000-02-15', tz='US/Central')])
2589+
pd.Timestamp('2000-02-15', tz='US/Central')], name='a')
25902590
result = s + pd.offsets.MonthEnd()
25912591
result2 = pd.offsets.MonthEnd() + s
25922592
exp = klass([Timestamp('2000-01-31 00:15:00', tz='US/Central'),
2593-
Timestamp('2000-02-29', tz='US/Central')])
2593+
Timestamp('2000-02-29', tz='US/Central')], name='a')
25942594
assert_func(result, exp)
25952595
assert_func(result2, exp)
25962596

pandas/tseries/tests/test_tslib.py

+11
Original file line numberDiff line numberDiff line change
@@ -949,6 +949,17 @@ def compare_local_to_utc(tz_didx, utc_didx):
949949
tslib.maybe_get_tz('Asia/Tokyo'))
950950
self.assert_numpy_array_equal(result, np.array([tslib.iNaT], dtype=np.int64))
951951

952+
def test_shift_months(self):
953+
s = DatetimeIndex([Timestamp('2000-01-05 00:15:00'), Timestamp('2000-01-31 00:23:00'),
954+
Timestamp('2000-01-01'), Timestamp('2000-02-29'), Timestamp('2000-12-31')])
955+
for years in [-1, 0, 1]:
956+
for months in [-2, 0, 2]:
957+
actual = DatetimeIndex(tslib.shift_months(s.asi8, years * 12 + months))
958+
expected = DatetimeIndex([x + offsets.DateOffset(years=years, months=months) for x in s])
959+
tm.assert_index_equal(actual, expected)
960+
961+
962+
952963
class TestTimestampOps(tm.TestCase):
953964
def test_timestamp_and_datetime(self):
954965
self.assertEqual((Timestamp(datetime.datetime(2013, 10, 13)) - datetime.datetime(2013, 10, 12)).days, 1)

pandas/tslib.pyx

+70
Original file line numberDiff line numberDiff line change
@@ -3847,6 +3847,7 @@ def get_time_micros(ndarray[int64_t] dtindex):
38473847

38483848
return micros
38493849

3850+
38503851
@cython.wraparound(False)
38513852
def get_date_field(ndarray[int64_t] dtindex, object field):
38523853
'''
@@ -4386,6 +4387,75 @@ cpdef normalize_date(object dt):
43864387
raise TypeError('Unrecognized type: %s' % type(dt))
43874388

43884389

4390+
cdef inline int _year_add_months(pandas_datetimestruct dts,
4391+
int months):
4392+
'''new year number after shifting a pandas_datetimestruct by the given number of months'''
4393+
return dts.year + (dts.month + months - 1) / 12
4394+
4395+
cdef inline int _month_add_months(pandas_datetimestruct dts,
4396+
int months):
4397+
'''new month number (1-12) after shifting a pandas_datetimestruct by the given number of months'''
4398+
cdef int new_month = (dts.month + months) % 12
4399+
return 12 if new_month == 0 else new_month
4400+
4401+
@cython.wraparound(False)
4402+
def shift_months(int64_t[:] dtindex, int months, object day=None):
4403+
'''
4404+
Given an int64-based datetime index, shift all elements
4405+
by the specified number of months using DateOffset semantics
4406+
4407+
day: {None, 'start', 'end'}
4408+
* None: keep the original day of month, clamped to the last day of the target month
4409+
* 'start': 1st day of month
4410+
* 'end': last day of month
4411+
'''
4412+
cdef:
4413+
Py_ssize_t i
4414+
int days_in_month
4415+
pandas_datetimestruct dts
4416+
int count = len(dtindex)
4417+
int64_t[:] out = np.empty(count, dtype='int64')
4418+
4419+
for i in range(count):
4420+
if dtindex[i] == NPY_NAT:
4421+
out[i] = NPY_NAT
4422+
else:
4423+
pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
4424+
4425+
if day is None:
4426+
dts.year = _year_add_months(dts, months)
4427+
dts.month = _month_add_months(dts, months)
4428+
#prevent day from wrapping around month end
4429+
days_in_month = days_per_month_table[is_leapyear(dts.year)][dts.month-1]
4430+
dts.day = min(dts.day, days_in_month)
4431+
elif day == 'start':
4432+
dts.year = _year_add_months(dts, months)
4433+
dts.month = _month_add_months(dts, months)
4434+
4435+
# offset semantics - when subtracting if at the start anchor
4436+
# point, shift back by one more month
4437+
if months <= 0 and dts.day == 1:
4438+
dts.year = _year_add_months(dts, -1)
4439+
dts.month = _month_add_months(dts, -1)
4440+
else:
4441+
dts.day = 1
4442+
elif day == 'end':
4443+
days_in_month = days_per_month_table[is_leapyear(dts.year)][dts.month-1]
4444+
dts.year = _year_add_months(dts, months)
4445+
dts.month = _month_add_months(dts, months)
4446+
4447+
# similar semantics - when adding shift forward by one
4448+
# month if already at an end of month
4449+
if months >= 0 and dts.day == days_in_month:
4450+
dts.year = _year_add_months(dts, 1)
4451+
dts.month = _month_add_months(dts, 1)
4452+
4453+
days_in_month = days_per_month_table[is_leapyear(dts.year)][dts.month-1]
4454+
dts.day = days_in_month
4455+
4456+
out[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
4457+
return np.asarray(out)
4458+
43894459
#----------------------------------------------------------------------
43904460
# Don't even ask
43914461

0 commit comments

Comments
 (0)