Skip to content

BUG: vectorized DateOffset match non-vectorized #11427

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 13, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions doc/source/timeseries.rst
Original file line number Diff line number Diff line change
Expand Up @@ -954,6 +954,52 @@ These can be used as arguments to ``date_range``, ``bdate_range``, constructors
for ``DatetimeIndex``, as well as various other timeseries-related functions
in pandas.

Anchored Offset Semantics
~~~~~~~~~~~~~~~~~~~~~~~~~

For those offsets that are anchored to the start or end of a specific
frequency (``MonthEnd``, ``MonthBegin``, ``WeekEnd``, etc.), the following
rules apply to rolling forward and backwards.

When ``n`` is not 0, if the given date is not on an anchor point, it is snapped to the next
(for positive ``n``) or previous (for negative ``n``) anchor point, and then moved ``|n|-1``
additional steps forwards or backwards.

.. ipython:: python

pd.Timestamp('2014-01-02') + MonthBegin(n=1)
pd.Timestamp('2014-01-02') + MonthEnd(n=1)

pd.Timestamp('2014-01-02') - MonthBegin(n=1)
pd.Timestamp('2014-01-02') - MonthEnd(n=1)

pd.Timestamp('2014-01-02') + MonthBegin(n=4)
pd.Timestamp('2014-01-02') - MonthBegin(n=4)

If the given date *is* on an anchor point, it is moved ``|n|`` points forwards
or backwards.

.. ipython:: python

pd.Timestamp('2014-01-01') + MonthBegin(n=1)
pd.Timestamp('2014-01-31') + MonthEnd(n=1)

pd.Timestamp('2014-01-01') - MonthBegin(n=1)
pd.Timestamp('2014-01-31') - MonthEnd(n=1)

pd.Timestamp('2014-01-01') + MonthBegin(n=4)
pd.Timestamp('2014-01-31') - MonthBegin(n=4)

For the case when ``n=0``, the date is not moved if on an anchor point, otherwise
it is rolled forward to the next anchor point.

.. ipython:: python

pd.Timestamp('2014-01-02') + MonthBegin(n=0)
pd.Timestamp('2014-01-02') + MonthEnd(n=0)

pd.Timestamp('2014-01-01') + MonthBegin(n=0)
pd.Timestamp('2014-01-31') + MonthEnd(n=0)

.. _timeseries.legacyaliases:

Legacy Aliases
Expand Down
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.18.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ Bug Fixes




- Bug in vectorized ``DateOffset`` when ``n`` parameter is ``0`` (:issue:`11370`)



Expand Down
11 changes: 5 additions & 6 deletions pandas/tseries/offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ def _beg_apply_index(self, i, freq):
from pandas.tseries.frequencies import get_freq_code
base, mult = get_freq_code(freq)
base_period = i.to_period(base)
if self.n < 0:
if self.n <= 0:
# when subtracting, dates on start roll to prior
roll = np.where(base_period.to_timestamp() == i - off,
self.n, self.n + 1)
Expand All @@ -464,7 +464,7 @@ def _end_apply_index(self, i, freq):
base, mult = get_freq_code(freq)
base_period = i.to_period(base)
if self.n > 0:
# when adding, dtates on end roll to next
# when adding, dates on end roll to next
roll = np.where(base_period.to_timestamp(how='end') == i - off,
self.n, self.n - 1)
else:
Expand Down Expand Up @@ -1081,8 +1081,7 @@ def apply(self, other):

@apply_index_wraps
def apply_index(self, i):
months = self.n - 1 if self.n >= 0 else self.n
shifted = tslib.shift_months(i.asi8, months, 'end')
shifted = tslib.shift_months(i.asi8, self.n, 'end')
return i._shallow_copy(shifted)

def onOffset(self, dt):
Expand All @@ -1108,8 +1107,7 @@ def apply(self, other):

@apply_index_wraps
def apply_index(self, i):
months = self.n + 1 if self.n < 0 else self.n
shifted = tslib.shift_months(i.asi8, months, 'start')
shifted = tslib.shift_months(i.asi8, self.n, 'start')
return i._shallow_copy(shifted)

def onOffset(self, dt):
Expand Down Expand Up @@ -1777,6 +1775,7 @@ def apply(self, other):
@apply_index_wraps
def apply_index(self, i):
freq_month = 12 if self.startingMonth == 1 else self.startingMonth - 1
# freq_month = self.startingMonth
freqstr = 'Q-%s' % (_int_to_month[freq_month],)
return self._beg_apply_index(i, freqstr)

Expand Down
15 changes: 10 additions & 5 deletions pandas/tseries/tests/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -2622,7 +2622,8 @@ def test_datetime64_with_DateOffset(self):
assert_func(result, exp)

s = klass([Timestamp('2000-01-05 00:15:00'), Timestamp('2000-01-31 00:23:00'),
Timestamp('2000-01-01'), Timestamp('2000-02-29'), Timestamp('2000-12-31')])
Timestamp('2000-01-01'), Timestamp('2000-03-31'),
Timestamp('2000-02-29'), Timestamp('2000-12-31')])

#DateOffset relativedelta fastpath
relative_kwargs = [('years', 2), ('months', 5), ('days', 3),
Expand Down Expand Up @@ -2659,11 +2660,15 @@ def test_datetime64_with_DateOffset(self):
else:
do = do
kwargs = {}
op = getattr(pd.offsets,do)(5, normalize=normalize, **kwargs)
assert_func(klass([x + op for x in s]), s + op)
assert_func(klass([x - op for x in s]), s - op)
assert_func(klass([op + x for x in s]), op + s)

for n in [0, 5]:
if (do in ['WeekOfMonth','LastWeekOfMonth',
'FY5253Quarter','FY5253'] and n == 0):
continue
op = getattr(pd.offsets,do)(n, normalize=normalize, **kwargs)
assert_func(klass([x + op for x in s]), s + op)
assert_func(klass([x - op for x in s]), s - op)
assert_func(klass([op + x for x in s]), op + s)
# def test_add_timedelta64(self):
# rng = date_range('1/1/2000', periods=5)
# delta = rng.values[3] - rng.values[1]
Expand Down
43 changes: 26 additions & 17 deletions pandas/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -4458,7 +4458,8 @@ def shift_months(int64_t[:] dtindex, int months, object day=None):
Py_ssize_t i
pandas_datetimestruct dts
int count = len(dtindex)
int days_in_current_month
int months_to_roll
bint roll_check
int64_t[:] out = np.empty(count, dtype='int64')

if day is None:
Expand All @@ -4472,36 +4473,44 @@ def shift_months(int64_t[:] dtindex, int months, object day=None):
dts.day = min(dts.day, days_in_month(dts))
out[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
elif day == 'start':
roll_check = False
if months <= 0:
months += 1
roll_check = True
with nogil:
for i in range(count):
if dtindex[i] == NPY_NAT: out[i] = NPY_NAT; continue
pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
dts.year = _year_add_months(dts, months)
dts.month = _month_add_months(dts, months)
months_to_roll = months

# offset semantics - if on the anchor point and going backwards
# shift to next
if roll_check and dts.day == 1:
months_to_roll -= 1

dts.year = _year_add_months(dts, months_to_roll)
dts.month = _month_add_months(dts, months_to_roll)
dts.day = 1

# offset semantics - when subtracting if at the start anchor
# point, shift back by one more month
if months <= 0 and dts.day == 1:
dts.year = _year_add_months(dts, -1)
dts.month = _month_add_months(dts, -1)
else:
dts.day = 1
out[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
elif day == 'end':
roll_check = False
if months > 0:
months -= 1
roll_check = True
with nogil:
for i in range(count):
if dtindex[i] == NPY_NAT: out[i] = NPY_NAT; continue
pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
days_in_current_month = days_in_month(dts)

dts.year = _year_add_months(dts, months)
dts.month = _month_add_months(dts, months)
months_to_roll = months

# similar semantics - when adding shift forward by one
# month if already at an end of month
if months >= 0 and dts.day == days_in_current_month:
dts.year = _year_add_months(dts, 1)
dts.month = _month_add_months(dts, 1)
if roll_check and dts.day == days_in_month(dts):
months_to_roll += 1

dts.year = _year_add_months(dts, months_to_roll)
dts.month = _month_add_months(dts, months_to_roll)

dts.day = days_in_month(dts)
out[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
Expand Down