Skip to content

implement shift_quarters --> apply_index for quarters and years #18522

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Nov 27, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 154 additions & 2 deletions pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@ np.import_array()

from util cimport is_string_object, is_integer_object

from pandas._libs.tslib import monthrange

from conversion cimport tz_convert_single, pydt_to_i8
from frequencies cimport get_freq_code
from nattype cimport NPY_NAT
Expand Down Expand Up @@ -471,6 +469,160 @@ cdef inline int month_add_months(pandas_datetimestruct dts, int months) nogil:
return 12 if new_month == 0 else new_month


@cython.wraparound(False)
@cython.boundscheck(False)
def shift_quarters(int64_t[:] dtindex, int quarters,
                   int q1start_month, object day, int modby=3):
    """
    Given an int64 array representing nanosecond timestamps, shift all elements
    by the specified number of quarters using DateOffset semantics.

    Only the year, month and day fields of each timestamp are rewritten;
    entries equal to NPY_NAT are copied through unchanged.

    Parameters
    ----------
    dtindex : int64_t[:] timestamps for input dates
    quarters : int number of quarters to shift
    q1start_month : int month in which Q1 begins by convention
    day : {'start', 'end', 'business_start', 'business_end'}
    modby : int (3 for quarters, 12 for years)

    Returns
    -------
    out : ndarray[int64_t]

    Raises
    ------
    ValueError
        If `day` is not one of the four supported anchor names.
    """
    cdef:
        Py_ssize_t i
        pandas_datetimestruct dts
        int count = len(dtindex)
        int months_since, n, compare_day
        int64_t[:] out = np.empty(count, dtype='int64')

    if day == 'start':
        with nogil:
            for i in range(count):
                if dtindex[i] == NPY_NAT:
                    out[i] = NPY_NAT
                    continue

                dt64_to_dtstruct(dtindex[i], &dts)
                n = quarters

                # number of months elapsed since the most recent anchor month
                months_since = (dts.month - q1start_month) % modby

                # offset semantics - when going backwards (or rolling with
                # n == 0) from a date that lies after the anchor day, the
                # first step only reaches the anchor of the current period,
                # so compensate by bumping n.
                if n <= 0 and (months_since != 0 or dts.day > 1):
                    n += 1

                # The year is computed before dts.month is overwritten;
                # presumably year_add_months reads the original month.
                dts.year = year_add_months(dts, modby * n - months_since)
                dts.month = month_add_months(dts, modby * n - months_since)
                dts.day = 1

                out[i] = dtstruct_to_dt64(&dts)

    elif day == 'end':
        with nogil:
            for i in range(count):
                if dtindex[i] == NPY_NAT:
                    out[i] = NPY_NAT
                    continue

                dt64_to_dtstruct(dtindex[i], &dts)
                n = quarters

                months_since = (dts.month - q1start_month) % modby

                if n <= 0 and months_since != 0:
                    # The general case of this condition would be
                    # `months_since != 0 or (months_since == 0 and
                    #    dts.day > get_days_in_month(dts.year, dts.month))`
                    # but the get_days_in_month inequality would never hold.
                    n += 1
                elif n > 0 and (months_since == 0 and
                                dts.day < get_days_in_month(dts.year,
                                                            dts.month)):
                    # in the anchor month but before its last day: the
                    # first forward step only reaches this month's end
                    n -= 1

                dts.year = year_add_months(dts, modby * n - months_since)
                dts.month = month_add_months(dts, modby * n - months_since)
                dts.day = get_days_in_month(dts.year, dts.month)

                out[i] = dtstruct_to_dt64(&dts)

    elif day == 'business_start':
        with nogil:
            for i in range(count):
                if dtindex[i] == NPY_NAT:
                    out[i] = NPY_NAT
                    continue

                dt64_to_dtstruct(dtindex[i], &dts)
                n = quarters

                months_since = (dts.month - q1start_month) % modby
                # compare_day is only relevant for comparison in the case
                # where months_since == 0, i.e. when dts.month is itself the
                # anchor month.  Looking it up for dts.month directly gives
                # the same answer in that case and never passes a zero or
                # negative month to get_firstbday.
                compare_day = get_firstbday(dts.year, dts.month)

                if n <= 0 and (months_since != 0 or dts.day > compare_day):
                    # make sure to roll forward, so negate
                    n += 1
                elif n > 0 and (months_since == 0 and dts.day < compare_day):
                    # pretend to roll back if on same month but
                    # before compare_day
                    n -= 1

                dts.year = year_add_months(dts, modby * n - months_since)
                dts.month = month_add_months(dts, modby * n - months_since)

                dts.day = get_firstbday(dts.year, dts.month)

                out[i] = dtstruct_to_dt64(&dts)

    elif day == 'business_end':
        with nogil:
            for i in range(count):
                if dtindex[i] == NPY_NAT:
                    out[i] = NPY_NAT
                    continue

                dt64_to_dtstruct(dtindex[i], &dts)
                n = quarters

                months_since = (dts.month - q1start_month) % modby
                # compare_day is only relevant for comparison in the case
                # where months_since == 0, i.e. when dts.month is itself the
                # anchor month.  Looking it up for dts.month directly gives
                # the same answer in that case and never passes a zero or
                # negative month to get_lastbday.
                compare_day = get_lastbday(dts.year, dts.month)

                if n <= 0 and (months_since != 0 or dts.day > compare_day):
                    # make sure to roll forward, so negate
                    n += 1
                elif n > 0 and (months_since == 0 and dts.day < compare_day):
                    # pretend to roll back if on same month but
                    # before compare_day
                    n -= 1

                dts.year = year_add_months(dts, modby * n - months_since)
                dts.month = month_add_months(dts, modby * n - months_since)

                dts.day = get_lastbday(dts.year, dts.month)

                out[i] = dtstruct_to_dt64(&dts)

    else:
        # Unlike shift_months, there is no day=None mode here.
        raise ValueError("day must be 'start', 'end', "
                         "'business_start', or 'business_end'")

    return np.asarray(out)


@cython.wraparound(False)
@cython.boundscheck(False)
def shift_months(int64_t[:] dtindex, int months, object day=None):
Expand Down
12 changes: 9 additions & 3 deletions pandas/tests/tseries/offsets/test_yqm_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,15 @@ def test_quarterly_dont_normalize():
assert (result.time() == date.time())


@pytest.mark.parametrize('offset', [MonthBegin(), MonthEnd(),
BMonthBegin(), BMonthEnd()])
def test_apply_index(offset):
@pytest.mark.parametrize('n', [-2, 1])
@pytest.mark.parametrize('cls', [MonthBegin, MonthEnd,
BMonthBegin, BMonthEnd,
QuarterBegin, QuarterEnd,
BQuarterBegin, BQuarterEnd,
YearBegin, YearEnd,
BYearBegin, BYearEnd])
def test_apply_index(cls, n):
offset = cls(n=n)
rng = pd.date_range(start='1/1/2000', periods=100000, freq='T')
ser = pd.Series(rng)

Expand Down
78 changes: 30 additions & 48 deletions pandas/tseries/offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
apply_index_wraps,
roll_yearday,
shift_month,
BeginMixin, EndMixin,
EndMixin,
BaseOffset)


Expand Down Expand Up @@ -1028,10 +1028,7 @@ def cbday(self):

@cache_readonly
def m_offset(self):
kwds = self.kwds
kwds = {key: kwds[key] for key in kwds
if key not in ['calendar', 'weekmask', 'holidays', 'offset']}
return MonthEnd(n=1, normalize=self.normalize, **kwds)
return MonthEnd(n=1, normalize=self.normalize)

@apply_wraps
def apply(self, other):
Expand Down Expand Up @@ -1106,10 +1103,7 @@ def cbday(self):

@cache_readonly
def m_offset(self):
kwds = self.kwds
kwds = {key: kwds[key] for key in kwds
if key not in ['calendar', 'weekmask', 'holidays', 'offset']}
return MonthBegin(n=1, normalize=self.normalize, **kwds)
return MonthBegin(n=1, normalize=self.normalize)

@apply_wraps
def apply(self, other):
Expand Down Expand Up @@ -1254,12 +1248,9 @@ def onOffset(self, dt):

def _apply(self, n, other):
# if other.day is not day_of_month move to day_of_month and update n
if other.day < self.day_of_month:
other = other.replace(day=self.day_of_month)
if n > 0:
n -= 1
if n > 0 and other.day < self.day_of_month:
n -= 1
elif other.day > self.day_of_month:
other = other.replace(day=self.day_of_month)
n += 1

months = n // 2
Expand Down Expand Up @@ -1309,12 +1300,9 @@ def onOffset(self, dt):
def _apply(self, n, other):
# if other.day is not day_of_month move to day_of_month and update n
if other.day < self.day_of_month:
other = other.replace(day=self.day_of_month)
n -= 1
elif other.day > self.day_of_month:
other = other.replace(day=self.day_of_month)
if n <= 0:
n += 1
elif n <= 0 and other.day > self.day_of_month:
n += 1

months = n // 2 + n % 2
day = 1 if n % 2 else self.day_of_month
Expand Down Expand Up @@ -1471,6 +1459,7 @@ def apply(self, other):
def getOffsetOfMonth(self, dt):
w = Week(weekday=self.weekday)
d = datetime(dt.year, dt.month, 1, tzinfo=dt.tzinfo)
# TODO: Is this DST-safe?
d = w.rollforward(d)
return d + timedelta(weeks=self.week)

Expand Down Expand Up @@ -1550,6 +1539,7 @@ def getOffsetOfMonth(self, dt):
d = datetime(dt.year, dt.month, 1, dt.hour, dt.minute,
dt.second, dt.microsecond, tzinfo=dt.tzinfo)
eom = m.rollforward(d)
# TODO: Is this DST-safe?
w = Week(weekday=self.weekday)
return w.rollback(eom)

Expand Down Expand Up @@ -1635,6 +1625,12 @@ def onOffset(self, dt):
modMonth = (dt.month - self.startingMonth) % 3
return modMonth == 0 and dt.day == self._get_offset_day(dt)

@apply_index_wraps
def apply_index(self, dtindex):
    """
    Apply this offset to every element of `dtindex` in one call.

    The int64 values of `dtindex` are passed to
    `liboffsets.shift_quarters` together with this offset's `n`,
    `startingMonth` and `_day_opt`; the result is wrapped back up via
    `dtindex._shallow_copy`.
    """
    return dtindex._shallow_copy(
        liboffsets.shift_quarters(dtindex.asi8, self.n,
                                  self.startingMonth, self._day_opt))


class BQuarterEnd(QuarterOffset):
"""DateOffset increments between business Quarter dates
Expand All @@ -1659,7 +1655,7 @@ class BQuarterBegin(QuarterOffset):
_day_opt = 'business_start'


class QuarterEnd(EndMixin, QuarterOffset):
class QuarterEnd(QuarterOffset):
"""DateOffset increments between business Quarter dates
startingMonth = 1 corresponds to dates like 1/31/2007, 4/30/2007, ...
startingMonth = 2 corresponds to dates like 2/28/2007, 5/31/2007, ...
Expand All @@ -1670,25 +1666,14 @@ class QuarterEnd(EndMixin, QuarterOffset):
_prefix = 'Q'
_day_opt = 'end'

@apply_index_wraps
def apply_index(self, i):
return self._end_apply_index(i, self.freqstr)


class QuarterBegin(BeginMixin, QuarterOffset):
class QuarterBegin(QuarterOffset):
_outputName = 'QuarterBegin'
_default_startingMonth = 3
_from_name_startingMonth = 1
_prefix = 'QS'
_day_opt = 'start'

@apply_index_wraps
def apply_index(self, i):
freq_month = 12 if self.startingMonth == 1 else self.startingMonth - 1
month = liboffsets._int_to_month[freq_month]
freqstr = 'Q-{month}'.format(month=month)
return self._beg_apply_index(i, freqstr)


# ---------------------------------------------------------------------
# Year-Based Offset Classes
Expand All @@ -1709,6 +1694,13 @@ def apply(self, other):
months = years * 12 + (self.month - other.month)
return shift_month(other, months, self._day_opt)

@apply_index_wraps
def apply_index(self, dtindex):
    """
    Apply this offset to every element of `dtindex` in one call.

    Delegates to `liboffsets.shift_quarters` with ``modby=12`` so that
    the period length is twelve months, anchored on `self.month`, and
    wraps the shifted int64 values via `dtindex._shallow_copy`.
    """
    i8values = dtindex.asi8
    shifted_i8 = liboffsets.shift_quarters(i8values, self.n, self.month,
                                           self._day_opt, modby=12)
    return dtindex._shallow_copy(shifted_i8)

def onOffset(self, dt):
if self.normalize and not _is_normalized(dt):
return False
Expand Down Expand Up @@ -1752,31 +1744,19 @@ class BYearBegin(YearOffset):
_day_opt = 'business_start'


class YearEnd(EndMixin, YearOffset):
class YearEnd(YearOffset):
"""DateOffset increments between calendar year ends"""
_default_month = 12
_prefix = 'A'
_day_opt = 'end'

@apply_index_wraps
def apply_index(self, i):
# convert month anchor to annual period tuple
return self._end_apply_index(i, self.freqstr)


class YearBegin(BeginMixin, YearOffset):
class YearBegin(YearOffset):
"""DateOffset increments between calendar year begin dates"""
_default_month = 1
_prefix = 'AS'
_day_opt = 'start'

@apply_index_wraps
def apply_index(self, i):
freq_month = 12 if self.month == 1 else self.month - 1
month = liboffsets._int_to_month[freq_month]
freqstr = 'A-{month}'.format(month=month)
return self._beg_apply_index(i, freqstr)


# ---------------------------------------------------------------------
# Special Offset Classes
Expand Down Expand Up @@ -2245,7 +2225,8 @@ def __eq__(self, other):
if isinstance(other, Tick):
return self.delta == other.delta
else:
return DateOffset.__eq__(self, other)
# TODO: Are there cases where this should raise TypeError?
return False

# This is identical to DateOffset.__hash__, but has to be redefined here
# for Python 3, because we've redefined __eq__.
Expand All @@ -2261,7 +2242,8 @@ def __ne__(self, other):
if isinstance(other, Tick):
return self.delta != other.delta
else:
return DateOffset.__ne__(self, other)
# TODO: Are there cases where this should raise TypeError?
return True

@property
def delta(self):
Expand Down