From 3a6fc5b5ae40a651c47597c48ffff147472b5ffe Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 13 Nov 2017 21:33:49 -0800 Subject: [PATCH 1/3] simplify algebra in Year offset apply methods --- pandas/_libs/tslibs/offsets.pyx | 33 +++++++++++++ pandas/tseries/offsets.py | 83 ++++----------------------------- 2 files changed, 43 insertions(+), 73 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 2d8ce4c59fedc..ce719874ab3dc 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -428,3 +428,36 @@ cpdef datetime shift_month(datetime stamp, int months, object day_opt=None): else: raise ValueError(day_opt) return stamp.replace(year=year, month=month, day=day) + + +cpdef int _get_day_of_month(datetime other, day_opt): + if day_opt == 'start': + return 1 + elif day_opt == 'end': + return monthrange(other.year, other.month)[1] + else: + raise ValueError(day_opt) + + +cpdef int roll_yearday(other, n, month, day_opt='start'): + """ + Possibly increment or decrement the number of periods to shift + based on rollforward/rollbackward conventions. + + Mirrors `roll_check` in tslib.shift_months + """ + # Note: The other.day < ... condition will never hold when day_opt=='start' + # and the other.day > ... condition will never hold when day_opt=='end'. + # At some point these extra checks may need to be optimized away. + # But that point isn't today. 
+ if n > 0: + if other.month < month or (other.month == month and + other.day < _get_day_of_month(other, + day_opt)): + n -= 1 + elif n <= 0: + if other.month > month or (other.month == month and + other.day > _get_day_of_month(other, + day_opt)): + n += 1 + return n diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 4dc26f4dd69e2..bbc2fae2baec5 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -22,6 +22,7 @@ _int_to_weekday, _weekday_to_int, _determine_offset, apply_index_wraps, + roll_yearday, shift_month, BeginMixin, EndMixin, BaseOffset) @@ -1943,49 +1944,12 @@ class YearEnd(EndMixin, YearOffset): @apply_wraps def apply(self, other): - def _increment(date): - if date.month == self.month: - _, days_in_month = tslib.monthrange(date.year, self.month) - if date.day != days_in_month: - year = date.year - else: - year = date.year + 1 - elif date.month < self.month: - year = date.year - else: - year = date.year + 1 - _, days_in_month = tslib.monthrange(year, self.month) - return datetime(year, self.month, days_in_month, - date.hour, date.minute, date.second, - date.microsecond) - - def _decrement(date): - year = date.year if date.month > self.month else date.year - 1 - _, days_in_month = tslib.monthrange(year, self.month) - return datetime(year, self.month, days_in_month, - date.hour, date.minute, date.second, - date.microsecond) - - def _rollf(date): - if date.month != self.month or\ - date.day < tslib.monthrange(date.year, date.month)[1]: - date = _increment(date) - return date - - n = self.n - result = other - if n > 0: - while n > 0: - result = _increment(result) - n -= 1 - elif n < 0: - while n < 0: - result = _decrement(result) - n += 1 - else: - # n == 0, roll forward - result = _rollf(result) - return result + n = roll_yearday(other, self.n, self.month, 'end') + year = other.year + n + days_in_month = tslib.monthrange(year, self.month)[1] + return datetime(year, self.month, days_in_month, + other.hour, 
other.minute, other.second, + other.microsecond) @apply_index_wraps def apply_index(self, i): @@ -2006,36 +1970,9 @@ class YearBegin(BeginMixin, YearOffset): @apply_wraps def apply(self, other): - def _increment(date, n): - year = date.year + n - 1 - if date.month >= self.month: - year += 1 - return datetime(year, self.month, 1, date.hour, date.minute, - date.second, date.microsecond) - - def _decrement(date, n): - year = date.year + n + 1 - if date.month < self.month or (date.month == self.month and - date.day == 1): - year -= 1 - return datetime(year, self.month, 1, date.hour, date.minute, - date.second, date.microsecond) - - def _rollf(date): - if (date.month != self.month) or date.day > 1: - date = _increment(date, 1) - return date - - n = self.n - result = other - if n > 0: - result = _increment(result, n) - elif n < 0: - result = _decrement(result, n) - else: - # n == 0, roll forward - result = _rollf(result) - return result + n = roll_yearday(other, self.n, self.month, 'start') + year = other.year + n + return other.replace(year=year, month=self.month, day=1) @apply_index_wraps def apply_index(self, i): From d3553abcff61de74a3a3d67de1ed29f08b641560 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 14 Nov 2017 07:57:03 -0800 Subject: [PATCH 2/3] docstrings+examples per request --- pandas/_libs/tslibs/offsets.pyx | 81 ++++++++++++++++++++++++++++++--- 1 file changed, 75 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index ce719874ab3dc..84deef4da2bf9 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -430,7 +430,31 @@ cpdef datetime shift_month(datetime stamp, int months, object day_opt=None): return stamp.replace(year=year, month=month, day=day) -cpdef int _get_day_of_month(datetime other, day_opt): +cdef int get_day_of_month(datetime other, day_opt): + """ + Find the day in `other`'s month that satisfies a DateOffset's onOffset + policy, as described by the 
`day_opt` argument. + + Parameters + ---------- + other : datetime or Timestamp + day_opt : 'start', 'end' + 'start': returns 1 + 'end': returns last day of the month + + Returns + ------- + day_of_month : int + + Examples + -------- + >>> other = datetime(2017, 11, 14) + >>> get_day_of_month(other, 'start') + 1 + >>> get_day_of_month(other, 'end') + 30 + + """ if day_opt == 'start': return 1 elif day_opt == 'end': @@ -444,7 +468,52 @@ cpdef int roll_yearday(other, n, month, day_opt='start'): Possibly increment or decrement the number of periods to shift based on rollforward/rollbackward conventions. - Mirrors `roll_check` in tslib.shift_months + Parameters + ---------- + other : datetime or Timestamp + n : number of periods to increment, before adjusting for rolling + day_opt : 'start', 'end' + 'start': `other` is compared to the first day of `month` + 'end': `other` is compared to the last day of `month` + + Returns + ------- + n : int number of periods to increment + + Notes + ----- + * Mirrors `roll_check` in tslib.shift_months + + Examples + -------- + >>> month = 3 + >>> day_opt = 'start' # `other` will be compared to March 1 + >>> other = datetime(2017, 2, 10) # before March 1 + >>> roll_yearday(other, 2, month, day_opt) + 1 + >>> roll_yearday(other, -7, month, day_opt) + -7 + >>> + >>> other = Timestamp('2014-03-15', tz='US/Eastern') # after March 1 + >>> roll_yearday(other, 2, month, day_opt) + 2 + >>> roll_yearday(other, -7, month, day_opt) + -6 + + >>> month = 6 + >>> day_opt = 'end' # `other` will be compared to June 30 + >>> other = datetime(1999, 6, 29) # before June 30 + >>> roll_yearday(other, 5, month, day_opt) + 4 + >>> roll_yearday(other, -7, month, day_opt) + -7 + >>> + >>> other = Timestamp(2072, 8, 24, 6, 17, 18) # after June 30 + >>> roll_yearday(other, 5, month, day_opt) + 5 + >>> roll_yearday(other, -7, month, day_opt) + -6 + """ # Note: The other.day < ... condition will never hold when day_opt=='start' # and the other.day > ... condition will never hold when day_opt=='end'. 
@@ -452,12 +521,12 @@ cpdef int roll_yearday(other, n, month, day_opt='start'): # But that point isn't today. if n > 0: if other.month < month or (other.month == month and - other.day < _get_day_of_month(other, - day_opt)): + other.day < get_day_of_month(other, + day_opt)): n -= 1 elif n <= 0: if other.month > month or (other.month == month and - other.day > _get_day_of_month(other, - day_opt)): + other.day > get_day_of_month(other, + day_opt)): n += 1 return n From 369da473a878e0fa2a51d22806296dea204e0dbe Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 15 Nov 2017 08:08:09 -0800 Subject: [PATCH 3/3] unit tests for liboffsets; fix an unraised ValueError --- pandas/_libs/tslibs/offsets.pyx | 4 +- .../tests/tseries/offsets/test_liboffsets.py | 63 +++++++++++++++++++ 2 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 pandas/tests/tseries/offsets/test_liboffsets.py diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 84deef4da2bf9..54f3f16700b65 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -430,7 +430,7 @@ cpdef datetime shift_month(datetime stamp, int months, object day_opt=None): return stamp.replace(year=year, month=month, day=day) -cdef int get_day_of_month(datetime other, day_opt): +cdef int get_day_of_month(datetime other, day_opt) except? -1: """ Find the day in `other`'s month that satisfies a DateOffset's onOffset policy, as described by the `day_opt` argument. @@ -463,7 +463,7 @@ cdef int get_day_of_month(datetime other, day_opt): raise ValueError(day_opt) -cpdef int roll_yearday(other, n, month, day_opt='start'): +cpdef int roll_yearday(other, n, month, day_opt='start') except? -1: """ Possibly increment or decrement the number of periods to shift based on rollforward/rollbackward conventions. 
diff --git a/pandas/tests/tseries/offsets/test_liboffsets.py b/pandas/tests/tseries/offsets/test_liboffsets.py new file mode 100644 index 0000000000000..e3ec6fdf2fbcc --- /dev/null +++ b/pandas/tests/tseries/offsets/test_liboffsets.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- +""" +Tests for helper functions in the cython tslibs.offsets +""" +from datetime import datetime + +import pytest + +import pandas as pd + +import pandas._libs.tslibs.offsets as liboffsets + + +def test_shift_month(): + dt = datetime(2017, 11, 15) + + assert liboffsets.shift_month(dt, 0, day_opt=None) == dt + assert liboffsets.shift_month(dt, 0, day_opt=15) == dt + + assert liboffsets.shift_month(dt, 1, + day_opt='start') == datetime(2017, 12, 1) + + assert liboffsets.shift_month(dt, -145, + day_opt='end') == datetime(2005, 10, 31) + + with pytest.raises(ValueError): + liboffsets.shift_month(dt, 3, day_opt='this should raise') + + +def test_get_day_of_month(): + # get_day_of_month is not directly exposed; we test it via roll_yearday + dt = datetime(2017, 11, 15) + + with pytest.raises(ValueError): + # To hit the raising case we need month == dt.month and n > 0 + liboffsets.roll_yearday(dt, n=3, month=11, day_opt='foo') + + +def test_roll_yearday(): + # Copied from doctest examples + month = 3 + day_opt = 'start' # `other` will be compared to March 1 + other = datetime(2017, 2, 10) # before March 1 + assert liboffsets.roll_yearday(other, 2, month, day_opt) == 1 + assert liboffsets.roll_yearday(other, -7, month, day_opt) == -7 + assert liboffsets.roll_yearday(other, 0, month, day_opt) == 0 + + other = pd.Timestamp('2014-03-15', tz='US/Eastern') # after March 1 + assert liboffsets.roll_yearday(other, 2, month, day_opt) == 2 + assert liboffsets.roll_yearday(other, -7, month, day_opt) == -6 + assert liboffsets.roll_yearday(other, 0, month, day_opt) == 1 + + month = 6 + day_opt = 'end' # `other` will be compared to June 30 + other = datetime(1999, 6, 29) # before June 30 + assert 
liboffsets.roll_yearday(other, 5, month, day_opt) == 4 + assert liboffsets.roll_yearday(other, -7, month, day_opt) == -7 + assert liboffsets.roll_yearday(other, 0, month, day_opt) == 0 + + other = pd.Timestamp(2072, 8, 24, 6, 17, 18) # after June 30 + assert liboffsets.roll_yearday(other, 5, month, day_opt) == 5 + assert liboffsets.roll_yearday(other, -7, month, day_opt) == -6 + assert liboffsets.roll_yearday(other, 0, month, day_opt) == 1