From 34f13097ddc4dfe6a1ed1cb60bb229a0aa90282b Mon Sep 17 00:00:00 2001 From: carlosdanielcsantos Date: Fri, 17 Mar 2017 17:08:02 +0000 Subject: [PATCH 01/13] Adding window slicing endpoint inclusion selection to VariableWindowIndexer --- pandas/core/window.pyx | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/pandas/core/window.pyx b/pandas/core/window.pyx index a06e616002ee2..ae5373af5582d 100644 --- a/pandas/core/window.pyx +++ b/pandas/core/window.pyx @@ -232,7 +232,7 @@ cdef class VariableWindowIndexer(WindowIndexer): """ def __init__(self, ndarray input, int64_t win, int64_t minp, - ndarray index): + ndarray index, closed='right'): self.is_variable = 1 self.N = len(index) @@ -249,6 +249,9 @@ cdef class VariableWindowIndexer(WindowIndexer): # max window size self.win = (self.end - self.start).max() + if closed not in ['right', 'left', 'both', 'neither']: + closed = 'right' + def build(self, ndarray[int64_t] index, int64_t win): cdef: @@ -261,7 +264,10 @@ cdef class VariableWindowIndexer(WindowIndexer): N = self.N start[0] = 0 - end[0] = 1 + if closed in ['right', 'both']: + end[0] = 1 + else: + end[0] = 0 with nogil: @@ -271,6 +277,9 @@ cdef class VariableWindowIndexer(WindowIndexer): end_bound = index[i] start_bound = index[i] - win + if closed in ['left', 'both']: + start_bound -= 1 + # advance the start bound until we are # within the constraint start[i] = i @@ -286,6 +295,9 @@ cdef class VariableWindowIndexer(WindowIndexer): else: end[i] = end[i - 1] + if closed in ['left', 'neither']: + end[i] -= 1 + def get_window_indexer(input, win, minp, index, floor=None, use_mock=True): From da034bf37f61a7f29714b3dcd252b8cdf7121b7a Mon Sep 17 00:00:00 2001 From: carlosdanielcsantos Date: Wed, 22 Mar 2017 18:55:00 +0000 Subject: [PATCH 02/13] Commiting progress --- pandas/core/generic.py | 4 +- pandas/core/window.py | 28 +++++++--- pandas/core/window.pyx | 103 ++++++++++++++++++++++-------------- pandas/tests/test_window.py | 28 ++++++++++ 4 files changed, 114 insertions(+), 49 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ad56ea44a0dc6..d89acfcc282c5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5962,12 +5962,12 @@ def _add_series_or_dataframe_operations(cls): @Appender(rwindow.rolling.__doc__) def rolling(self, window, min_periods=None, freq=None, center=False, - win_type=None, on=None, axis=0): + win_type=None, on=None, axis=0, closed='right'): axis = self._get_axis_number(axis) return rwindow.rolling(self, window=window, min_periods=min_periods, freq=freq, center=center, win_type=win_type, - on=on, axis=axis) + on=on, axis=axis, closed=closed) cls.rolling = rolling diff --git a/pandas/core/window.py b/pandas/core/window.py index 89d2f5b24d77e..f050614f86292 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -56,11 +56,12 @@ class _Window(PandasObject, SelectionMixin): _attributes = ['window', 'min_periods', 'freq', 'center', 'win_type', - 'axis', 'on'] + 'axis', 'on', 'closed'] exclusions = set() def __init__(self, obj, window=None, min_periods=None, freq=None, - center=False, win_type=None, axis=0, on=None, **kwargs): + center=False, win_type=None, axis=0, on=None, closed='right', + **kwargs): if freq is not None: warnings.warn("The freq kw is deprecated and will be removed in a " @@ -71,6 +72,7 @@ def __init__(self, obj, window=None, min_periods=None, freq=None, self.blocks = [] self.obj = obj self.on = on + self.closed = closed self.window = window self.min_periods = min_periods self.freq = freq @@ -101,6 +103,9 @@ def validate(self): if self.min_periods is not None and not \ is_integer(self.min_periods): raise ValueError("min_periods must be an integer") + if self.closed not in ['right', 'both', 'left', 'neither']: + raise ValueError("closed must be 'right', 'left', 'both' or " + "'neither'") def _convert_freq(self, how=None): """ resample according to the how, return a new object """ @@ -374,8 +379,12 @@ class Window(_Window): on : string, optional For a DataFrame, column on which to calculate the rolling window, rather than the index + closed : 'right', 'left', 'both', 'neither' + For offset-based windows, make the interval closed on the right, left, + or on both endpoints. Can also make the interval open on both endpoints + (neither). - .. versionadded:: 0.19.0 + .. versionadded:: 0.20.0 axis : int or string, default 0 @@ -717,12 +726,12 @@ def _apply(self, func, name=None, window=None, center=None, raise ValueError("we do not support this function " "in _window.{0}".format(func)) - def func(arg, window, min_periods=None): + def func(arg, window, min_periods=None, closed=None): minp = check_minp(min_periods, window) # ensure we are only rolling on floats arg = _ensure_float64(arg) return cfunc(arg, - window, minp, indexi, **kwargs) + window, minp, indexi, closed, **kwargs) # calculation function if center: @@ -731,11 +740,13 @@ def func(arg, window, min_periods=None): def calc(x): return func(np.concatenate((x, additional_nans)), - window, min_periods=self.min_periods) + window, min_periods=self.min_periods, + closed=self.closed) else: def calc(x): - return func(x, window, min_periods=self.min_periods) + return func(x, window, min_periods=self.min_periods, + closed=self.closed) with np.errstate(all='ignore'): if values.ndim > 1: @@ -788,12 +799,13 @@ def apply(self, func, args=(), kwargs={}): window = self._get_window() offset = _offset(window, self.center) index, indexi = self._get_index() + closed = self.closed def f(arg, window, min_periods): minp = _use_window(min_periods, window) return _window.roll_generic(arg, window, minp, indexi, offset, func, args, - kwargs) + kwargs, closed) return self._apply(f, func, args=args, kwargs=kwargs, center=False) diff --git a/pandas/core/window.pyx b/pandas/core/window.pyx index ae5373af5582d..568acabd6cf80 100644 --- a/pandas/core/window.pyx +++ b/pandas/core/window.pyx @@ -161,7 +161,7 @@ cdef class MockFixedWindowIndexer(WindowIndexer): """ def __init__(self, ndarray input, int64_t win, int64_t minp, - object index=None, object floor=None): + object index=None, object floor=None, closed='right'): assert index is None self.is_variable = 0 @@ -194,7 +194,7 @@ cdef class FixedWindowIndexer(WindowIndexer): """ def __init__(self, ndarray input, int64_t win, int64_t minp, - object index=None, object floor=None): + object index=None, object floor=None, closed='right'): cdef ndarray start_s, start_e, end_s, end_e assert index is None @@ -244,15 +244,27 @@ cdef class VariableWindowIndexer(WindowIndexer): self.end = np.empty(self.N, dtype='int64') self.end.fill(-1) - self.build(index, win) + cdef: + bint leftIsClosed = False + bint rightIsClosed = False + + if closed not in ['right', 'left', 'both', 'neither']: + closed = 'right' + + if closed in ['right', 'both']: + rightIsClosed = True + + if closed in ['left', 'both']: + leftIsClosed = True + + self.build(index, win, leftIsClosed, rightIsClosed) # max window size self.win = (self.end - self.start).max() - if closed not in ['right', 'left', 'both', 'neither']: - closed = 'right' - def build(self, ndarray[int64_t] index, int64_t win): + def build(self, ndarray[int64_t] index, int64_t win, bint leftIsClosed, + bint rightIsClosed): cdef: ndarray[int64_t] start, end @@ -264,7 +276,8 @@ cdef class VariableWindowIndexer(WindowIndexer): N = self.N start[0] = 0 - if closed in ['right', 'both']: + #if closed in ['right', 'both']: + if rightIsClosed: end[0] = 1 else: end[0] = 0 @@ -277,7 +290,8 @@ cdef class VariableWindowIndexer(WindowIndexer): end_bound = index[i] start_bound = index[i] - win - if closed in ['left', 'both']: + #if closed in ['left', 'both']: + if leftIsClosed: start_bound -= 1 # advance the start bound until we are @@ -295,12 +309,13 @@ cdef class VariableWindowIndexer(WindowIndexer): else: end[i] = end[i - 1] - if closed in ['left', 'neither']: - end[i] -= 1 + #if closed in ['left', 'neither']: + if not rightIsClosed: + end[i] -= 1 def get_window_indexer(input, win, minp, index, floor=None, - use_mock=True): + use_mock=True, closed='right'): """ return the correct window indexer for the computation @@ -319,6 +334,7 @@ def get_window_indexer(input, win, minp, index, floor=None, compat Indexer that allows us to use a standard code path with all of the indexers. + Returns ------- tuple of 1d int64 ndarrays of the offsets & data about the window @@ -326,11 +342,11 @@ def get_window_indexer(input, win, minp, index, floor=None, """ if index is not None: - indexer = VariableWindowIndexer(input, win, minp, index) + indexer = VariableWindowIndexer(input, win, minp, index, closed) elif use_mock: - indexer = MockFixedWindowIndexer(input, win, minp, index, floor) + indexer = MockFixedWindowIndexer(input, win, minp, index, floor, closed) else: - indexer = FixedWindowIndexer(input, win, minp, index, floor) + indexer = FixedWindowIndexer(input, win, minp, index, floor, closed) return indexer.get_data() # ---------------------------------------------------------------------- @@ -339,7 +355,7 @@ def get_window_indexer(input, win, minp, index, floor=None, def roll_count(ndarray[double_t] input, int64_t win, int64_t minp, - object index): + object index, closed='right'): cdef: double val, count_x = 0.0 int64_t s, e, nobs, N @@ -348,7 +364,7 @@ def roll_count(ndarray[double_t] input, int64_t win, int64_t minp, ndarray[double_t] output start, end, N, win, minp, _ = get_window_indexer(input, win, - minp, index) + minp, index, closed) output = np.empty(N, dtype=float) with nogil: @@ -420,7 +436,7 @@ cdef inline void remove_sum(double val, int64_t *nobs, double *sum_x) nogil: def roll_sum(ndarray[double_t] input, int64_t win, int64_t minp, - object index): + object index, closed='right'): cdef: double val, prev_x, sum_x = 0 int64_t s, e @@ -430,7 +446,8 @@ def roll_sum(ndarray[double_t] input, int64_t win, int64_t minp, ndarray[double_t] output start, end, N, win, minp, is_variable = get_window_indexer(input, win, - minp, index) + minp, index, + closed) output = np.empty(N, dtype=float) # for performance we are going to iterate @@ -535,7 +552,7 @@ cdef inline void remove_mean(double val, Py_ssize_t *nobs, double *sum_x, def roll_mean(ndarray[double_t] input, int64_t win, int64_t minp, - object index): + object index, closed='right'): cdef: double val, prev_x, result, sum_x = 0 int64_t s, e @@ -545,7 +562,8 @@ def roll_mean(ndarray[double_t] input, int64_t win, int64_t minp, ndarray[double_t] output start, end, N, win, minp, is_variable = get_window_indexer(input, win, - minp, index) + minp, index, + closed) output = np.empty(N, dtype=float) # for performance we are going to iterate @@ -659,7 +677,7 @@ cdef inline void remove_var(double val, double *nobs, double *mean_x, def roll_var(ndarray[double_t] input, int64_t win, int64_t minp, - object index, int ddof=1): + object index, closed='right', int ddof=1): """ Numerically stable implementation using Welford's method. """ @@ -672,7 +690,8 @@ def roll_var(ndarray[double_t] input, int64_t win, int64_t minp, ndarray[double_t] output start, end, N, win, minp, is_variable = get_window_indexer(input, win, - minp, index) + minp, index, + closed) output = np.empty(N, dtype=float) # Check for windows larger than array, addresses #7297 @@ -801,7 +820,7 @@ cdef inline void remove_skew(double val, int64_t *nobs, double *x, double *xx, def roll_skew(ndarray[double_t] input, int64_t win, int64_t minp, - object index): + object index, closed='right'): cdef: double val, prev double x = 0, xx = 0, xxx = 0 @@ -812,7 +831,8 @@ def roll_skew(ndarray[double_t] input, int64_t win, int64_t minp, ndarray[double_t] output start, end, N, win, minp, is_variable = get_window_indexer(input, win, - minp, index) + minp, index, + closed) output = np.empty(N, dtype=float) if is_variable: @@ -928,7 +948,7 @@ cdef inline void remove_kurt(double val, int64_t *nobs, double *x, double *xx, def roll_kurt(ndarray[double_t] input, int64_t win, int64_t minp, - object index): + object index, closed='right'): cdef: double val, prev double x = 0, xx = 0, xxx = 0, xxxx = 0 @@ -939,7 +959,8 @@ def roll_kurt(ndarray[double_t] input, int64_t win, int64_t minp, ndarray[double_t] output start, end, N, win, minp, is_variable = get_window_indexer(input, win, - minp, index) + minp, index, + closed) output = np.empty(N, dtype=float) if is_variable: @@ -997,7 +1018,7 @@ def roll_kurt(ndarray[double_t] input, int64_t win, int64_t minp, def roll_median_c(ndarray[float64_t] input, int64_t win, int64_t minp, - object index): + object index, closed='right'): cdef: double val, res, prev bint err=0, is_variable @@ -1014,7 +1035,7 @@ def roll_median_c(ndarray[float64_t] input, int64_t win, int64_t minp, start, end, N, win, minp, is_variable = get_window_indexer( input, win, minp, index, - use_mock=False) + use_mock=False, closed=closed) output = np.empty(N, dtype=float) sl = skiplist_init(win) @@ -1123,7 +1144,7 @@ cdef inline numeric calc_mm(int64_t minp, Py_ssize_t nobs, def roll_max(ndarray[numeric] input, int64_t win, int64_t minp, - object index): + object index, closed='right'): """ Moving max of 1d array of any numeric type along axis=0 ignoring NaNs. @@ -1135,12 +1156,15 @@ def roll_max(ndarray[numeric] input, int64_t win, int64_t minp, is below this, output a NaN index: ndarray, optional index for window computation + closed: 'right', 'left', 'both', 'neither' + make the interval closed on the right, left, + both or neither endpoints """ - return _roll_min_max(input, win, minp, index, is_max=1) + return _roll_min_max(input, win, minp, index, is_max=1, closed=closed) def roll_min(ndarray[numeric] input, int64_t win, int64_t minp, - object index): + object index, closed='right'): """ Moving max of 1d array of any numeric type along axis=0 ignoring NaNs. @@ -1153,11 +1177,11 @@ def roll_min(ndarray[numeric] input, int64_t win, int64_t minp, index: ndarray, optional index for window computation """ - return _roll_min_max(input, win, minp, index, is_max=0) + return _roll_min_max(input, win, minp, index, is_max=0, closed=closed) cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, - object index, bint is_max): + object index, bint is_max, closed='right'): """ Moving min/max of 1d array of any numeric type along axis=0 ignoring NaNs. @@ -1182,7 +1206,7 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, starti, endi, N, win, minp, is_variable = get_window_indexer( input, win, - minp, index) + minp, index, closed=closed) output = np.empty(N, dtype=input.dtype) @@ -1284,7 +1308,7 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win, - int64_t minp, object index, double quantile): + int64_t minp, object index, double quantile, closed='right'): """ O(N log(window)) implementation using skip list """ @@ -1305,7 +1329,7 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win, start, end, N, win, minp, is_variable = get_window_indexer( input, win, minp, index, - use_mock=False) + use_mock=False, closed=closed) output = np.empty(N, dtype=float) skiplist = IndexableSkiplist(win) @@ -1349,7 +1373,7 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win, def roll_generic(ndarray[float64_t, cast=True] input, int64_t win, int64_t minp, object index, int offset, object func, - object args, object kwargs): + object args, object kwargs, closed='right'): cdef: ndarray[double_t] output, counts, bufarr float64_t *buf @@ -1367,12 +1391,13 @@ def roll_generic(ndarray[float64_t, cast=True] input, start, end, N, win, minp, is_variable = get_window_indexer(input, win, minp, index, - floor=0) + floor=0, + closed=closed) output = np.empty(N, dtype=float) counts = roll_sum(np.concatenate([np.isfinite(input).astype(float), np.array([0.] * offset)]), - win, minp, index)[offset:] + win, minp, index, closed)[offset:] if is_variable: diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 5fc31e9321f31..1de88afc67068 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -3385,6 +3385,34 @@ def test_min_periods(self): result = df.rolling('2s', min_periods=1).sum() tm.assert_frame_equal(result, expected) + def test_closed(self): + df = DataFrame({'A': [1]*5}, + index = [pd.Timestamp('20130101 09:00:01'), + pd.Timestamp('20130101 09:00:02'), + pd.Timestamp('20130101 09:00:03'), + pd.Timestamp('20130101 09:00:04'), + pd.Timestamp('20130101 09:00:06')]) + + expected = df.copy() + expected["A"] = [1.0, 2, 2, 2, 1] + result = df.rolling('2s', closed='right').sum() + tm.assert_frame_equal(result, expected) + + expected = df.copy() + expected["A"] = [1.0, 2, 3, 3, 2] + result = df.rolling('2s', closed='both').sum() + tm.assert_frame_equal(result, expected) + + expected = df.copy() + expected["A"] = [np.nan, 1.0, 2, 2, 1] + result = df.rolling('2s', closed='left').sum() + tm.assert_frame_equal(result, expected) + + expected = df.copy() + expected["A"] = [np.nan, 1.0, 1, 1, np.nan] + result = df.rolling('2s', closed='left').sum() + tm.assert_frame_equal(result, expected) + def test_ragged_sum(self): df = self.ragged From 2cf68047749b64c380679490a65d5fd1336198d9 Mon Sep 17 00:00:00 2001 From: carlosdanielcsantos Date: Thu, 23 Mar 2017 22:02:26 +0000 Subject: [PATCH 03/13] Time-based windows working MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ‘Closed’ feature is working as expected in time-based windows. Sum tests are passing. --- pandas/core/window.py | 14 ++--- pandas/core/window.pyx | 107 +++++++++++++++++++----------------- pandas/tests/test_window.py | 2 +- 3 files changed, 64 insertions(+), 59 deletions(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index f050614f86292..a77ff850f7915 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -779,7 +779,8 @@ def count(self): for b in blocks: result = b.notnull().astype(int) result = self._constructor(result, window=window, min_periods=0, - center=self.center).sum() + center=self.center, + closed=self.closed).sum() results.append(result) return self._wrap_results(results, blocks, obj) @@ -801,11 +802,10 @@ def apply(self, func, args=(), kwargs={}): index, indexi = self._get_index() closed = self.closed - def f(arg, window, min_periods): + def f(arg, window, min_periods, closed): minp = _use_window(min_periods, window) - return _window.roll_generic(arg, window, minp, indexi, - offset, func, args, - kwargs, closed) + return _window.roll_generic(arg, window, minp, indexi, closed, + offset, func, args, kwargs) return self._apply(f, func, args=args, kwargs=kwargs, center=False) @@ -876,7 +876,7 @@ def std(self, ddof=1, *args, **kwargs): def f(arg, *args, **kwargs): minp = _require_min_periods(1)(self.min_periods, window) return _zsqrt(_window.roll_var(arg, window, minp, indexi, - ddof)) + self.closed, ddof)) return self._apply(f, 'std', check_minp=_require_min_periods(1), ddof=ddof, **kwargs) @@ -923,7 +923,7 @@ def quantile(self, quantile, **kwargs): def f(arg, *args, **kwargs): minp = _use_window(self.min_periods, window) return _window.roll_quantile(arg, window, minp, indexi, - quantile) + self.closed, quantile) return self._apply(f, 'quantile', quantile=quantile, **kwargs) diff --git a/pandas/core/window.pyx b/pandas/core/window.pyx index 568acabd6cf80..032a9101ae83b 100644 --- a/pandas/core/window.pyx +++ b/pandas/core/window.pyx @@ -161,7 +161,8 @@ cdef class MockFixedWindowIndexer(WindowIndexer): """ def __init__(self, ndarray input, int64_t win, int64_t minp, - object index=None, object floor=None, closed='right'): + object index=None, object floor=None, + bint l_closed=False, bint r_closed=True): assert index is None self.is_variable = 0 @@ -194,7 +195,8 @@ cdef class FixedWindowIndexer(WindowIndexer): """ def __init__(self, ndarray input, int64_t win, int64_t minp, - object index=None, object floor=None, closed='right'): + object index=None, object floor=None, + bint l_closed=False, bint r_closed=True): cdef ndarray start_s, start_e, end_s, end_e assert index is None @@ -232,7 +234,7 @@ cdef class VariableWindowIndexer(WindowIndexer): """ def __init__(self, ndarray input, int64_t win, int64_t minp, - ndarray index, closed='right'): + ndarray index, bint l_closed=False, bint r_closed=True): self.is_variable = 1 self.N = len(index) @@ -244,27 +246,14 @@ cdef class VariableWindowIndexer(WindowIndexer): self.end = np.empty(self.N, dtype='int64') self.end.fill(-1) - cdef: - bint leftIsClosed = False - bint rightIsClosed = False - - if closed not in ['right', 'left', 'both', 'neither']: - closed = 'right' - - if closed in ['right', 'both']: - rightIsClosed = True - - if closed in ['left', 'both']: - leftIsClosed = True - - self.build(index, win, leftIsClosed, rightIsClosed) + self.build(index, win, l_closed, r_closed) # max window size self.win = (self.end - self.start).max() - def build(self, ndarray[int64_t] index, int64_t win, bint leftIsClosed, - bint rightIsClosed): + def build(self, ndarray[int64_t] index, int64_t win, bint l_closed, + bint r_closed): cdef: ndarray[int64_t] start, end @@ -276,10 +265,10 @@ cdef class VariableWindowIndexer(WindowIndexer): N = self.N start[0] = 0 - #if closed in ['right', 'both']: - if rightIsClosed: + + if r_closed: # right endpoint is closed end[0] = 1 - else: + else: # right endpoint is open end[0] = 0 with nogil: @@ -290,8 +279,7 @@ cdef class VariableWindowIndexer(WindowIndexer): end_bound = index[i] start_bound = index[i] - win - #if closed in ['left', 'both']: - if leftIsClosed: + if l_closed: # left endpoint is closed start_bound -= 1 # advance the start bound until we are @@ -309,13 +297,13 @@ cdef class VariableWindowIndexer(WindowIndexer): else: end[i] = end[i - 1] - #if closed in ['left', 'neither']: - if not rightIsClosed: + # right endpoint is open + if not r_closed: end[i] -= 1 -def get_window_indexer(input, win, minp, index, floor=None, - use_mock=True, closed='right'): +def get_window_indexer(input, win, minp, index, closed, + floor=None, use_mock=True): """ return the correct window indexer for the computation @@ -341,12 +329,28 @@ def get_window_indexer(input, win, minp, index, floor=None, """ + cdef: + bint l_closed = False + bint r_closed = False + + if closed not in ['right', 'left', 'both', 'neither']: + closed = 'right' + + if closed in ['right', 'both']: + r_closed = True + + if closed in ['left', 'both']: + l_closed = True + if index is not None: - indexer = VariableWindowIndexer(input, win, minp, index, closed) + indexer = VariableWindowIndexer(input, win, minp, index, l_closed, + r_closed) elif use_mock: - indexer = MockFixedWindowIndexer(input, win, minp, index, floor, closed) + indexer = MockFixedWindowIndexer(input, win, minp, index, floor, + l_closed, r_closed) else: - indexer = FixedWindowIndexer(input, win, minp, index, floor, closed) + indexer = FixedWindowIndexer(input, win, minp, index, floor, l_closed, + r_closed) return indexer.get_data() # ---------------------------------------------------------------------- @@ -436,7 +440,7 @@ cdef inline void remove_sum(double val, int64_t *nobs, double *sum_x) nogil: def roll_sum(ndarray[double_t] input, int64_t win, int64_t minp, - object index, closed='right'): + object index, str closed): cdef: double val, prev_x, sum_x = 0 int64_t s, e @@ -552,7 +556,7 @@ cdef inline void remove_mean(double val, Py_ssize_t *nobs, double *sum_x, def roll_mean(ndarray[double_t] input, int64_t win, int64_t minp, - object index, closed='right'): + object index, str closed): cdef: double val, prev_x, result, sum_x = 0 int64_t s, e @@ -677,7 +681,7 @@ cdef inline void remove_var(double val, double *nobs, double *mean_x, def roll_var(ndarray[double_t] input, int64_t win, int64_t minp, - object index, closed='right', int ddof=1): + object index, str closed, int ddof=1): """ Numerically stable implementation using Welford's method. """ @@ -820,7 +824,7 @@ cdef inline void remove_skew(double val, int64_t *nobs, double *x, double *xx, def roll_skew(ndarray[double_t] input, int64_t win, int64_t minp, - object index, closed='right'): + object index, str closed): cdef: double val, prev double x = 0, xx = 0, xxx = 0 @@ -948,7 +952,7 @@ cdef inline void remove_kurt(double val, int64_t *nobs, double *x, double *xx, def roll_kurt(ndarray[double_t] input, int64_t win, int64_t minp, - object index, closed='right'): + object index, str closed): cdef: double val, prev double x = 0, xx = 0, xxx = 0, xxxx = 0 @@ -1018,7 +1022,7 @@ def roll_kurt(ndarray[double_t] input, int64_t win, int64_t minp, def roll_median_c(ndarray[float64_t] input, int64_t win, int64_t minp, - object index, closed='right'): + object index, str closed): cdef: double val, res, prev bint err=0, is_variable @@ -1034,8 +1038,8 @@ def roll_median_c(ndarray[float64_t] input, int64_t win, int64_t minp, # actual skiplist ops outweigh any window computation costs start, end, N, win, minp, is_variable = get_window_indexer( input, win, - minp, index, - use_mock=False, closed=closed) + minp, index, closed, + use_mock=False) output = np.empty(N, dtype=float) sl = skiplist_init(win) @@ -1144,7 +1148,7 @@ cdef inline numeric calc_mm(int64_t minp, Py_ssize_t nobs, def roll_max(ndarray[numeric] input, int64_t win, int64_t minp, - object index, closed='right'): + object index, str closed): """ Moving max of 1d array of any numeric type along axis=0 ignoring NaNs. @@ -1160,11 +1164,11 @@ def roll_max(ndarray[numeric] input, int64_t win, int64_t minp, make the interval closed on the right, left, both or neither endpoints """ - return _roll_min_max(input, win, minp, index, is_max=1, closed=closed) + return _roll_min_max(input, win, minp, index, closed=closed, is_max=1) def roll_min(ndarray[numeric] input, int64_t win, int64_t minp, - object index, closed='right'): + object index, str closed): """ Moving max of 1d array of any numeric type along axis=0 ignoring NaNs. @@ -1181,7 +1185,7 @@ def roll_min(ndarray[numeric] input, int64_t win, int64_t minp, cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, - object index, bint is_max, closed='right'): + object index, str closed, bint is_max): """ Moving min/max of 1d array of any numeric type along axis=0 ignoring NaNs. @@ -1206,7 +1210,7 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, starti, endi, N, win, minp, is_variable = get_window_indexer( input, win, - minp, index, closed=closed) + minp, index, closed) output = np.empty(N, dtype=input.dtype) @@ -1308,7 +1312,8 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win, - int64_t minp, object index, double quantile, closed='right'): + int64_t minp, object index, str closed, + double quantile): """ O(N log(window)) implementation using skip list """ @@ -1328,8 +1333,8 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win, # actual skiplist ops outweigh any window computation costs start, end, N, win, minp, is_variable = get_window_indexer( input, win, - minp, index, - use_mock=False, closed=closed) + minp, index, closed, + use_mock=False) output = np.empty(N, dtype=float) skiplist = IndexableSkiplist(win) @@ -1371,9 +1376,9 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win, def roll_generic(ndarray[float64_t, cast=True] input, - int64_t win, int64_t minp, object index, + int64_t win, int64_t minp, object index, str closed, int offset, object func, - object args, object kwargs, closed='right'): + object args, object kwargs): cdef: ndarray[double_t] output, counts, bufarr float64_t *buf @@ -1391,8 +1396,8 @@ def roll_generic(ndarray[float64_t, cast=True] input, start, end, N, win, minp, is_variable = get_window_indexer(input, win, minp, index, - floor=0, - closed=closed) + closed, + floor=0) output = np.empty(N, dtype=float) counts = roll_sum(np.concatenate([np.isfinite(input).astype(float), diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 1de88afc67068..ec5121d27576a 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -3410,7 +3410,7 @@ def test_closed(self): expected = df.copy() expected["A"] = [np.nan, 1.0, 1, 1, np.nan] - result = df.rolling('2s', closed='left').sum() + result = df.rolling('2s', closed='neither').sum() tm.assert_frame_equal(result, expected) def test_ragged_sum(self): From 5eaf3b4141308ff67630c55c9f68b354bc2870a5 Mon Sep 17 00:00:00 2001 From: carlosdanielcsantos Date: Mon, 27 Mar 2017 13:52:30 +0100 Subject: [PATCH 04/13] str closed -> object closed Adding test of assert for closed parameter Adding assert for closed parameter in get_window_indexer --- pandas/core/window.pyx | 27 +++++++++++++-------------- pandas/tests/test_window.py | 4 ++++ 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/pandas/core/window.pyx b/pandas/core/window.pyx index 032a9101ae83b..0f2fd2659c53b 100644 --- a/pandas/core/window.pyx +++ b/pandas/core/window.pyx @@ -333,8 +333,7 @@ def get_window_indexer(input, win, minp, index, closed, bint l_closed = False bint r_closed = False - if closed not in ['right', 'left', 'both', 'neither']: - closed = 'right' + assert closed in ['right', 'left', 'both', 'neither'] if closed in ['right', 'both']: r_closed = True @@ -359,7 +358,7 @@ def get_window_indexer(input, win, minp, index, closed, def roll_count(ndarray[double_t] input, int64_t win, int64_t minp, - object index, closed='right'): + object index, object closed): cdef: double val, count_x = 0.0 int64_t s, e, nobs, N @@ -440,7 +439,7 @@ cdef inline void remove_sum(double val, int64_t *nobs, double *sum_x) nogil: def roll_sum(ndarray[double_t] input, int64_t win, int64_t minp, - object index, str closed): + object index, object closed): cdef: double val, prev_x, sum_x = 0 int64_t s, e @@ -556,7 +555,7 @@ cdef inline void remove_mean(double val, Py_ssize_t *nobs, double *sum_x, def roll_mean(ndarray[double_t] input, int64_t win, int64_t minp, - object index, str closed): + object index, object closed): cdef: double val, prev_x, result, sum_x = 0 int64_t s, e @@ -681,7 +680,7 @@ cdef inline void remove_var(double val, double *nobs, double *mean_x, def roll_var(ndarray[double_t] input, int64_t win, int64_t minp, - object index, str closed, int ddof=1): + object index, object closed, int ddof=1): """ Numerically stable implementation using Welford's method. """ @@ -824,7 +823,7 @@ cdef inline void remove_skew(double val, int64_t *nobs, double *x, double *xx, def roll_skew(ndarray[double_t] input, int64_t win, int64_t minp, - object index, str closed): + object index, object closed): cdef: double val, prev double x = 0, xx = 0, xxx = 0 @@ -952,7 +951,7 @@ cdef inline void remove_kurt(double val, int64_t *nobs, double *x, double *xx, def roll_kurt(ndarray[double_t] input, int64_t win, int64_t minp, - object index, str closed): + object index, object closed): cdef: double val, prev double x = 0, xx = 0, xxx = 0, xxxx = 0 @@ -1022,7 +1021,7 @@ def roll_kurt(ndarray[double_t] input, int64_t win, int64_t minp, def roll_median_c(ndarray[float64_t] input, int64_t win, int64_t minp, - object index, str closed): + object index, object closed): cdef: double val, res, prev bint err=0, is_variable @@ -1148,7 +1147,7 @@ cdef inline numeric calc_mm(int64_t minp, Py_ssize_t nobs, def roll_max(ndarray[numeric] input, int64_t win, int64_t minp, - object index, str closed): + object index, object closed): """ Moving max of 1d array of any numeric type along axis=0 ignoring NaNs. @@ -1168,7 +1167,7 @@ def roll_max(ndarray[numeric] input, int64_t win, int64_t minp, def roll_min(ndarray[numeric] input, int64_t win, int64_t minp, - object index, str closed): + object index, object closed): """ Moving max of 1d array of any numeric type along axis=0 ignoring NaNs. @@ -1185,7 +1184,7 @@ def roll_min(ndarray[numeric] input, int64_t win, int64_t minp, cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, - object index, str closed, bint is_max): + object index, object closed, bint is_max): """ Moving min/max of 1d array of any numeric type along axis=0 ignoring NaNs. @@ -1312,7 +1311,7 @@ cdef _roll_min_max(ndarray[numeric] input, int64_t win, int64_t minp, def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win, - int64_t minp, object index, str closed, + int64_t minp, object index, object closed, double quantile): """ O(N log(window)) implementation using skip list @@ -1376,7 +1375,7 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win, def roll_generic(ndarray[float64_t, cast=True] input, - int64_t win, int64_t minp, object index, str closed, + int64_t win, int64_t minp, object index, object closed, int offset, object func, object args, object kwargs): cdef: diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index ec5121d27576a..fb989d8dbf80a 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -3393,6 +3393,10 @@ def test_closed(self): pd.Timestamp('20130101 09:00:04'), pd.Timestamp('20130101 09:00:06')]) + # closed must be 'right', 'left', 'both', 'neither' + with self.assertRaises(ValueError): + self.regular.rolling(window='2s', closed="blabla") + expected = df.copy() expected["A"] = [1.0, 2, 2, 2, 1] result = df.rolling('2s', closed='right').sum() From 0e8e65c4cb049016a5e676d341e7b94ff4058244 Mon Sep 17 00:00:00 2001 From: carlosdanielcsantos Date: Mon, 27 Mar 2017 15:35:35 +0100 Subject: [PATCH 05/13] Adding doc-strings and PEP8 corrections Also, adding whatsnew entry --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/window.py | 1 - pandas/core/window.pyx | 29 +++++++++++++++++++++-------- pandas/tests/test_window.py | 12 ++++++------ 4 files changed, 28 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index fd1cd3d0022c9..c72445efee663 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -319,6 +319,7 @@ To convert a ``SparseDataFrame`` back to sparse SciPy matrix in COO format, you Other Enhancements ^^^^^^^^^^^^^^^^^^ +- ``DataFrame.rolling()`` now accepts the parameter ``closed='right'|'left'|'both'|'neither'`` to choose the rolling window endpoint closedness. (:issue:`13965`) - Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here `. - ``Series.str.replace()`` now accepts a callable, as replacement, which is passed to ``re.sub`` (:issue:`15055`) - ``Series.str.replace()`` now accepts a compiled regular expression as a pattern (:issue:`15446`) diff --git a/pandas/core/window.py b/pandas/core/window.py index a77ff850f7915..f159a97d613a0 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -800,7 +800,6 @@ def apply(self, func, args=(), kwargs={}): window = self._get_window() offset = _offset(window, self.center) index, indexi = self._get_index() - closed = self.closed def f(arg, window, min_periods, closed): minp = _use_window(min_periods, window) diff --git a/pandas/core/window.pyx b/pandas/core/window.pyx index 0f2fd2659c53b..0d87d43885db3 100644 --- a/pandas/core/window.pyx +++ b/pandas/core/window.pyx @@ -158,6 +158,10 @@ cdef class MockFixedWindowIndexer(WindowIndexer): index of the input floor: optional unit for flooring + l_closed: bint + left endpoint closedness + r_closed: bint + right endpoint closedness """ def __init__(self, ndarray input, int64_t win, int64_t minp, @@ -192,6 +196,10 @@ cdef class FixedWindowIndexer(WindowIndexer): index of the input floor: optional unit for flooring the unit + l_closed: bint + left endpoint closedness + r_closed: bint + right endpoint closedness """ def __init__(self, ndarray input, int64_t win, int64_t minp, @@ -231,10 +239,14 @@ cdef class VariableWindowIndexer(WindowIndexer): min number of obs in a window to consider non-NaN index: ndarray index of the input + l_closed: bint + left endpoint closedness + r_closed: bint + right endpoint closedness """ def __init__(self, ndarray input, int64_t win, int64_t minp, - ndarray index, bint l_closed=False, bint r_closed=True): + ndarray index, bint l_closed, bint r_closed): self.is_variable = 1 self.N = len(index) @@ -251,9 +263,8 @@ cdef class VariableWindowIndexer(WindowIndexer): # max window size self.win = (self.end - self.start).max() - def build(self, ndarray[int64_t] index, int64_t win, bint l_closed, - bint r_closed): + bint r_closed): cdef: ndarray[int64_t] start, end @@ -266,9 +277,9 @@ cdef class VariableWindowIndexer(WindowIndexer): start[0] = 0 - if r_closed: # right endpoint is closed + if r_closed: # right endpoint is closed end[0] = 1 - else: # right endpoint is open + else: # right endpoint is open end[0] = 0 with nogil: @@ -279,7 +290,7 @@ cdef class VariableWindowIndexer(WindowIndexer): end_bound = index[i] start_bound = index[i] - win - if l_closed: # left endpoint is closed + if l_closed: # left endpoint is closed start_bound -= 1 # advance the start bound until we are @@ -314,6 +325,8 @@ def get_window_indexer(input, win, minp, index, closed, minp: integer, minimum periods index: 1d ndarray, optional index to the input array + closed: 'right', 'left', 'both', 'neither' + window endpoint closedness floor: optional unit for flooring the unit use_mock: boolean, default True @@ -1024,8 +1037,8 @@ def roll_median_c(ndarray[float64_t] input, int64_t win, int64_t minp, object index, object closed): cdef: double val, res, prev - bint err=0, is_variable - int ret=0 + bint err = 0, is_variable + int ret = 0 skiplist_t *sl Py_ssize_t i, j int64_t nobs = 0, N, s, e diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index fb989d8dbf80a..468e504f214fb 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -3386,12 +3386,12 @@ def test_min_periods(self): tm.assert_frame_equal(result, expected) def test_closed(self): - df = DataFrame({'A': [1]*5}, - index = [pd.Timestamp('20130101 09:00:01'), - pd.Timestamp('20130101 09:00:02'), - pd.Timestamp('20130101 09:00:03'), - pd.Timestamp('20130101 09:00:04'), - pd.Timestamp('20130101 09:00:06')]) + df = DataFrame({'A': [1] * 5}, + index=[pd.Timestamp('20130101 09:00:01'), + pd.Timestamp('20130101 09:00:02'), + pd.Timestamp('20130101 09:00:03'), + pd.Timestamp('20130101 09:00:04'), + pd.Timestamp('20130101 09:00:06')]) # closed must be 'right', 'left', 'both', 'neither' with self.assertRaises(ValueError): From ec4bbc7a83666257dcfca6a63c4fc25fd6c75965 Mon Sep 17 00:00:00 2001 From: carlosdanielcsantos Date: Wed, 5 Apr 2017 10:22:22 +0100 Subject: [PATCH 06/13] Changing l_closed and r_closed variable names --- pandas/core/window.pyx | 58 ++++++++++++++++++++----------------- pandas/tests/test_window.py | 3 ++ 2 files changed, 35 insertions(+), 26 deletions(-) diff --git a/pandas/core/window.pyx b/pandas/core/window.pyx index 0d87d43885db3..02a66e0f73605 100644 --- a/pandas/core/window.pyx +++ b/pandas/core/window.pyx @@ -158,15 +158,15 @@ cdef class MockFixedWindowIndexer(WindowIndexer): index of the input floor: optional unit for flooring - l_closed: bint + left_closed: bint left endpoint closedness - r_closed: bint + right_closed: bint right endpoint closedness """ def __init__(self, ndarray input, int64_t win, int64_t minp, object index=None, object floor=None, - bint l_closed=False, bint r_closed=True): + bint left_closed=False, bint right_closed=True): assert index is None self.is_variable = 0 @@ -196,15 +196,15 @@ cdef class FixedWindowIndexer(WindowIndexer): index of the input floor: optional unit for flooring the unit - l_closed: bint + left_closed: bint left endpoint closedness - r_closed: bint + right_closed: bint right endpoint closedness """ def __init__(self, ndarray input, int64_t win, int64_t minp, object index=None, object floor=None, - bint l_closed=False, bint r_closed=True): + bint left_closed=False, bint right_closed=True): cdef ndarray start_s, start_e, end_s, end_e assert index is None @@ -239,14 +239,16 @@ cdef class VariableWindowIndexer(WindowIndexer): min number of obs in a window to consider non-NaN index: ndarray index of the input - l_closed: bint + left_closed: bint left endpoint closedness - r_closed: bint + True if the left endpoint is closed, False if open + right_closed: bint right endpoint closedness + True if the right endpoint is closed, False if open """ def __init__(self, ndarray input, int64_t win, int64_t minp, - ndarray index, bint l_closed, bint r_closed): + ndarray index, bint left_closed, bint right_closed): self.is_variable = 1 self.N = len(index) @@ -258,13 +260,13 @@ cdef class VariableWindowIndexer(WindowIndexer): self.end = np.empty(self.N, dtype='int64') self.end.fill(-1) - self.build(index, win, l_closed, r_closed) + self.build(index, win, left_closed, right_closed) # max window size self.win = (self.end - self.start).max() - def build(self, ndarray[int64_t] index, int64_t win, bint l_closed, - bint r_closed): + def build(self, ndarray[int64_t] index, int64_t win, bint left_closed, + bint right_closed): cdef: ndarray[int64_t] start, end @@ -277,9 +279,11 @@ cdef class VariableWindowIndexer(WindowIndexer): start[0] = 0 - if r_closed: # right endpoint is closed + # right endpoint is closed + if right_closed: end[0] = 1 - else: # right endpoint is open + # right endpoint is open + else: end[0] = 0 with nogil: @@ -290,7 +294,8 @@ cdef class VariableWindowIndexer(WindowIndexer): end_bound = index[i] start_bound = index[i] - win - if l_closed: # left endpoint is closed + # left endpoint is closed + if left_closed: start_bound -= 1 # advance the start bound until we are @@ -309,7 +314,7 @@ cdef class VariableWindowIndexer(WindowIndexer): end[i] = end[i - 1] # right endpoint is open - if not r_closed: + if not right_closed: end[i] -= 1 @@ -325,7 +330,8 @@ def get_window_indexer(input, win, minp, index, closed, minp: integer, minimum periods index: 1d ndarray, optional index to the input array - closed: 'right', 'left', 'both', 'neither' + closed: string, default 'right' + {'right', 'left', 'both', 'neither'} window endpoint closedness floor: optional unit for flooring the unit @@ -343,26 +349,26 @@ def get_window_indexer(input, win, minp, index, closed, """ cdef: - bint l_closed = False - bint r_closed = False + bint left_closed = False + bint right_closed = False assert closed in ['right', 'left', 'both', 'neither'] if closed in ['right', 'both']: - r_closed = True + right_closed = True if closed in ['left', 'both']: - l_closed = True + left_closed = True if index is not None: - indexer = VariableWindowIndexer(input, win, minp, index, l_closed, - r_closed) + indexer = VariableWindowIndexer(input, win, minp, index, left_closed, + right_closed) elif use_mock: indexer = MockFixedWindowIndexer(input, win, minp, index, floor, - l_closed, r_closed) + left_closed, right_closed) else: - indexer = FixedWindowIndexer(input, win, minp, index, floor, l_closed, - r_closed) + indexer = FixedWindowIndexer(input, win, minp, index, floor, left_closed, + right_closed) return indexer.get_data() # ---------------------------------------------------------------------- diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 468e504f214fb..1dfa087380a9d 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -3386,6 +3386,9 @@ def test_min_periods(self): tm.assert_frame_equal(result, expected) def test_closed(self): + + # xref GH13965 + df = DataFrame({'A': [1] * 5}, index=[pd.Timestamp('20130101 09:00:01'), pd.Timestamp('20130101 09:00:02'), From 306b9f7227f13ac86d91bad5ef82c1b4744e8c3d Mon Sep 17 00:00:00 2001 From: carlosdanielcsantos Date: Wed, 5 Apr 2017 15:09:42 +0100 Subject: [PATCH 07/13] Commiting progress on default=None. Still not tested Adding computation.rst section (still not written) --- doc/source/computation.rst | 21 +++++++++++++++++++++ doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/core/window.py | 12 +++++++----- pandas/core/window.pyx | 14 +++++++++----- pandas/tests/test_window.py | 4 ++++ 5 files changed, 42 insertions(+), 11 deletions(-) diff --git a/doc/source/computation.rst b/doc/source/computation.rst index a37cbc96b2d8c..5124d249df117 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -459,6 +459,27 @@ default of the index) in a DataFrame. dft dft.rolling('2s', on='foo').sum() +.. _stats.rolling_window.endpoints: + +Rolling window endpoint inclusion +~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 0.20.0 + +New in version 0.19.0 are the ability to pass an offset (or convertible) to a ``.rolling()`` method and have it produce +variable sized windows based on the passed time window. For each time point, this includes all preceding values occurring +within the indicated time delta. + +This can be particularly useful for a non-regular time frequency index. + +.. ipython:: python + + dft = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}, + index=pd.date_range('20130101 09:00:00', periods=5, freq='s')) + dft + +This is a regular frequency index. Using an integer window parameter works to roll along the window frequency. + .. _stats.moments.ts-versus-resampling: Time-aware Rolling vs. Resampling diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index c72445efee663..c81bb4fd3917b 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -319,7 +319,7 @@ To convert a ``SparseDataFrame`` back to sparse SciPy matrix in COO format, you Other Enhancements ^^^^^^^^^^^^^^^^^^ -- ``DataFrame.rolling()`` now accepts the parameter ``closed='right'|'left'|'both'|'neither'`` to choose the rolling window endpoint closedness. (:issue:`13965`) +- ``DataFrame.rolling()`` now accepts the parameter ``closed='right'|'left'|'both'|'neither'`` to choose the rolling window endpoint closedness. See :ref:`documentation ` (:issue:`13965`) - Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here `. - ``Series.str.replace()`` now accepts a callable, as replacement, which is passed to ``re.sub`` (:issue:`15055`) - ``Series.str.replace()`` now accepts a compiled regular expression as a pattern (:issue:`15446`) diff --git a/pandas/core/window.py b/pandas/core/window.py index f159a97d613a0..9b100b14bf995 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -60,7 +60,7 @@ class _Window(PandasObject, SelectionMixin): exclusions = set() def __init__(self, obj, window=None, min_periods=None, freq=None, - center=False, win_type=None, axis=0, on=None, closed='right', + center=False, win_type=None, axis=0, on=None, closed=None, **kwargs): if freq is not None: @@ -379,10 +379,12 @@ class Window(_Window): on : string, optional For a DataFrame, column on which to calculate the rolling window, rather than the index - closed : 'right', 'left', 'both', 'neither' - For offset-based windows, make the interval closed on the right, left, - or on both endpoints. Can also make the interval open on both endpoints - (neither). + closed : string, default None + Make the interval closed on the 'right', 'left', 'both' or + 'neither' endpoints. + For offset-based windows, it defaults to 'right'. + For fixed windows, defaults to 'both'. Remaining cases not implemented + for fixed windows. .. versionadded:: 0.20.0 diff --git a/pandas/core/window.pyx b/pandas/core/window.pyx index 02a66e0f73605..4415fde53d4de 100644 --- a/pandas/core/window.pyx +++ b/pandas/core/window.pyx @@ -165,8 +165,8 @@ cdef class MockFixedWindowIndexer(WindowIndexer): """ def __init__(self, ndarray input, int64_t win, int64_t minp, - object index=None, object floor=None, - bint left_closed=False, bint right_closed=True): + bint left_closed, bint right_closed, + object index=None, object floor=None): assert index is None self.is_variable = 0 @@ -203,8 +203,8 @@ cdef class FixedWindowIndexer(WindowIndexer): """ def __init__(self, ndarray input, int64_t win, int64_t minp, - object index=None, object floor=None, - bint left_closed=False, bint right_closed=True): + bint left_closed, bint right_closed, + object index=None, object floor=None): cdef ndarray start_s, start_e, end_s, end_e assert index is None @@ -352,7 +352,11 @@ def get_window_indexer(input, win, minp, index, closed, bint left_closed = False bint right_closed = False - assert closed in ['right', 'left', 'both', 'neither'] + assert closed is None or closed in ['right', 'left', 'both', 'neither'] + + # if windows is variable, default is 'right', otherwise default is 'both' + if closed is None: + closed = 'right' if index is not None else 'both' if closed in ['right', 'both']: right_closed = True diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 1dfa087380a9d..1a9e3eaefd62c 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -3405,6 +3405,10 @@ def test_closed(self): result = df.rolling('2s', closed='right').sum() tm.assert_frame_equal(result, expected) + # default should be 'right' + result = df.rolling('2s').sum() + tm.assert_frame_equal(result, expected) + expected = df.copy() expected["A"] = [1.0, 2, 3, 3, 2] result = df.rolling('2s', closed='both').sum() From 8bd336a407bb93276ec91d75167e5d5b64812b72 Mon Sep 17 00:00:00 2001 From: carlosdanielcsantos Date: Wed, 5 Apr 2017 22:55:18 +0100 Subject: [PATCH 08/13] Almost there Not allowing closed parameter for fixed windows Passing tests again Updating docs --- doc/source/computation.rst | 32 ++++++++++++++++++++++++-------- pandas/core/generic.py | 2 +- pandas/core/window.py | 6 +++++- pandas/core/window.pyx | 19 ++++++++++--------- pandas/tests/test_window.py | 4 ++++ 5 files changed, 44 insertions(+), 19 deletions(-) diff --git a/doc/source/computation.rst b/doc/source/computation.rst index 5124d249df117..4dbbd1511a2a0 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -466,19 +466,35 @@ Rolling window endpoint inclusion .. versionadded:: 0.20.0 -New in version 0.19.0 are the ability to pass an offset (or convertible) to a ``.rolling()`` method and have it produce -variable sized windows based on the passed time window. For each time point, this includes all preceding values occurring -within the indicated time delta. +The inclusion of the interval endpoints in rolling window calculations can be specified with the ``closed`` +parameter: -This can be particularly useful for a non-regular time frequency index. + - ``right`` : close right endpoint (default for time-based windows) + - ``left`` : close left endpoint + - ``both`` : close both endpoints (default for fixed windows) + - ``neither`` : open endpoints + +For example, having the right endpoint open is useful in many problems that require that there is no contamination +from present information back to past information. This allows the rolling window to compute statistics +"up to that point in time", but not including that point in time. .. ipython:: python - dft = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}, - index=pd.date_range('20130101 09:00:00', periods=5, freq='s')) - dft + df = pd.DataFrame({'x': [1]*5}, + index = [pd.Timestamp('20130101 09:00:01'), + pd.Timestamp('20130101 09:00:02'), + pd.Timestamp('20130101 09:00:03'), + pd.Timestamp('20130101 09:00:04'), + pd.Timestamp('20130101 09:00:06')]) -This is a regular frequency index. Using an integer window parameter works to roll along the window frequency. + df["right"] = df.rolling('2s', closed='right').x.sum() # default + df["both"] = df.rolling('2s', closed='both').x.sum() + df["left"] = df.rolling('2s', closed='left').x.sum() + df["open"] = df.rolling('2s', closed='neither').x.sum() + + df + +Currently, this feature is only implemented for time-based windows. .. _stats.moments.ts-versus-resampling: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d89acfcc282c5..86978a9739ca4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5962,7 +5962,7 @@ def _add_series_or_dataframe_operations(cls): @Appender(rwindow.rolling.__doc__) def rolling(self, window, min_periods=None, freq=None, center=False, - win_type=None, on=None, axis=0, closed='right'): + win_type=None, on=None, axis=0, closed=None): axis = self._get_axis_number(axis) return rwindow.rolling(self, window=window, min_periods=min_periods, freq=freq, diff --git a/pandas/core/window.py b/pandas/core/window.py index 9b100b14bf995..3f068f9159809 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -103,7 +103,8 @@ def validate(self): if self.min_periods is not None and not \ is_integer(self.min_periods): raise ValueError("min_periods must be an integer") - if self.closed not in ['right', 'both', 'left', 'neither']: + if self.closed is not None and self.closed not in \ + ['right', 'both', 'left', 'neither']: raise ValueError("closed must be 'right', 'left', 'both' or " "'neither'") @@ -1056,6 +1057,9 @@ def validate(self): raise ValueError("window must be an integer") elif self.window < 0: raise ValueError("window must be non-negative") + elif self.closed is not None: + raise ValueError("closed only implemented for datetimelike " + "and offset based windows") def _validate_monotonic(self): """ validate on is monotonic """ diff --git a/pandas/core/window.pyx b/pandas/core/window.pyx index 4415fde53d4de..3bb8abe26c781 100644 --- a/pandas/core/window.pyx +++ b/pandas/core/window.pyx @@ -248,7 +248,7 @@ cdef class VariableWindowIndexer(WindowIndexer): """ def __init__(self, ndarray input, int64_t win, int64_t minp, - ndarray index, bint left_closed, bint right_closed): + bint left_closed, bint right_closed, ndarray index): self.is_variable = 1 self.N = len(index) @@ -330,9 +330,10 @@ def get_window_indexer(input, win, minp, index, closed, minp: integer, minimum periods index: 1d ndarray, optional index to the input array - closed: string, default 'right' + closed: string, default None {'right', 'left', 'both', 'neither'} - window endpoint closedness + window endpoint closedness. Defaults to 'right' in + VariableWindowIndexer and to 'both' in FixedWindowIndexer floor: optional unit for flooring the unit use_mock: boolean, default True @@ -365,14 +366,14 @@ def get_window_indexer(input, win, minp, index, closed, left_closed = True if index is not None: - indexer = VariableWindowIndexer(input, win, minp, index, left_closed, - right_closed) + indexer = VariableWindowIndexer(input, win, minp, left_closed, + right_closed, index) elif use_mock: - indexer = MockFixedWindowIndexer(input, win, minp, index, floor, - left_closed, right_closed) + indexer = MockFixedWindowIndexer(input, win, minp, left_closed, + right_closed, index, floor) else: - indexer = FixedWindowIndexer(input, win, minp, index, floor, left_closed, - right_closed) + indexer = FixedWindowIndexer(input, win, minp, left_closed, + right_closed, index, floor) return indexer.get_data() # ---------------------------------------------------------------------- diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 1a9e3eaefd62c..9175117d1ec8e 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -3396,6 +3396,10 @@ def test_closed(self): pd.Timestamp('20130101 09:00:04'), pd.Timestamp('20130101 09:00:06')]) + # closed only allowed for datetimelike + with self.assertRaises(ValueError): + self.regular.rolling(window=3, closed='both') + # closed must be 'right', 'left', 'both', 'neither' with self.assertRaises(ValueError): self.regular.rolling(window='2s', closed="blabla") From 90dfb0c00441196eec9ab59355ec3410c9d56230 Mon Sep 17 00:00:00 2001 From: carlosdanielcsantos Date: Sun, 9 Apr 2017 23:06:43 +0100 Subject: [PATCH 09/13] Correcting bug in window validation --- pandas/core/window.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index 3f068f9159809..737229082d4cb 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -1057,7 +1057,8 @@ def validate(self): raise ValueError("window must be an integer") elif self.window < 0: raise ValueError("window must be non-negative") - elif self.closed is not None: + + if not self.is_datetimelike and self.closed is not None: raise ValueError("closed only implemented for datetimelike " "and offset based windows") From c18a31bacfab21ce14cfc32d098f0a442b4f4f88 Mon Sep 17 00:00:00 2001 From: carlosdanielcsantos Date: Sun, 9 Apr 2017 23:38:51 +0100 Subject: [PATCH 10/13] Fixing test of assertion of closed parameter in fixed windows Style corrections --- doc/source/computation.rst | 2 +- pandas/core/window.py | 2 +- pandas/tests/test_window.py | 10 ++++++---- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/doc/source/computation.rst b/doc/source/computation.rst index 4dbbd1511a2a0..79956c61325c5 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -490,7 +490,7 @@ from present information back to past information. This allows the rolling windo df["right"] = df.rolling('2s', closed='right').x.sum() # default df["both"] = df.rolling('2s', closed='both').x.sum() df["left"] = df.rolling('2s', closed='left').x.sum() - df["open"] = df.rolling('2s', closed='neither').x.sum() + df["neither"] = df.rolling('2s', closed='neither').x.sum() df diff --git a/pandas/core/window.py b/pandas/core/window.py index 737229082d4cb..5b84b075ce81a 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -1060,7 +1060,7 @@ def validate(self): if not self.is_datetimelike and self.closed is not None: raise ValueError("closed only implemented for datetimelike " - "and offset based windows") + "and offset based windows") def _validate_monotonic(self): """ validate on is monotonic """ diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 9175117d1ec8e..85f29fd3f9c2a 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -431,6 +431,12 @@ def test_numpy_compat(self): tm.assertRaisesRegexp(UnsupportedFunctionCall, msg, getattr(r, func), dtype=np.float64) + def test_closed(self): + df = DataFrame({'A': [0, 1, 2, 3, 4]}) + # closed only allowed for datetimelike + with self.assertRaises(ValueError): + df.rolling(window=3, closed='both') + class TestExpanding(Base): @@ -3396,10 +3402,6 @@ def test_closed(self): pd.Timestamp('20130101 09:00:04'), pd.Timestamp('20130101 09:00:06')]) - # closed only allowed for datetimelike - with self.assertRaises(ValueError): - self.regular.rolling(window=3, closed='both') - # closed must be 'right', 'left', 'both', 'neither' with self.assertRaises(ValueError): self.regular.rolling(window='2s', closed="blabla") From 037b84e4290e42c94a449331cc01237d7142361f Mon Sep 17 00:00:00 2001 From: carlosdanielcsantos Date: Sun, 9 Apr 2017 23:49:19 +0100 Subject: [PATCH 11/13] Fixing style --- doc/source/computation.rst | 2 +- pandas/tests/test_window.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/computation.rst b/doc/source/computation.rst index 79956c61325c5..cc3f06f335314 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -487,7 +487,7 @@ from present information back to past information. This allows the rolling windo pd.Timestamp('20130101 09:00:04'), pd.Timestamp('20130101 09:00:06')]) - df["right"] = df.rolling('2s', closed='right').x.sum() # default + df["right"] = df.rolling('2s', closed='right').x.sum() # default df["both"] = df.rolling('2s', closed='both').x.sum() df["left"] = df.rolling('2s', closed='left').x.sum() df["neither"] = df.rolling('2s', closed='neither').x.sum() diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 85f29fd3f9c2a..e0fc6fe707554 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -435,7 +435,7 @@ def test_closed(self): df = DataFrame({'A': [0, 1, 2, 3, 4]}) # closed only allowed for datetimelike with self.assertRaises(ValueError): - df.rolling(window=3, closed='both') + df.rolling(window=3, closed='both') class TestExpanding(Base): From 568c12f2afeb21900655abef7e647e4d88450958 Mon Sep 17 00:00:00 2001 From: carlosdanielcsantos Date: Mon, 10 Apr 2017 13:57:40 +0100 Subject: [PATCH 12/13] Innocuous change to rerun tests --- pandas/tests/test_window.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index e0fc6fe707554..3929aba858797 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -435,7 +435,7 @@ def test_closed(self): df = DataFrame({'A': [0, 1, 2, 3, 4]}) # closed only allowed for datetimelike with self.assertRaises(ValueError): - df.rolling(window=3, closed='both') + df.rolling(window=3, closed='neither') class TestExpanding(Base): From aad97dc48304aeca64d82607c2e8fbc724e060f9 Mon Sep 17 00:00:00 2001 From: carlosdanielcsantos Date: Thu, 13 Apr 2017 01:18:13 +0100 Subject: [PATCH 13/13] Updating docs --- doc/source/computation.rst | 17 +++++++++++------ doc/source/whatsnew/v0.20.0.txt | 2 +- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/doc/source/computation.rst b/doc/source/computation.rst index cc3f06f335314..cd90ba6e9ca1a 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -461,18 +461,22 @@ default of the index) in a DataFrame. .. _stats.rolling_window.endpoints: -Rolling window endpoint inclusion -~~~~~~~~~~~~~~~~~~ +Rolling Window Endpoints +~~~~~~~~~~~~~~~~~~~~~~~~ .. versionadded:: 0.20.0 The inclusion of the interval endpoints in rolling window calculations can be specified with the ``closed`` parameter: - - ``right`` : close right endpoint (default for time-based windows) - - ``left`` : close left endpoint - - ``both`` : close both endpoints (default for fixed windows) - - ``neither`` : open endpoints +.. csv-table:: + :header: "``closed``", "Description", "Default for" + :widths: 20, 30, 30 + + ``right``, close right endpoint, time-based windows + ``left``, close left endpoint, + ``both``, close both endpoints, fixed windows + ``neither``, open endpoints, For example, having the right endpoint open is useful in many problems that require that there is no contamination from present information back to past information. This allows the rolling window to compute statistics @@ -495,6 +499,7 @@ from present information back to past information. This allows the rolling windo df Currently, this feature is only implemented for time-based windows. +For fixed windows, the closed parameter cannot be set and the rolling window will always have both endpoints closed. .. _stats.moments.ts-versus-resampling: diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index c81bb4fd3917b..db650dc6569eb 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -319,7 +319,7 @@ To convert a ``SparseDataFrame`` back to sparse SciPy matrix in COO format, you Other Enhancements ^^^^^^^^^^^^^^^^^^ -- ``DataFrame.rolling()`` now accepts the parameter ``closed='right'|'left'|'both'|'neither'`` to choose the rolling window endpoint closedness. See :ref:`documentation ` (:issue:`13965`) +- ``DataFrame.rolling()`` now accepts the parameter ``closed='right'|'left'|'both'|'neither'`` to choose the rolling window endpoint closedness. See the :ref:`documentation ` (:issue:`13965`) - Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here `. - ``Series.str.replace()`` now accepts a callable, as replacement, which is passed to ``re.sub`` (:issue:`15055`) - ``Series.str.replace()`` now accepts a compiled regular expression as a pattern (:issue:`15446`)