From d65658d0a6c481a3bf4002e34288c99a8e7c971c Mon Sep 17 00:00:00 2001 From: agraboso Date: Thu, 11 Aug 2016 12:36:50 -0400 Subject: [PATCH 01/12] ENH: allow exact matches in time-based .rolling() Closes #13965 --- pandas/core/generic.py | 7 +++-- pandas/core/window.py | 29 ++++++++++++++---- pandas/tests/test_window.py | 59 +++++++++++++++++++++++++++++++++++++ 3 files changed, 87 insertions(+), 8 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3678168890444..98c8fea5a9728 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5567,11 +5567,12 @@ def _add_series_or_dataframe_operations(cls): from pandas.core import window as rwindow @Appender(rwindow.rolling.__doc__) - def rolling(self, window, min_periods=None, freq=None, center=False, - win_type=None, on=None, axis=0): + def rolling(self, window, min_periods=None, left_closed=None, + freq=None, center=False, win_type=None, on=None, axis=0): axis = self._get_axis_number(axis) return rwindow.rolling(self, window=window, - min_periods=min_periods, freq=freq, + min_periods=min_periods, + left_closed=left_closed, freq=freq, center=center, win_type=win_type, on=on, axis=axis) diff --git a/pandas/core/window.py b/pandas/core/window.py index b7276aed506de..41a338c7b3d7b 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -997,6 +997,17 @@ def _get_corr(a, b): class Rolling(_Rolling_and_Expanding): + _attributes = ['window', 'min_periods', 'left_closed', 'freq', + 'center', 'win_type', 'axis', 'on'] + + def __init__(self, obj, window=None, min_periods=None, left_closed=None, + freq=None, center=False, win_type=None, axis=0, on=None, + **kwargs): + self.left_closed = left_closed + super(Rolling, self).__init__(obj=obj, window=window, + min_periods=min_periods, freq=freq, + center=center, win_type=win_type, + axis=axis, on=on, **kwargs) @cache_readonly def is_datetimelike(self): @@ -1045,18 +1056,26 @@ def validate(self): "for datetimelike and offset " "based windows") + if self.left_closed is not None and not is_bool(self.left_closed): + raise ValueError("left_closed must be a boolean") + # this will raise ValueError on non-fixed freqs self.window = freq.nanos + if self.left_closed: + self.window += 1 self.win_type = 'freq' # min_periods must be an integer if self.min_periods is None: self.min_periods = 1 - - elif not is_integer(self.window): - raise ValueError("window must be an integer") - elif self.window < 0: - raise ValueError("window must be non-negative") + else: + if self.left_closed is not None: + raise ValueError("left_closed only valid for datetimelike " + "and offset based windows") + elif not is_integer(self.window): + raise ValueError("window must be an integer") + elif self.window < 0: + raise ValueError("window must be non-negative") @Substitution(name='rolling') @Appender(SelectionMixin._see_also_template) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 929ff43bfaaad..1935f3299b8d8 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -3288,6 +3288,65 @@ def test_min_periods(self): result = df.rolling('2s', min_periods=1).sum() tm.assert_frame_equal(result, expected) + def test_left_closed(self): + + df = DataFrame({'B': [1, 1, 2, np.nan, 4]}, + index=[Timestamp('20130101 09:00:00'), + Timestamp('20130101 09:00:02'), + Timestamp('20130101 09:00:03'), + Timestamp('20130101 09:00:05'), + Timestamp('20130101 09:00:06')]) + expected = df.rolling('4s').count() + result = df.rolling('3s', left_closed=True).count() + tm.assert_frame_equal(result, expected) + expected = df.rolling('4s').sum() + result = df.rolling('3s', left_closed=True).sum() + tm.assert_frame_equal(result, expected) + + df = DataFrame({'B': [1, 1, 2, np.nan, 4], + 'timestamp': [Timestamp('20130101 09:00:00'), + Timestamp('20130101 09:00:02'), + Timestamp('20130101 09:00:03'), + Timestamp('20130101 09:00:05'), + Timestamp('20130101 09:00:06')]}) + expected = df.rolling('4s', on='timestamp').count() + result = df.rolling('3s', left_closed=True, + on='timestamp').count() + tm.assert_frame_equal(result, expected) + expected = df.rolling('4s', on='timestamp').sum() + result = df.rolling('3s', left_closed=True, + on='timestamp').sum() + tm.assert_frame_equal(result, expected) + + df = DataFrame({'B': [1, 1, 2, np.nan, 4]}, + index=[Timestamp('20130101'), + Timestamp('20130103'), + Timestamp('20130104'), + Timestamp('20130106'), + Timestamp('20130107')]) + expected = df.rolling('4d').count() + result = df.rolling('3d', left_closed=True).count() + tm.assert_frame_equal(result, expected) + expected = df.rolling('4d').sum() + result = df.rolling('3d', left_closed=True).sum() + tm.assert_frame_equal(result, expected) + + df = DataFrame({'B': [1, 1, 2, np.nan, 4], + 'timestamp': [Timestamp('20130101'), + Timestamp('20130103'), + Timestamp('20130104'), + Timestamp('20130106'), + Timestamp('20130107')]}) + expected = df.rolling('4d', on='timestamp').count() + result = df.rolling('3d', left_closed=True, + on='timestamp').count() + tm.assert_frame_equal(result, expected) + expected = df.rolling('4d', on='timestamp').sum() + result = df.rolling('3d', left_closed=True, + on='timestamp').sum() + tm.assert_frame_equal(result, expected) + + def test_ragged_sum(self): df = self.ragged From 758986de08c2cf799f6093f43a9a4faebc3e1c47 Mon Sep 17 00:00:00 2001 From: agraboso Date: Thu, 11 Aug 2016 16:16:24 -0400 Subject: [PATCH 02/12] Add test --- pandas/tests/test_window.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 1935f3299b8d8..6d10017cf92ec 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -3346,6 +3346,16 @@ def test_left_closed(self): on='timestamp').sum() tm.assert_frame_equal(result, expected) + df = DataFrame({'B': [1] * 3}, + index=[Timestamp('20130101 09:00:30'), + Timestamp('20130101 09:01:00'), + Timestamp('20130101 09:02:00')]) + expected = DataFrame({'B': [1., 2., 2.]}, + index=[Timestamp('20130101 09:00:30'), + Timestamp('20130101 09:01:00'), + Timestamp('20130101 09:02:00')]) + result = df.rolling('1min', left_closed=True).sum() + tm.assert_frame_equal(result, expected) def test_ragged_sum(self): From 396e058c2db161cf92bc412ef4990084876e1a58 Mon Sep 17 00:00:00 2001 From: agraboso Date: Thu, 11 Aug 2016 16:22:19 -0400 Subject: [PATCH 03/12] Fix indentation --- pandas/tests/test_window.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 6d10017cf92ec..bb8b3e836e360 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -3351,9 +3351,9 @@ def test_left_closed(self): Timestamp('20130101 09:01:00'), Timestamp('20130101 09:02:00')]) expected = DataFrame({'B': [1., 2., 2.]}, - index=[Timestamp('20130101 09:00:30'), - Timestamp('20130101 09:01:00'), - Timestamp('20130101 09:02:00')]) + index=[Timestamp('20130101 09:00:30'), + Timestamp('20130101 09:01:00'), + Timestamp('20130101 09:02:00')]) result = df.rolling('1min', left_closed=True).sum() tm.assert_frame_equal(result, expected) From 3df0da10f7a410c1f1f54a6a33bb578a38ceb6c1 Mon Sep 17 00:00:00 2001 From: agraboso Date: Thu, 11 Aug 2016 18:14:01 -0400 Subject: [PATCH 04/12] Add test for invalid left_closed --- pandas/tests/test_window.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index bb8b3e836e360..c799c9edf4799 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -3357,6 +3357,9 @@ def test_left_closed(self): result = df.rolling('1min', left_closed=True).sum() tm.assert_frame_equal(result, expected) + with self.assertRaises(ValueError): + df.rolling('1min', left_closed="'tis wrong") + def test_ragged_sum(self): df = self.ragged From 84cebbf63e2c946300deccfebfde5c4f40454377 Mon Sep 17 00:00:00 2001 From: agraboso Date: Thu, 11 Aug 2016 18:18:14 -0400 Subject: [PATCH 05/12] Fix indentation --- pandas/tests/test_window.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index c799c9edf4799..af502ff690ed8 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -3347,9 +3347,9 @@ def test_left_closed(self): tm.assert_frame_equal(result, expected) df = DataFrame({'B': [1] * 3}, - index=[Timestamp('20130101 09:00:30'), - Timestamp('20130101 09:01:00'), - Timestamp('20130101 09:02:00')]) + index=[Timestamp('20130101 09:00:30'), + Timestamp('20130101 09:01:00'), + Timestamp('20130101 09:02:00')]) expected = DataFrame({'B': [1., 2., 2.]}, index=[Timestamp('20130101 09:00:30'), Timestamp('20130101 09:01:00'), From dc2d6fddb330e18f7806b3c5a5c6880d31ced455 Mon Sep 17 00:00:00 2001 From: agraboso Date: Thu, 11 Aug 2016 18:24:32 -0400 Subject: [PATCH 06/12] Reorder arguments --- pandas/core/generic.py | 9 ++++----- pandas/core/window.py | 10 +++++----- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 98c8fea5a9728..20358ad9de9d6 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5567,14 +5567,13 @@ def _add_series_or_dataframe_operations(cls): from pandas.core import window as rwindow @Appender(rwindow.rolling.__doc__) - def rolling(self, window, min_periods=None, left_closed=None, - freq=None, center=False, win_type=None, on=None, axis=0): + def rolling(self, window, min_periods=None, freq=None, center=False, + win_type=None, on=None, axis=0, left_closed=None): axis = self._get_axis_number(axis) return rwindow.rolling(self, window=window, - min_periods=min_periods, - left_closed=left_closed, freq=freq, + min_periods=min_periods, freq=freq, center=center, win_type=win_type, - on=on, axis=axis) + on=on, axis=axis, left_closed=left_closed) cls.rolling = rolling diff --git a/pandas/core/window.py b/pandas/core/window.py index 41a338c7b3d7b..840bda2154c78 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -997,12 +997,12 @@ def _get_corr(a, b): class Rolling(_Rolling_and_Expanding): - _attributes = ['window', 'min_periods', 'left_closed', 'freq', - 'center', 'win_type', 'axis', 'on'] + _attributes = ['window', 'min_periods', 'freq', 'center', 'win_type', + 'axis', 'on', 'left_closed'] - def __init__(self, obj, window=None, min_periods=None, left_closed=None, - freq=None, center=False, win_type=None, axis=0, on=None, - **kwargs): + def __init__(self, obj, window=None, min_periods=None, freq=None, + center=False, win_type=None, axis=0, on=None, + left_closed=None, **kwargs): self.left_closed = left_closed super(Rolling, self).__init__(obj=obj, window=window, min_periods=min_periods, freq=freq, From 8291645169b6e7b90dca1cb9d10b924812b39faf Mon Sep 17 00:00:00 2001 From: agraboso Date: Fri, 12 Aug 2016 09:24:16 -0400 Subject: [PATCH 07/12] Change left_closed=False|True to closed='right'|'both' --- pandas/core/generic.py | 4 ++-- pandas/core/window.py | 18 +++++++++--------- pandas/tests/test_window.py | 22 +++++++++++----------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 20358ad9de9d6..a1be2cc470a8d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5568,12 +5568,12 @@ def _add_series_or_dataframe_operations(cls): @Appender(rwindow.rolling.__doc__) def rolling(self, window, min_periods=None, freq=None, center=False, - win_type=None, on=None, axis=0, left_closed=None): + win_type=None, on=None, axis=0, closed='right'): axis = self._get_axis_number(axis) return rwindow.rolling(self, window=window, min_periods=min_periods, freq=freq, center=center, win_type=win_type, - on=on, axis=axis, left_closed=left_closed) + on=on, axis=axis, closed=closed) cls.rolling = rolling diff --git a/pandas/core/window.py b/pandas/core/window.py index 840bda2154c78..8dafaaf10be39 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -998,12 +998,12 @@ def _get_corr(a, b): class Rolling(_Rolling_and_Expanding): _attributes = ['window', 'min_periods', 'freq', 'center', 'win_type', - 'axis', 'on', 'left_closed'] + 'axis', 'on', 'closed'] def __init__(self, obj, window=None, min_periods=None, freq=None, center=False, win_type=None, axis=0, on=None, - left_closed=None, **kwargs): - self.left_closed = left_closed + closed='right', **kwargs): + self.closed = closed super(Rolling, self).__init__(obj=obj, window=window, min_periods=min_periods, freq=freq, center=center, win_type=win_type, @@ -1032,6 +1032,9 @@ def _on(self): def validate(self): super(Rolling, self).validate() + if self.closed not in ['right', 'both']: + raise ValueError("closed must be right or both") + # we allow rolling on a datetimelike index if (self.is_datetimelike and isinstance(self.window, (compat.string_types, DateOffset))): @@ -1056,12 +1059,9 @@ def validate(self): "for datetimelike and offset " "based windows") - if self.left_closed is not None and not is_bool(self.left_closed): - raise ValueError("left_closed must be a boolean") - # this will raise ValueError on non-fixed freqs self.window = freq.nanos - if self.left_closed: + if self.closed == 'both': self.window += 1 self.win_type = 'freq' @@ -1069,8 +1069,8 @@ def validate(self): if self.min_periods is None: self.min_periods = 1 else: - if self.left_closed is not None: - raise ValueError("left_closed only valid for datetimelike " + if self.closed == 'both': + raise ValueError("closed=both only valid for datetimelike " "and offset based windows") elif not is_integer(self.window): raise ValueError("window must be an integer") diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index af502ff690ed8..eb6cf03bfb944 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -3288,7 +3288,7 @@ def test_min_periods(self): result = df.rolling('2s', min_periods=1).sum() tm.assert_frame_equal(result, expected) - def test_left_closed(self): + def test_closed(self): df = DataFrame({'B': [1, 1, 2, np.nan, 4]}, index=[Timestamp('20130101 09:00:00'), @@ -3297,10 +3297,10 @@ def test_left_closed(self): Timestamp('20130101 09:00:05'), Timestamp('20130101 09:00:06')]) expected = df.rolling('4s').count() - result = df.rolling('3s', left_closed=True).count() + result = df.rolling('3s', closed='both').count() tm.assert_frame_equal(result, expected) expected = df.rolling('4s').sum() - result = df.rolling('3s', left_closed=True).sum() + result = df.rolling('3s', closed='both').sum() tm.assert_frame_equal(result, expected) df = DataFrame({'B': [1, 1, 2, np.nan, 4], @@ -3310,11 +3310,11 @@ def test_left_closed(self): Timestamp('20130101 09:00:05'), Timestamp('20130101 09:00:06')]}) expected = df.rolling('4s', on='timestamp').count() - result = df.rolling('3s', left_closed=True, + result = df.rolling('3s', closed='both', on='timestamp').count() tm.assert_frame_equal(result, expected) expected = df.rolling('4s', on='timestamp').sum() - result = df.rolling('3s', left_closed=True, + result = df.rolling('3s', closed='both', on='timestamp').sum() tm.assert_frame_equal(result, expected) @@ -3325,10 +3325,10 @@ def test_left_closed(self): Timestamp('20130106'), Timestamp('20130107')]) expected = df.rolling('4d').count() - result = df.rolling('3d', left_closed=True).count() + result = df.rolling('3d', closed='both').count() tm.assert_frame_equal(result, expected) expected = df.rolling('4d').sum() - result = df.rolling('3d', left_closed=True).sum() + result = df.rolling('3d', closed='both').sum() tm.assert_frame_equal(result, expected) df = DataFrame({'B': [1, 1, 2, np.nan, 4], @@ -3338,11 +3338,11 @@ def test_left_closed(self): Timestamp('20130106'), Timestamp('20130107')]}) expected = df.rolling('4d', on='timestamp').count() - result = df.rolling('3d', left_closed=True, + result = df.rolling('3d', closed='both', on='timestamp').count() tm.assert_frame_equal(result, expected) expected = df.rolling('4d', on='timestamp').sum() - result = df.rolling('3d', left_closed=True, + result = df.rolling('3d', closed='both', on='timestamp').sum() tm.assert_frame_equal(result, expected) @@ -3354,11 +3354,11 @@ def test_left_closed(self): index=[Timestamp('20130101 09:00:30'), Timestamp('20130101 09:01:00'), Timestamp('20130101 09:02:00')]) - result = df.rolling('1min', left_closed=True).sum() + result = df.rolling('1min', closed='both').sum() tm.assert_frame_equal(result, expected) with self.assertRaises(ValueError): - df.rolling('1min', left_closed="'tis wrong") + df.rolling('1min', closed="'tis wrong") def test_ragged_sum(self): From 1c8f692e323dc3a66c37d15db622466088dcd36c Mon Sep 17 00:00:00 2001 From: agraboso Date: Fri, 12 Aug 2016 10:32:41 -0400 Subject: [PATCH 08/12] Add test for TimedeltaIndex and invalid input --- pandas/tests/test_window.py | 74 ++++++++++++++++++++++++++++++++----- 1 file changed, 64 insertions(+), 10 deletions(-) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index eb6cf03bfb944..f724ae4417c11 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -3290,6 +3290,14 @@ def test_min_periods(self): def test_closed(self): + # closed=both only valid for datetimelike + with self.assertRaises(ValueError): + self.regular.rolling(window=3, closed='both') + + # closed must be 'right' or 'both' + with self.assertRaises(ValueError): + self.regular.rolling(window='1min', closed="'tis wrong") + df = DataFrame({'B': [1, 1, 2, np.nan, 4]}, index=[Timestamp('20130101 09:00:00'), Timestamp('20130101 09:00:02'), @@ -3303,6 +3311,14 @@ def test_closed(self): result = df.rolling('3s', closed='both').sum() tm.assert_frame_equal(result, expected) + df.index = df.index - Timestamp('20130101 09:00:00') + expected = df.rolling('4s').count() + result = df.rolling('3s', closed='both').count() + tm.assert_frame_equal(result, expected) + expected = df.rolling('4s').sum() + result = df.rolling('3s', closed='both').sum() + tm.assert_frame_equal(result, expected) + df = DataFrame({'B': [1, 1, 2, np.nan, 4], 'timestamp': [Timestamp('20130101 09:00:00'), Timestamp('20130101 09:00:02'), @@ -3310,12 +3326,18 @@ def test_closed(self): Timestamp('20130101 09:00:05'), Timestamp('20130101 09:00:06')]}) expected = df.rolling('4s', on='timestamp').count() - result = df.rolling('3s', closed='both', - on='timestamp').count() + result = df.rolling('3s', on='timestamp', closed='both').count() + tm.assert_frame_equal(result, expected) + expected = df.rolling('4s', on='timestamp').sum() + result = df.rolling('3s', on='timestamp', closed='both').sum() + tm.assert_frame_equal(result, expected) + + df['timestamp'] = df['timestamp'] - Timestamp('20130101 09:00:00') + expected = df.rolling('4s', on='timestamp').count() + result = df.rolling('3s', on='timestamp', closed='both').count() tm.assert_frame_equal(result, expected) expected = df.rolling('4s', on='timestamp').sum() - result = df.rolling('3s', closed='both', - on='timestamp').sum() + result = df.rolling('3s', on='timestamp', closed='both').sum() tm.assert_frame_equal(result, expected) df = DataFrame({'B': [1, 1, 2, np.nan, 4]}, @@ -3331,6 +3353,14 @@ def test_closed(self): result = df.rolling('3d', closed='both').sum() tm.assert_frame_equal(result, expected) + df.index = df.index - Timestamp('20130101') + expected = df.rolling('4d').count() + result = df.rolling('3d', closed='both').count() + tm.assert_frame_equal(result, expected) + expected = df.rolling('4d').sum() + result = df.rolling('3d', closed='both').sum() + tm.assert_frame_equal(result, expected) + df = DataFrame({'B': [1, 1, 2, np.nan, 4], 'timestamp': [Timestamp('20130101'), Timestamp('20130103'), @@ -3338,18 +3368,29 @@ def test_closed(self): Timestamp('20130106'), Timestamp('20130107')]}) expected = df.rolling('4d', on='timestamp').count() - result = df.rolling('3d', closed='both', - on='timestamp').count() + result = df.rolling('3d', on='timestamp', closed='both').count() + tm.assert_frame_equal(result, expected) + expected = df.rolling('4d', on='timestamp').sum() + result = df.rolling('3d', on='timestamp', closed='both').sum() + tm.assert_frame_equal(result, expected) + + df['timestamp'] = df['timestamp'] - Timestamp('20130101') + expected = df.rolling('4d', on='timestamp').count() + result = df.rolling('3d', on='timestamp', closed='both').count() tm.assert_frame_equal(result, expected) expected = df.rolling('4d', on='timestamp').sum() - result = df.rolling('3d', closed='both', - on='timestamp').sum() + result = df.rolling('3d', on='timestamp', closed='both').sum() tm.assert_frame_equal(result, expected) df = DataFrame({'B': [1] * 3}, index=[Timestamp('20130101 09:00:30'), Timestamp('20130101 09:01:00'), Timestamp('20130101 09:02:00')]) + expected = DataFrame({'B': [1, 2, 2]}, + index=[Timestamp('20130101 09:00:30'), + Timestamp('20130101 09:01:00'), + Timestamp('20130101 09:02:00')]) + result = df.rolling('1min', closed='both').count() expected = DataFrame({'B': [1., 2., 2.]}, index=[Timestamp('20130101 09:00:30'), Timestamp('20130101 09:01:00'), @@ -3357,8 +3398,21 @@ def test_closed(self): result = df.rolling('1min', closed='both').sum() tm.assert_frame_equal(result, expected) - with self.assertRaises(ValueError): - df.rolling('1min', closed="'tis wrong") + df = DataFrame({'B': [1] * 3, + 'timestamp': [Timestamp('20130101 09:00:30'), + Timestamp('20130101 09:01:00'), + Timestamp('20130101 09:02:00')]}) + expected = DataFrame({'B': [1, 2, 2], + 'timestamp': [Timestamp('20130101 09:00:30'), + Timestamp('20130101 09:01:00'), + Timestamp('20130101 09:02:00')]}) + result = df.rolling('1min', on='timestamp', closed='both').count() + expected = DataFrame({'B': [1., 2., 2.], + 'timestamp': [Timestamp('20130101 09:00:30'), + Timestamp('20130101 09:01:00'), + Timestamp('20130101 09:02:00')]}) + result = df.rolling('1min', on='timestamp', closed='both').sum() + tm.assert_frame_equal(result, expected) def test_ragged_sum(self): From 9886cead8ce9b0bdcb0a1af2cdd98ae464a6e65b Mon Sep 17 00:00:00 2001 From: agraboso Date: Fri, 12 Aug 2016 17:26:48 -0400 Subject: [PATCH 09/12] Add tests for closed='right' --- pandas/tests/test_window.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index f724ae4417c11..16186776018cb 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -3310,6 +3310,9 @@ def test_closed(self): expected = df.rolling('4s').sum() result = df.rolling('3s', closed='both').sum() tm.assert_frame_equal(result, expected) + expected = df.rolling('3s').count() + result = df.rolling('3s', closed='right').count() + tm.assert_frame_equal(result, expected) df.index = df.index - Timestamp('20130101 09:00:00') expected = df.rolling('4s').count() @@ -3318,6 +3321,9 @@ def test_closed(self): expected = df.rolling('4s').sum() result = df.rolling('3s', closed='both').sum() tm.assert_frame_equal(result, expected) + expected = df.rolling('3s').sum() + result = df.rolling('3s', closed='right').sum() + tm.assert_frame_equal(result, expected) df = DataFrame({'B': [1, 1, 2, np.nan, 4], 'timestamp': [Timestamp('20130101 09:00:00'), @@ -3352,6 +3358,9 @@ def test_closed(self): expected = df.rolling('4d').sum() result = df.rolling('3d', closed='both').sum() tm.assert_frame_equal(result, expected) + expected = df.rolling('3d').count() + result = df.rolling('3d', closed='right').count() + tm.assert_frame_equal(result, expected) df.index = df.index - Timestamp('20130101') expected = df.rolling('4d').count() @@ -3360,6 +3369,9 @@ def test_closed(self): expected = df.rolling('4d').sum() result = df.rolling('3d', closed='both').sum() tm.assert_frame_equal(result, expected) + expected = df.rolling('3d').count() + result = df.rolling('3d', closed='right').count() + tm.assert_frame_equal(result, expected) df = DataFrame({'B': [1, 1, 2, np.nan, 4], 'timestamp': [Timestamp('20130101'), From 5f75c254ccfaee2ab0adf9f2eaabec413f6e4031 Mon Sep 17 00:00:00 2001 From: agraboso Date: Fri, 12 Aug 2016 17:27:18 -0400 Subject: [PATCH 10/12] Add closed argument to docstring --- pandas/core/window.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/core/window.py b/pandas/core/window.py index 8dafaaf10be39..5e1bb0729d8c3 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -376,6 +376,9 @@ class Window(_Window): .. versionadded:: 0.19.0 axis : int or string, default 0 + closed: 'right' or 'both', default 'right' + For offset-based windows, make the interval closed only on the right + or on both endpoints. Returns ------- @@ -455,6 +458,14 @@ class Window(_Window): 2013-01-01 09:00:05 NaN 2013-01-01 09:00:06 4.0 + >>> df.rolling('2s', closed='both').sum() + B + 2013-01-01 09:00:00 0.0 + 2013-01-01 09:00:02 1.0 + 2013-01-01 09:00:03 3.0 + 2013-01-01 09:00:05 2.0 + 2013-01-01 09:00:06 4.0 + Notes ----- By default, the result is set to the right edge of the window. This can be From 9f6810b3e4a79a4f95c1c3a6038b83f129f75141 Mon Sep 17 00:00:00 2001 From: agraboso Date: Tue, 23 Aug 2016 17:03:45 -0400 Subject: [PATCH 11/12] Cleanup --- pandas/core/window.py | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index 5e1bb0729d8c3..99d79e0c45934 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -54,11 +54,12 @@ class _Window(PandasObject, SelectionMixin): _attributes = ['window', 'min_periods', 'freq', 'center', 'win_type', - 'axis', 'on'] + 'axis', 'on', 'closed'] exclusions = set() def __init__(self, obj, window=None, min_periods=None, freq=None, - center=False, win_type=None, axis=0, on=None, **kwargs): + center=False, win_type=None, axis=0, on=None, closed='right', + **kwargs): if freq is not None: warnings.warn("The freq kw is deprecated and will be removed in a " @@ -69,6 +70,7 @@ def __init__(self, obj, window=None, min_periods=None, freq=None, self.blocks = [] self.obj = obj self.on = on + self.closed = closed self.window = window self.min_periods = min_periods self.freq = freq @@ -99,6 +101,8 @@ def validate(self): if self.min_periods is not None and not \ is_integer(self.min_periods): raise ValueError("min_periods must be an integer") + if self.closed not in ['right', 'both']: + raise ValueError("closed must be right or both") def _convert_freq(self, how=None): """ resample according to the how, return a new object """ @@ -376,6 +380,9 @@ class Window(_Window): .. versionadded:: 0.19.0 axis : int or string, default 0 + + .. versionadded:: 0.19.0 + closed: 'right' or 'both', default 'right' For offset-based windows, make the interval closed only on the right or on both endpoints. @@ -458,6 +465,9 @@ class Window(_Window): 2013-01-01 09:00:05 NaN 2013-01-01 09:00:06 4.0 + For time-based windows, it is possible to make the resulting window + contain its left edge by setting closed='both'. + >>> df.rolling('2s', closed='both').sum() B 2013-01-01 09:00:00 0.0 @@ -1008,17 +1018,6 @@ def _get_corr(a, b): class Rolling(_Rolling_and_Expanding): - _attributes = ['window', 'min_periods', 'freq', 'center', 'win_type', - 'axis', 'on', 'closed'] - - def __init__(self, obj, window=None, min_periods=None, freq=None, - center=False, win_type=None, axis=0, on=None, - closed='right', **kwargs): - self.closed = closed - super(Rolling, self).__init__(obj=obj, window=window, - min_periods=min_periods, freq=freq, - center=center, win_type=win_type, - axis=axis, on=on, **kwargs) @cache_readonly def is_datetimelike(self): @@ -1043,9 +1042,6 @@ def _on(self): def validate(self): super(Rolling, self).validate() - if self.closed not in ['right', 'both']: - raise ValueError("closed must be right or both") - # we allow rolling on a datetimelike index if (self.is_datetimelike and isinstance(self.window, (compat.string_types, DateOffset))): From 62ff60488d2cc1fa5a1e71dae6ac4ce5e3c65ce6 Mon Sep 17 00:00:00 2001 From: agraboso Date: Tue, 23 Aug 2016 17:23:58 -0400 Subject: [PATCH 12/12] Consolidate tests --- pandas/tests/test_window.py | 81 +------------------------------------ 1 file changed, 1 insertion(+), 80 deletions(-) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 16186776018cb..a9edb6e2bf12b 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -3307,9 +3307,6 @@ def test_closed(self): expected = df.rolling('4s').count() result = df.rolling('3s', closed='both').count() tm.assert_frame_equal(result, expected) - expected = df.rolling('4s').sum() - result = df.rolling('3s', closed='both').sum() - tm.assert_frame_equal(result, expected) expected = df.rolling('3s').count() result = df.rolling('3s', closed='right').count() tm.assert_frame_equal(result, expected) @@ -3318,12 +3315,6 @@ def test_closed(self): expected = df.rolling('4s').count() result = df.rolling('3s', closed='both').count() tm.assert_frame_equal(result, expected) - expected = df.rolling('4s').sum() - result = df.rolling('3s', closed='both').sum() - tm.assert_frame_equal(result, expected) - expected = df.rolling('3s').sum() - result = df.rolling('3s', closed='right').sum() - tm.assert_frame_equal(result, expected) df = DataFrame({'B': [1, 1, 2, np.nan, 4], 'timestamp': [Timestamp('20130101 09:00:00'), @@ -3334,17 +3325,6 @@ def test_closed(self): expected = df.rolling('4s', on='timestamp').count() result = df.rolling('3s', on='timestamp', closed='both').count() tm.assert_frame_equal(result, expected) - expected = df.rolling('4s', on='timestamp').sum() - result = df.rolling('3s', on='timestamp', closed='both').sum() - tm.assert_frame_equal(result, expected) - - df['timestamp'] = df['timestamp'] - Timestamp('20130101 09:00:00') - expected = df.rolling('4s', on='timestamp').count() - result = df.rolling('3s', on='timestamp', closed='both').count() - tm.assert_frame_equal(result, expected) - expected = df.rolling('4s', on='timestamp').sum() - result = df.rolling('3s', on='timestamp', closed='both').sum() - tm.assert_frame_equal(result, expected) df = DataFrame({'B': [1, 1, 2, np.nan, 4]}, index=[Timestamp('20130101'), @@ -3355,75 +3335,16 @@ def test_closed(self): expected = df.rolling('4d').count() result = df.rolling('3d', closed='both').count() tm.assert_frame_equal(result, expected) - expected = df.rolling('4d').sum() - result = df.rolling('3d', closed='both').sum() - tm.assert_frame_equal(result, expected) - expected = df.rolling('3d').count() - result = df.rolling('3d', closed='right').count() - tm.assert_frame_equal(result, expected) - - df.index = df.index - Timestamp('20130101') - expected = df.rolling('4d').count() - result = df.rolling('3d', closed='both').count() - tm.assert_frame_equal(result, expected) - expected = df.rolling('4d').sum() - result = df.rolling('3d', closed='both').sum() - tm.assert_frame_equal(result, expected) - expected = df.rolling('3d').count() - result = df.rolling('3d', closed='right').count() - tm.assert_frame_equal(result, expected) - - df = DataFrame({'B': [1, 1, 2, np.nan, 4], - 'timestamp': [Timestamp('20130101'), - Timestamp('20130103'), - Timestamp('20130104'), - Timestamp('20130106'), - Timestamp('20130107')]}) - expected = df.rolling('4d', on='timestamp').count() - result = df.rolling('3d', on='timestamp', closed='both').count() - tm.assert_frame_equal(result, expected) - expected = df.rolling('4d', on='timestamp').sum() - result = df.rolling('3d', on='timestamp', closed='both').sum() - tm.assert_frame_equal(result, expected) - - df['timestamp'] = df['timestamp'] - Timestamp('20130101') - expected = df.rolling('4d', on='timestamp').count() - result = df.rolling('3d', on='timestamp', closed='both').count() - tm.assert_frame_equal(result, expected) - expected = df.rolling('4d', on='timestamp').sum() - result = df.rolling('3d', on='timestamp', closed='both').sum() - tm.assert_frame_equal(result, expected) df = DataFrame({'B': [1] * 3}, index=[Timestamp('20130101 09:00:30'), Timestamp('20130101 09:01:00'), Timestamp('20130101 09:02:00')]) - expected = DataFrame({'B': [1, 2, 2]}, - index=[Timestamp('20130101 09:00:30'), - Timestamp('20130101 09:01:00'), - Timestamp('20130101 09:02:00')]) - result = df.rolling('1min', closed='both').count() expected = DataFrame({'B': [1., 2., 2.]}, index=[Timestamp('20130101 09:00:30'), Timestamp('20130101 09:01:00'), Timestamp('20130101 09:02:00')]) - result = df.rolling('1min', closed='both').sum() - tm.assert_frame_equal(result, expected) - - df = DataFrame({'B': [1] * 3, - 'timestamp': [Timestamp('20130101 09:00:30'), - Timestamp('20130101 09:01:00'), - Timestamp('20130101 09:02:00')]}) - expected = DataFrame({'B': [1, 2, 2], - 'timestamp': [Timestamp('20130101 09:00:30'), - Timestamp('20130101 09:01:00'), - Timestamp('20130101 09:02:00')]}) - result = df.rolling('1min', on='timestamp', closed='both').count() - expected = DataFrame({'B': [1., 2., 2.], - 'timestamp': [Timestamp('20130101 09:00:30'), - Timestamp('20130101 09:01:00'), - Timestamp('20130101 09:02:00')]}) - result = df.rolling('1min', on='timestamp', closed='both').sum() + result = df.rolling('1min', closed='both').count() tm.assert_frame_equal(result, expected) def test_ragged_sum(self):