Skip to content

ENH: allow exact matches in time-based .rolling() #13968

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 12 commits into from
4 changes: 2 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5568,12 +5568,12 @@ def _add_series_or_dataframe_operations(cls):

@Appender(rwindow.rolling.__doc__)
def rolling(self, window, min_periods=None, freq=None, center=False,
win_type=None, on=None, axis=0):
win_type=None, on=None, axis=0, closed='right'):
axis = self._get_axis_number(axis)
return rwindow.rolling(self, window=window,
min_periods=min_periods, freq=freq,
center=center, win_type=win_type,
on=on, axis=axis)
on=on, axis=axis, closed=closed)

cls.rolling = rolling

Expand Down
40 changes: 33 additions & 7 deletions pandas/core/window.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,12 @@

class _Window(PandasObject, SelectionMixin):
_attributes = ['window', 'min_periods', 'freq', 'center', 'win_type',
'axis', 'on']
'axis', 'on', 'closed']
exclusions = set()

def __init__(self, obj, window=None, min_periods=None, freq=None,
center=False, win_type=None, axis=0, on=None, **kwargs):
center=False, win_type=None, axis=0, on=None, closed='right',
**kwargs):

if freq is not None:
warnings.warn("The freq kw is deprecated and will be removed in a "
Expand All @@ -69,6 +70,7 @@ def __init__(self, obj, window=None, min_periods=None, freq=None,
self.blocks = []
self.obj = obj
self.on = on
self.closed = closed
self.window = window
self.min_periods = min_periods
self.freq = freq
Expand Down Expand Up @@ -99,6 +101,8 @@ def validate(self):
if self.min_periods is not None and not \
is_integer(self.min_periods):
raise ValueError("min_periods must be an integer")
if self.closed not in ['right', 'both']:
raise ValueError("closed must be right or both")

def _convert_freq(self, how=None):
""" resample according to the how, return a new object """
Expand Down Expand Up @@ -377,6 +381,12 @@ class Window(_Window):

axis : int or string, default 0

.. versionadded:: 0.19.0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

-> 0.20.0


closed: 'right' or 'both', default 'right'
For offset-based windows, make the interval closed only on the right
or on both endpoints.

Returns
-------
a Window or Rolling sub-classed for the particular operation
Expand Down Expand Up @@ -455,6 +465,17 @@ class Window(_Window):
2013-01-01 09:00:05 NaN
2013-01-01 09:00:06 4.0

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add a comment on what this does

For time-based windows, it is possible to make the resulting window
contain its left edge by setting closed='both'.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you show closed='right', for comparison

>>> df.rolling('2s', closed='both').sum()
B
2013-01-01 09:00:00 0.0
2013-01-01 09:00:02 1.0
2013-01-01 09:00:03 3.0
2013-01-01 09:00:05 2.0
2013-01-01 09:00:06 4.0

Notes
-----
By default, the result is set to the right edge of the window. This can be
Expand Down Expand Up @@ -1047,16 +1068,21 @@ def validate(self):

# this will raise ValueError on non-fixed freqs
self.window = freq.nanos
if self.closed == 'both':
self.window += 1
self.win_type = 'freq'

# min_periods must be an integer
if self.min_periods is None:
self.min_periods = 1

elif not is_integer(self.window):
raise ValueError("window must be an integer")
elif self.window < 0:
raise ValueError("window must be non-negative")
else:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why are you changing this? this is an independent check and must be done that way

Copy link
Contributor Author

@agraboso agraboso Aug 13, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm simply moving them into the else block below, since they should only be performed for non-(datetimelike with offset-based windows), which is the if above.

if self.closed == 'both':
raise ValueError("closed=both only valid for datetimelike "
"and offset based windows")
elif not is_integer(self.window):
raise ValueError("window must be an integer")
elif self.window < 0:
raise ValueError("window must be non-negative")

@Substitution(name='rolling')
@Appender(SelectionMixin._see_also_template)
Expand Down
59 changes: 59 additions & 0 deletions pandas/tests/test_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -3288,6 +3288,65 @@ def test_min_periods(self):
result = df.rolling('2s', min_periods=1).sum()
tm.assert_frame_equal(result, expected)

def test_closed(self):

# closed=both only valid for datetimelike
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would add some tests here that are the same eactly but include closed='right' (don't have to duplicate all, just a couple)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

with self.assertRaises(ValueError):
self.regular.rolling(window=3, closed='both')

# closed must be 'right' or 'both'
with self.assertRaises(ValueError):
self.regular.rolling(window='1min', closed="'tis wrong")

df = DataFrame({'B': [1, 1, 2, np.nan, 4]},
index=[Timestamp('20130101 09:00:00'),
Timestamp('20130101 09:00:02'),
Timestamp('20130101 09:00:03'),
Timestamp('20130101 09:00:05'),
Timestamp('20130101 09:00:06')])
expected = df.rolling('4s').count()
result = df.rolling('3s', closed='both').count()
tm.assert_frame_equal(result, expected)
expected = df.rolling('3s').count()
result = df.rolling('3s', closed='right').count()
tm.assert_frame_equal(result, expected)

df.index = df.index - Timestamp('20130101 09:00:00')
expected = df.rolling('4s').count()
result = df.rolling('3s', closed='both').count()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is too much duplication. pls limit to relevant tests.

tm.assert_frame_equal(result, expected)

df = DataFrame({'B': [1, 1, 2, np.nan, 4],
'timestamp': [Timestamp('20130101 09:00:00'),
Timestamp('20130101 09:00:02'),
Timestamp('20130101 09:00:03'),
Timestamp('20130101 09:00:05'),
Timestamp('20130101 09:00:06')]})
expected = df.rolling('4s', on='timestamp').count()
result = df.rolling('3s', on='timestamp', closed='both').count()
tm.assert_frame_equal(result, expected)

df = DataFrame({'B': [1, 1, 2, np.nan, 4]},
index=[Timestamp('20130101'),
Timestamp('20130103'),
Timestamp('20130104'),
Timestamp('20130106'),
Timestamp('20130107')])
expected = df.rolling('4d').count()
result = df.rolling('3d', closed='both').count()
tm.assert_frame_equal(result, expected)

df = DataFrame({'B': [1] * 3},
index=[Timestamp('20130101 09:00:30'),
Timestamp('20130101 09:01:00'),
Timestamp('20130101 09:02:00')])
expected = DataFrame({'B': [1., 2., 2.]},
index=[Timestamp('20130101 09:00:30'),
Timestamp('20130101 09:01:00'),
Timestamp('20130101 09:02:00')])
result = df.rolling('1min', closed='both').count()
tm.assert_frame_equal(result, expected)

def test_ragged_sum(self):

df = self.ragged
Expand Down