-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
ENH: allow exact matches in time-based .rolling() #13968
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
d65658d
758986d
396e058
3df0da1
84cebbf
dc2d6fd
8291645
1c8f692
9886cea
5f75c25
9f6810b
62ff604
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -54,11 +54,12 @@ | |
|
||
class _Window(PandasObject, SelectionMixin): | ||
_attributes = ['window', 'min_periods', 'freq', 'center', 'win_type', | ||
'axis', 'on'] | ||
'axis', 'on', 'closed'] | ||
exclusions = set() | ||
|
||
def __init__(self, obj, window=None, min_periods=None, freq=None, | ||
center=False, win_type=None, axis=0, on=None, **kwargs): | ||
center=False, win_type=None, axis=0, on=None, closed='right', | ||
**kwargs): | ||
|
||
if freq is not None: | ||
warnings.warn("The freq kw is deprecated and will be removed in a " | ||
|
@@ -69,6 +70,7 @@ def __init__(self, obj, window=None, min_periods=None, freq=None, | |
self.blocks = [] | ||
self.obj = obj | ||
self.on = on | ||
self.closed = closed | ||
self.window = window | ||
self.min_periods = min_periods | ||
self.freq = freq | ||
|
@@ -99,6 +101,8 @@ def validate(self): | |
if self.min_periods is not None and not \ | ||
is_integer(self.min_periods): | ||
raise ValueError("min_periods must be an integer") | ||
if self.closed not in ['right', 'both']: | ||
raise ValueError("closed must be right or both") | ||
|
||
def _convert_freq(self, how=None): | ||
""" resample according to the how, return a new object """ | ||
|
@@ -377,6 +381,12 @@ class Window(_Window): | |
|
||
axis : int or string, default 0 | ||
|
||
.. versionadded:: 0.19.0 | ||
|
||
closed: 'right' or 'both', default 'right' | ||
For offset-based windows, make the interval closed only on the right | ||
or on both endpoints. | ||
|
||
Returns | ||
------- | ||
a Window or Rolling sub-classed for the particular operation | ||
|
@@ -455,6 +465,17 @@ class Window(_Window): | |
2013-01-01 09:00:05 NaN | ||
2013-01-01 09:00:06 4.0 | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add a comment on what this does |
||
For time-based windows, it is possible to make the resulting window | ||
contain its left edge by setting closed='both'. | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you show closed='right', for comparison |
||
>>> df.rolling('2s', closed='both').sum() | ||
B | ||
2013-01-01 09:00:00 0.0 | ||
2013-01-01 09:00:02 1.0 | ||
2013-01-01 09:00:03 3.0 | ||
2013-01-01 09:00:05 2.0 | ||
2013-01-01 09:00:06 4.0 | ||
|
||
Notes | ||
----- | ||
By default, the result is set to the right edge of the window. This can be | ||
|
@@ -1047,16 +1068,21 @@ def validate(self): | |
|
||
# this will raise ValueError on non-fixed freqs | ||
self.window = freq.nanos | ||
if self.closed == 'both': | ||
self.window += 1 | ||
self.win_type = 'freq' | ||
|
||
# min_periods must be an integer | ||
if self.min_periods is None: | ||
self.min_periods = 1 | ||
|
||
elif not is_integer(self.window): | ||
raise ValueError("window must be an integer") | ||
elif self.window < 0: | ||
raise ValueError("window must be non-negative") | ||
else: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why are you changing this? this is an independent check and must be done that way There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm simply moving them into the |
||
if self.closed == 'both': | ||
raise ValueError("closed=both only valid for datetimelike " | ||
"and offset based windows") | ||
elif not is_integer(self.window): | ||
raise ValueError("window must be an integer") | ||
elif self.window < 0: | ||
raise ValueError("window must be non-negative") | ||
|
||
@Substitution(name='rolling') | ||
@Appender(SelectionMixin._see_also_template) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3288,6 +3288,65 @@ def test_min_periods(self): | |
result = df.rolling('2s', min_periods=1).sum() | ||
tm.assert_frame_equal(result, expected) | ||
|
||
def test_closed(self): | ||
|
||
# closed=both only valid for datetimelike | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would add some tests here that are the same eactly but include There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
with self.assertRaises(ValueError): | ||
self.regular.rolling(window=3, closed='both') | ||
|
||
# closed must be 'right' or 'both' | ||
with self.assertRaises(ValueError): | ||
self.regular.rolling(window='1min', closed="'tis wrong") | ||
|
||
df = DataFrame({'B': [1, 1, 2, np.nan, 4]}, | ||
index=[Timestamp('20130101 09:00:00'), | ||
Timestamp('20130101 09:00:02'), | ||
Timestamp('20130101 09:00:03'), | ||
Timestamp('20130101 09:00:05'), | ||
Timestamp('20130101 09:00:06')]) | ||
expected = df.rolling('4s').count() | ||
result = df.rolling('3s', closed='both').count() | ||
tm.assert_frame_equal(result, expected) | ||
expected = df.rolling('3s').count() | ||
result = df.rolling('3s', closed='right').count() | ||
tm.assert_frame_equal(result, expected) | ||
|
||
df.index = df.index - Timestamp('20130101 09:00:00') | ||
expected = df.rolling('4s').count() | ||
result = df.rolling('3s', closed='both').count() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is too much duplication. pls limit to relevant tests. |
||
tm.assert_frame_equal(result, expected) | ||
|
||
df = DataFrame({'B': [1, 1, 2, np.nan, 4], | ||
'timestamp': [Timestamp('20130101 09:00:00'), | ||
Timestamp('20130101 09:00:02'), | ||
Timestamp('20130101 09:00:03'), | ||
Timestamp('20130101 09:00:05'), | ||
Timestamp('20130101 09:00:06')]}) | ||
expected = df.rolling('4s', on='timestamp').count() | ||
result = df.rolling('3s', on='timestamp', closed='both').count() | ||
tm.assert_frame_equal(result, expected) | ||
|
||
df = DataFrame({'B': [1, 1, 2, np.nan, 4]}, | ||
index=[Timestamp('20130101'), | ||
Timestamp('20130103'), | ||
Timestamp('20130104'), | ||
Timestamp('20130106'), | ||
Timestamp('20130107')]) | ||
expected = df.rolling('4d').count() | ||
result = df.rolling('3d', closed='both').count() | ||
tm.assert_frame_equal(result, expected) | ||
|
||
df = DataFrame({'B': [1] * 3}, | ||
index=[Timestamp('20130101 09:00:30'), | ||
Timestamp('20130101 09:01:00'), | ||
Timestamp('20130101 09:02:00')]) | ||
expected = DataFrame({'B': [1., 2., 2.]}, | ||
index=[Timestamp('20130101 09:00:30'), | ||
Timestamp('20130101 09:01:00'), | ||
Timestamp('20130101 09:02:00')]) | ||
result = df.rolling('1min', closed='both').count() | ||
tm.assert_frame_equal(result, expected) | ||
|
||
def test_ragged_sum(self): | ||
|
||
df = self.ragged | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
-> 0.20.0