-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
Initial implementation of rolling iterators #27399
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 20 commits
0cd558e
c33f2a4
285ebcc
2d782c4
4271782
68db60a
3b10a49
525bdc6
c50a309
ee8f00e
e1cf139
803a18f
4c8adff
1252dfd
e6eb230
9833fa6
0a2b416
6b559e9
62f0997
0d2d5b1
412efa7
df5f199
285b5ba
33e24cb
4e2f1a2
755e0c1
d58e897
e3b060c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -204,8 +204,47 @@ def __repr__(self): | |
) | ||
|
||
def __iter__(self): | ||
url = "https://github.com/pandas-dev/pandas/issues/11704" | ||
raise NotImplementedError("See issue #11704 {url}".format(url=url)) | ||
closed = self.closed | ||
window = self._get_window() | ||
minp = _use_window(self.min_periods, window) | ||
offset = _offset(window, self.center) | ||
|
||
blocks, obj, index = self._create_blocks() | ||
_, indexi = self._get_index(index) | ||
|
||
for values in blocks: | ||
arr = np.asarray(values) | ||
|
||
start, end, N, win, _minp, is_variable = libwindow.get_window_indexer( | ||
arr, window, minp, indexi, closed | ||
) | ||
|
||
if arr.ndim == 1: | ||
arr = np.expand_dims(arr, axis=1) | ||
|
||
counts = libwindow.roll_sum( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Instead of this, could you just generate the slice and yield it if the number of non-null elements meets `min_periods`?
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yep! Done! |
||
np.concatenate( | ||
[ | ||
np.isfinite(arr).all(axis=1).astype(float), | ||
np.array([0.0] * offset), | ||
] | ||
), | ||
win, | ||
minp, | ||
index, | ||
closed, | ||
)[offset:] | ||
|
||
for i in range(N): | ||
if counts[i] >= _minp: | ||
if is_variable: | ||
s = start[i] | ||
e = end[i] | ||
else: | ||
s = max(i - win + offset + 1, 0) | ||
e = min(i + offset + 1, N) | ||
|
||
yield values.iloc[slice(s, e)] | ||
|
||
def _get_index(self, index=None): | ||
""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -90,13 +90,62 @@ def test_missing_minp_zero(self): | |
expected = pd.Series([np.nan]) | ||
tm.assert_series_equal(result, expected) | ||
|
||
@pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame]) | ||
def test_iter_raises(self, klass): | ||
# https://github.com/pandas-dev/pandas/issues/11704 | ||
# Iteration over a Window | ||
obj = klass([1, 2, 3, 4]) | ||
with pytest.raises(NotImplementedError): | ||
iter(obj.expanding(2)) | ||
@pytest.mark.parametrize( | ||
"dataframe,expected,window", | ||
[ | ||
( | ||
DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), | ||
[({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2])], | ||
3, | ||
), | ||
( | ||
DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), | ||
[ | ||
({"A": [1, 2], "B": [4, 5]}, [0, 1]), | ||
({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), | ||
], | ||
2, | ||
), | ||
( | ||
DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), | ||
[ | ||
({"A": [1], "B": [4]}, [0]), | ||
({"A": [1, 2], "B": [4, 5]}, [0, 1]), | ||
({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), | ||
], | ||
1, | ||
), | ||
(DataFrame({"A": [1], "B": [4]}), [], 1337), | ||
(DataFrame(), [({}, [])], 1337), | ||
], | ||
) | ||
def test_iterator_dataframe(self, dataframe, expected, window): | ||
expected = [DataFrame(values, index=index) for (values, index) in expected] | ||
mroeschke marked this conversation as resolved.
Show resolved
Hide resolved
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add the issue number as a comment here? |
||
|
||
for (expected, actual) in zip( | ||
expected, dataframe.expanding(min_periods=window) | ||
): | ||
tm.assert_frame_equal(actual, expected) | ||
|
||
@pytest.mark.parametrize( | ||
"series,expected,window", | ||
[ | ||
(Series([1, 2, 3]), [([1, 2, 3], [0, 1, 2])], 3), | ||
(Series([1, 2, 3]), [([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 2), | ||
( | ||
Series([1, 2, 3]), | ||
[([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], | ||
1, | ||
), | ||
(Series([1, 2]), [([1, 2], [0, 1])], 1337), | ||
(Series([]), [], 1337), | ||
], | ||
) | ||
def test_iterator_series(self, series, expected, window): | ||
expected = [Series(values, index=index) for (values, index) in expected] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same here: please add the issue number as a comment. |
||
|
||
for (expected, actual) in zip(expected, series.expanding(min_periods=window)): | ||
tm.assert_series_equal(actual, expected) | ||
|
||
def test_expanding_axis(self, axis_frame): | ||
# see gh-23372. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Move this entry to the 1.0 whatsnew.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also provide a reference to the new doc section.