Skip to content

Commit 83530bd

Browse files
ENH: Implement __iter__ for Rolling and Expanding (#34201)
1 parent b59bc5d commit 83530bd

File tree

5 files changed

+329
-24
lines changed

5 files changed

+329
-24
lines changed

doc/source/user_guide/computation.rst

+18
Original file line numberDiff line numberDiff line change
@@ -648,6 +648,24 @@ from present information back to past information. This allows the rolling windo
648648
Currently, this feature is only implemented for time-based windows.
649649
For fixed windows, the closed parameter cannot be set and the rolling window will always have both endpoints closed.
650650

651+
.. _stats.iter_rolling_window:
652+
653+
Iteration over window:
654+
~~~~~~~~~~~~~~~~~~~~~~
655+
656+
.. versionadded:: 1.1.0
657+
658+
``Rolling`` and ``Expanding`` objects now support iteration. Note that ``min_periods`` is ignored during iteration.
659+
660+
.. ipython::
661+
662+
In [1]: df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
663+
664+
In [2]: for i in df.rolling(2):
665+
...: print(i)
666+
...:
667+
668+
651669
.. _stats.moments.ts-versus-resampling:
652670

653671
Time-aware rolling vs. resampling

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,7 @@ Other enhancements
235235
:class:`~pandas.io.stata.StataWriter`, :class:`~pandas.io.stata.StataWriter117`,
236236
and :class:`~pandas.io.stata.StataWriterUTF8` (:issue:`26599`).
237237
- :meth:`HDFStore.put` now accepts `track_times` parameter. Parameter is passed to ``create_table`` method of ``PyTables`` (:issue:`32682`).
238+
- Make :class:`pandas.core.window.Rolling` and :class:`pandas.core.window.Expanding` iterable (:issue:`11704`)
238239

239240
.. ---------------------------------------------------------------------------
240241

pandas/core/window/rolling.py

+16-2
Original file line numberDiff line numberDiff line change
@@ -247,8 +247,22 @@ def __repr__(self) -> str:
247247
return f"{self._window_type} [{attrs}]"
248248

249249
def __iter__(self):
250-
url = "https://github.com/pandas-dev/pandas/issues/11704"
251-
raise NotImplementedError(f"See issue #11704 {url}")
250+
window = self._get_window(win_type=None)
251+
blocks, obj = self._create_blocks()
252+
index = self._get_window_indexer(window=window)
253+
254+
start, end = index.get_window_bounds(
255+
num_values=len(obj),
256+
min_periods=self.min_periods,
257+
center=self.center,
258+
closed=self.closed,
259+
)
260+
# get_window_bounds should return start and end arrays of equal length
261+
assert len(start) == len(end)
262+
263+
for s, e in zip(start, end):
264+
result = obj.iloc[slice(s, e)]
265+
yield result
252266

253267
def _prep_values(self, values: Optional[np.ndarray] = None) -> np.ndarray:
254268
"""Convert input to numpy arrays for Cython routines"""

pandas/tests/window/test_expanding.py

+88-9
Original file line numberDiff line numberDiff line change
@@ -88,15 +88,6 @@ def test_missing_minp_zero():
8888
tm.assert_series_equal(result, expected)
8989

9090

91-
@pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame])
92-
def test_iter_raises(klass):
93-
# https://github.com/pandas-dev/pandas/issues/11704
94-
# Iteration over a Window
95-
obj = klass([1, 2, 3, 4])
96-
with pytest.raises(NotImplementedError):
97-
iter(obj.expanding(2))
98-
99-
10091
def test_expanding_axis(axis_frame):
10192
# see gh-23372.
10293
df = DataFrame(np.ones((10, 20)))
@@ -131,3 +122,91 @@ def test_expanding_count_default_min_periods_with_null_values(constructor):
131122
result = constructor(values).expanding().count()
132123
expected = constructor(expected_counts)
133124
tm.assert_equal(result, expected)
125+
126+
127+
@pytest.mark.parametrize(
128+
"df,expected,min_periods",
129+
[
130+
(
131+
DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
132+
[
133+
({"A": [1], "B": [4]}, [0]),
134+
({"A": [1, 2], "B": [4, 5]}, [0, 1]),
135+
({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]),
136+
],
137+
3,
138+
),
139+
(
140+
DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
141+
[
142+
({"A": [1], "B": [4]}, [0]),
143+
({"A": [1, 2], "B": [4, 5]}, [0, 1]),
144+
({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]),
145+
],
146+
2,
147+
),
148+
(
149+
DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
150+
[
151+
({"A": [1], "B": [4]}, [0]),
152+
({"A": [1, 2], "B": [4, 5]}, [0, 1]),
153+
({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]),
154+
],
155+
1,
156+
),
157+
(DataFrame({"A": [1], "B": [4]}), [], 2),
158+
(DataFrame(), [({}, [])], 1),
159+
(
160+
DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}),
161+
[
162+
({"A": [1.0], "B": [np.nan]}, [0]),
163+
({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]),
164+
({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]),
165+
],
166+
3,
167+
),
168+
(
169+
DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}),
170+
[
171+
({"A": [1.0], "B": [np.nan]}, [0]),
172+
({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]),
173+
({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]),
174+
],
175+
2,
176+
),
177+
(
178+
DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}),
179+
[
180+
({"A": [1.0], "B": [np.nan]}, [0]),
181+
({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]),
182+
({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]),
183+
],
184+
1,
185+
),
186+
],
187+
)
188+
def test_iter_expanding_dataframe(df, expected, min_periods):
189+
# GH 11704
190+
expected = [DataFrame(values, index=index) for (values, index) in expected]
191+
192+
for (expected, actual) in zip(expected, df.expanding(min_periods)):
193+
tm.assert_frame_equal(actual, expected)
194+
195+
196+
@pytest.mark.parametrize(
197+
"ser,expected,min_periods",
198+
[
199+
(Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 3),
200+
(Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 2),
201+
(Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 1),
202+
(Series([1, 2]), [([1], [0]), ([1, 2], [0, 1])], 2),
203+
(Series([np.nan, 2]), [([np.nan], [0]), ([np.nan, 2], [0, 1])], 2),
204+
(Series([], dtype="int64"), [], 2),
205+
],
206+
)
207+
def test_iter_expanding_series(ser, expected, min_periods):
208+
# GH 11704
209+
expected = [Series(values, index=index) for (values, index) in expected]
210+
211+
for (expected, actual) in zip(expected, ser.expanding(min_periods)):
212+
tm.assert_series_equal(actual, expected)

0 commit comments

Comments
 (0)