Skip to content

ENH: Implement __iter__ for Rolling and Expanding #34201

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 23 commits into from
May 17, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions doc/source/user_guide/computation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -648,6 +648,24 @@ from present information back to past information. This allows the rolling windo
Currently, this feature is only implemented for time-based windows.
For fixed windows, the closed parameter cannot be set and the rolling window will always have both endpoints closed.

.. _stats.iter_rolling_window:

Iteration over window:
~~~~~~~~~~~~~~~~~~~~~~

.. versionadded:: 1.1.0

``Rolling`` and ``Expanding`` objects now support iteration. Note that ``min_periods`` is ignored during iteration.

.. ipython::

   In [1]: df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})

   In [2]: for i in df.rolling(2):
      ...:     print(i)
      ...:


.. _stats.moments.ts-versus-resampling:

Time-aware rolling vs. resampling
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ Other enhancements
:class:`~pandas.io.stata.StataWriter`, :class:`~pandas.io.stata.StataWriter117`,
and :class:`~pandas.io.stata.StataWriterUTF8` (:issue:`26599`).
- :meth:`HDFStore.put` now accepts `track_times` parameter. Parameter is passed to ``create_table`` method of ``PyTables`` (:issue:`32682`).
- Make :class:`pandas.core.window.Rolling` and :class:`pandas.core.window.Expanding` iterable (:issue:`11704`)

.. ---------------------------------------------------------------------------

Expand Down
18 changes: 16 additions & 2 deletions pandas/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,8 +247,22 @@ def __repr__(self) -> str:
return f"{self._window_type} [{attrs}]"

def __iter__(self):
    """
    Iterate over the windows of the underlying object.

    One slice of the object is yielded per ``(start, end)`` pair returned
    by the window indexer's ``get_window_bounds``; each slice is taken
    positionally with ``.iloc``.

    Yields
    ------
    Series or DataFrame
        ``obj.iloc[start:end]`` for each window, in order.

    Notes
    -----
    See GH 11704. The stale ``raise NotImplementedError`` from the
    previous implementation has been dropped; it made the code below
    unreachable.
    """
    window = self._get_window(win_type=None)
    # Only the prepared object is needed here; the blocks are not used.
    _, obj = self._create_blocks()
    indexer = self._get_window_indexer(window=window)

    start, end = indexer.get_window_bounds(
        num_values=len(obj),
        min_periods=self.min_periods,
        center=self.center,
        closed=self.closed,
    )
    # get_window_bounds is expected to return two arrays of equal length,
    # one start and one end position per window.
    assert len(start) == len(end)

    for s, e in zip(start, end):
        yield obj.iloc[s:e]

def _prep_values(self, values: Optional[np.ndarray] = None) -> np.ndarray:
"""Convert input to numpy arrays for Cython routines"""
Expand Down
97 changes: 88 additions & 9 deletions pandas/tests/window/test_expanding.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,15 +88,6 @@ def test_missing_minp_zero():
tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame])
def test_iter_raises(klass):
    """Check that calling ``iter`` on an expanding window raises."""
    # GH 11704: iteration over a Window
    # https://github.com/pandas-dev/pandas/issues/11704
    data = klass([1, 2, 3, 4])
    with pytest.raises(NotImplementedError):
        iter(data.expanding(2))


def test_expanding_axis(axis_frame):
# see gh-23372.
df = DataFrame(np.ones((10, 20)))
Expand Down Expand Up @@ -131,3 +122,91 @@ def test_expanding_count_default_min_periods_with_null_values(constructor):
result = constructor(values).expanding().count()
expected = constructor(expected_counts)
tm.assert_equal(result, expected)


@pytest.mark.parametrize(
    "df,expected,min_periods",
    [
        (
            DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
            [
                ({"A": [1], "B": [4]}, [0]),
                ({"A": [1, 2], "B": [4, 5]}, [0, 1]),
                ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]),
            ],
            3,
        ),
        (
            DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
            [
                ({"A": [1], "B": [4]}, [0]),
                ({"A": [1, 2], "B": [4, 5]}, [0, 1]),
                ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]),
            ],
            2,
        ),
        (
            DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
            [
                ({"A": [1], "B": [4]}, [0]),
                ({"A": [1, 2], "B": [4, 5]}, [0, 1]),
                ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]),
            ],
            1,
        ),
        (DataFrame({"A": [1], "B": [4]}), [], 2),
        (DataFrame(), [({}, [])], 1),
        (
            DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}),
            [
                ({"A": [1.0], "B": [np.nan]}, [0]),
                ({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]),
                ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]),
            ],
            3,
        ),
        (
            DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}),
            [
                ({"A": [1.0], "B": [np.nan]}, [0]),
                ({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]),
                ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]),
            ],
            2,
        ),
        (
            DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}),
            [
                ({"A": [1.0], "B": [np.nan]}, [0]),
                ({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]),
                ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]),
            ],
            1,
        ),
    ],
)
def test_iter_expanding_dataframe(df, expected, min_periods):
    """Compare each window yielded by ``df.expanding(...)`` to its expected frame.

    ``expected`` is a list of ``(values, index)`` pairs, each of which is
    turned into a ``DataFrame`` before comparison.
    """
    # GH 11704
    expected_frames = [
        DataFrame(values, index=index) for (values, index) in expected
    ]
    windows = df.expanding(min_periods)

    # NOTE(review): zip stops at the shorter of the two sequences, so a
    # difference in the *number* of windows is not detected here.
    for want, got in zip(expected_frames, windows):
        tm.assert_frame_equal(got, want)


@pytest.mark.parametrize(
    "ser,expected,min_periods",
    [
        (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 3),
        (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 2),
        (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 1),
        (Series([1, 2]), [([1], [0]), ([1, 2], [0, 1])], 2),
        (Series([np.nan, 2]), [([np.nan], [0]), ([np.nan, 2], [0, 1])], 2),
        (Series([], dtype="int64"), [], 2),
    ],
)
def test_iter_expanding_series(ser, expected, min_periods):
    """Compare each window yielded by ``ser.expanding(...)`` to its expected series.

    ``expected`` is a list of ``(values, index)`` pairs, each of which is
    turned into a ``Series`` before comparison.
    """
    # GH 11704
    expected_series = [
        Series(values, index=index) for (values, index) in expected
    ]
    windows = ser.expanding(min_periods)

    # NOTE(review): zip stops at the shorter of the two sequences, so a
    # difference in the *number* of windows is not detected here.
    for want, got in zip(expected_series, windows):
        tm.assert_series_equal(got, want)
Loading