diff --git a/doc/source/user_guide/computation.rst b/doc/source/user_guide/computation.rst index d371f6d5f273c..cf630a9671013 100644 --- a/doc/source/user_guide/computation.rst +++ b/doc/source/user_guide/computation.rst @@ -648,6 +648,24 @@ from present information back to past information. This allows the rolling windo Currently, this feature is only implemented for time-based windows. For fixed windows, the closed parameter cannot be set and the rolling window will always have both endpoints closed. +.. _stats.iter_rolling_window: + +Iteration over window: +~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 1.1.0 + +``Rolling`` and ``Expanding`` objects now support iteration. Be noted that ``min_periods`` is ignored in iteration. + +.. ipython:: + + In [1]: df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + + In [2]: for i in df.rolling(2): + ...: print(i) + ...: + + .. _stats.moments.ts-versus-resampling: Time-aware rolling vs. resampling diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 73892da2cbf71..eaf8c19b9a21b 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -235,6 +235,7 @@ Other enhancements :class:`~pandas.io.stata.StataWriter`, :class:`~pandas.io.stata.StataWriter117`, and :class:`~pandas.io.stata.StataWriterUTF8` (:issue:`26599`). - :meth:`HDFStore.put` now accepts `track_times` parameter. Parameter is passed to ``create_table`` method of ``PyTables`` (:issue:`32682`). +- Make :class:`pandas.core.window.Rolling` and :class:`pandas.core.window.Expanding` iterable(:issue:`11704`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 660fca61fd21c..c615e18af68e6 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -247,8 +247,22 @@ def __repr__(self) -> str: return f"{self._window_type} [{attrs}]" def __iter__(self): - url = "https://github.com/pandas-dev/pandas/issues/11704" - raise NotImplementedError(f"See issue #11704 {url}") + window = self._get_window(win_type=None) + blocks, obj = self._create_blocks() + index = self._get_window_indexer(window=window) + + start, end = index.get_window_bounds( + num_values=len(obj), + min_periods=self.min_periods, + center=self.center, + closed=self.closed, + ) + # From get_window_bounds, those two should be equal in length of array + assert len(start) == len(end) + + for s, e in zip(start, end): + result = obj.iloc[slice(s, e)] + yield result def _prep_values(self, values: Optional[np.ndarray] = None) -> np.ndarray: """Convert input to numpy arrays for Cython routines""" diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index aaa7e9a34fadf..b57467385d371 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -88,15 +88,6 @@ def test_missing_minp_zero(): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame]) -def test_iter_raises(klass): - # https://github.com/pandas-dev/pandas/issues/11704 - # Iteration over a Window - obj = klass([1, 2, 3, 4]) - with pytest.raises(NotImplementedError): - iter(obj.expanding(2)) - - def test_expanding_axis(axis_frame): # see gh-23372. df = DataFrame(np.ones((10, 20))) @@ -131,3 +122,91 @@ def test_expanding_count_default_min_periods_with_null_values(constructor): result = constructor(values).expanding().count() expected = constructor(expected_counts) tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "df,expected,min_periods", + [ + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), + ], + 3, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), + ], + 2, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), + ], + 1, + ), + (DataFrame({"A": [1], "B": [4]}), [], 2), + (DataFrame(), [({}, [])], 1), + ( + DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}), + [ + ({"A": [1.0], "B": [np.nan]}, [0]), + ({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]), + ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]), + ], + 3, + ), + ( + DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}), + [ + ({"A": [1.0], "B": [np.nan]}, [0]), + ({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]), + ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]), + ], + 2, + ), + ( + DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}), + [ + ({"A": [1.0], "B": [np.nan]}, [0]), + ({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]), + ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]), + ], + 1, + ), + ], +) +def test_iter_expanding_dataframe(df, expected, min_periods): + # GH 11704 + expected = [DataFrame(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip(expected, df.expanding(min_periods)): + tm.assert_frame_equal(actual, expected) + + +@pytest.mark.parametrize( + "ser,expected,min_periods", + [ + (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 3), + (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 2), + (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 1), + (Series([1, 2]), [([1], [0]), ([1, 2], [0, 1])], 2), + (Series([np.nan, 2]), [([np.nan], [0]), ([np.nan, 2], [0, 1])], 2), + (Series([], dtype="int64"), [], 2), + ], +) +def test_iter_expanding_series(ser, expected, min_periods): + # GH 11704 + expected = [Series(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip(expected, ser.expanding(min_periods)): + tm.assert_series_equal(actual, expected) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index a7582a86c0848..f9b0e6856337b 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -7,7 +7,7 @@ import pandas.util._test_decorators as td import pandas as pd -from pandas import DataFrame, Series +from pandas import DataFrame, Series, date_range import pandas._testing as tm from pandas.core.window import Rolling @@ -310,18 +310,6 @@ def test_multi_index_names(): assert result.index.names == [None, "1", "2"] -@pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame]) -def test_iter_raises(klass): - # https://github.com/pandas-dev/pandas/issues/11704 - # Iteration over a Window - obj = klass([1, 2, 3, 4]) - - msg = "See issue #11704 https://github.com/pandas-dev/pandas/issues/11704" - - with pytest.raises(NotImplementedError, match=msg): - iter(obj.rolling(2)) - - def test_rolling_axis_sum(axis_frame): # see gh-23372. df = DataFrame(np.ones((10, 20))) @@ -470,3 +458,208 @@ def test_rolling_count_default_min_periods_with_null_values(constructor): result = constructor(values).rolling(3).count() expected = constructor(expected_counts) tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "df,expected,window,min_periods", + [ + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), + ], + 3, + None, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [2, 3], "B": [5, 6]}, [1, 2]), + ], + 2, + 1, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [2, 3], "B": [5, 6]}, [1, 2]), + ], + 2, + 3, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [2], "B": [5]}, [1]), + ({"A": [3], "B": [6]}, [2]), + ], + 1, + 1, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [2], "B": [5]}, [1]), + ({"A": [3], "B": [6]}, [2]), + ], + 1, + 2, + ), + (DataFrame({"A": [1], "B": [4]}), [], 2, None), + (DataFrame({"A": [1], "B": [4]}), [], 2, 1), + (DataFrame(), [({}, [])], 2, None), + ( + DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}), + [ + ({"A": [1.0], "B": [np.nan]}, [0]), + ({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]), + ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]), + ], + 3, + 2, + ), + ], +) +def test_iter_rolling_dataframe(df, expected, window, min_periods): + # GH 11704 + expected = [DataFrame(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip( + expected, df.rolling(window, min_periods=min_periods) + ): + tm.assert_frame_equal(actual, expected) + + +@pytest.mark.parametrize( + "expected,window", + [ + ( + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [2, 3], "B": [5, 6]}, [1, 2]), + ], + "2D", + ), + ( + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), + ], + "3D", + ), + ( + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [2], "B": [5]}, [1]), + ({"A": [3], "B": [6]}, [2]), + ], + "1D", + ), + ], +) +def test_iter_rolling_on_dataframe(expected, window): + # GH 11704 + df = DataFrame( + { + "A": [1, 2, 3, 4, 5], + "B": [4, 5, 6, 7, 8], + "C": date_range(start="2016-01-01", periods=5, freq="D"), + } + ) + + expected = [DataFrame(values, index=index) for (values, index) in expected] + for (expected, actual) in zip(expected, df.rolling(window, on="C")): + tm.assert_frame_equal(actual, expected) + + +@pytest.mark.parametrize( + "ser,expected,window, min_periods", + [ + ( + Series([1, 2, 3]), + [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], + 3, + None, + ), + ( + Series([1, 2, 3]), + [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], + 3, + 1, + ), + (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([2, 3], [1, 2])], 2, 1), + (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([2, 3], [1, 2])], 2, 3), + (Series([1, 2, 3]), [([1], [0]), ([2], [1]), ([3], [2])], 1, 0), + (Series([1, 2, 3]), [([1], [0]), ([2], [1]), ([3], [2])], 1, 2), + (Series([1, 2]), [([1], [0]), ([1, 2], [0, 1])], 2, 0), + (Series([], dtype="int64"), [], 2, 1), + ], +) +def test_iter_rolling_series(ser, expected, window, min_periods): + # GH 11704 + expected = [Series(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip( + expected, ser.rolling(window, min_periods=min_periods) + ): + tm.assert_series_equal(actual, expected) + + +@pytest.mark.parametrize( + "expected,expected_index,window", + [ + ( + [[0], [1], [2], [3], [4]], + [ + date_range("2020-01-01", periods=1, freq="D"), + date_range("2020-01-02", periods=1, freq="D"), + date_range("2020-01-03", periods=1, freq="D"), + date_range("2020-01-04", periods=1, freq="D"), + date_range("2020-01-05", periods=1, freq="D"), + ], + "1D", + ), + ( + [[0], [0, 1], [1, 2], [2, 3], [3, 4]], + [ + date_range("2020-01-01", periods=1, freq="D"), + date_range("2020-01-01", periods=2, freq="D"), + date_range("2020-01-02", periods=2, freq="D"), + date_range("2020-01-03", periods=2, freq="D"), + date_range("2020-01-04", periods=2, freq="D"), + ], + "2D", + ), + ( + [[0], [0, 1], [0, 1, 2], [1, 2, 3], [2, 3, 4]], + [ + date_range("2020-01-01", periods=1, freq="D"), + date_range("2020-01-01", periods=2, freq="D"), + date_range("2020-01-01", periods=3, freq="D"), + date_range("2020-01-02", periods=3, freq="D"), + date_range("2020-01-03", periods=3, freq="D"), + ], + "3D", + ), + ], +) +def test_iter_rolling_datetime(expected, expected_index, window): + # GH 11704 + ser = Series(range(5), index=date_range(start="2020-01-01", periods=5, freq="D")) + + expected = [ + Series(values, index=idx) for (values, idx) in zip(expected, expected_index) + ] + + for (expected, actual) in zip(expected, ser.rolling(window)): + tm.assert_series_equal(actual, expected)