diff --git a/doc/source/user_guide/computation.rst b/doc/source/user_guide/computation.rst index 4f44fcaab63d4..b97fbc3c7ac2d 100644 --- a/doc/source/user_guide/computation.rst +++ b/doc/source/user_guide/computation.rst @@ -403,6 +403,28 @@ For some windowing functions, additional parameters must be specified: such that the weights are normalized with respect to each other. Weights of ``[1, 1, 1]`` and ``[2, 2, 2]`` yield the same result. +Iterating over windows +~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 1.0.0 + +It is also possible to iterate over the windows of a ``Series`` or ``DataFrame``. Currently ``rolling`` and ``expanding`` are supported. + +Iterating over a windowed ``Series`` will yield a ``Series`` object for each window, whereas iterating over a windowed ``DataFrame`` +will yield a ``DataFrame`` object for each window. + +.. ipython:: python + + s = pd.Series(np.random.randn(5)).rolling(2) + for x in s: + x + +.. ipython:: python + + df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}).rolling(2) + for x in df: + x + .. 
_stats.moments.ts: Time-aware rolling diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 9caf127553e05..921e3be41236e 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -163,7 +163,7 @@ Plotting Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- +- Improved :class:`pandas.core.window.Rolling` and :class:`pandas.core.window.Window`: it is now possible to iterate over a window object (:issue:`12537`) - Reshaping diff --git a/pandas/core/window.py b/pandas/core/window.py index 20d5453cc43a2..ce6c9c1d5c83e 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -194,8 +194,30 @@ def __repr__(self) -> str: ) def __iter__(self): - url = "https://github.com/pandas-dev/pandas/issues/11704" - raise NotImplementedError("See issue #11704 {url}".format(url=url)) + closed = self.closed + window = self._get_window() + minp = _use_window(self.min_periods, window) + offset = _offset(window, self.center) + + blocks, obj = self._create_blocks() + index = self._get_index() + + for values in blocks: + start, end, N, win, _minp, is_variable = libwindow.get_window_indexer( + np.asarray(values), window, minp, index, closed + ) + + for i in range(N): + if is_variable: + s = start[i] + e = end[i] + else: + s = max(i - win + offset + 1, 0) + e = min(i + offset + 1, N) + + result = values.iloc[slice(s, e)] + if result.count().min() >= _minp: + yield result def _get_index(self) -> Optional[np.ndarray]: """ diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index 1e92c981964c5..103b67a9d7a18 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -90,14 +90,6 @@ def test_missing_minp_zero(self): expected = pd.Series([np.nan]) tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame]) - def test_iter_raises(self, klass): - # https://github.com/pandas-dev/pandas/issues/11704 - # Iteration over a 
Window - obj = klass([1, 2, 3, 4]) - with pytest.raises(NotImplementedError): - iter(obj.expanding(2)) - def test_expanding_axis(self, axis_frame): # see gh-23372. df = DataFrame(np.ones((10, 20))) diff --git a/pandas/tests/window/test_iterator.py b/pandas/tests/window/test_iterator.py new file mode 100644 index 0000000000000..aaa2569011f2b --- /dev/null +++ b/pandas/tests/window/test_iterator.py @@ -0,0 +1,270 @@ +import numpy as np +import pytest + +from pandas import DataFrame, Series, date_range +from pandas.tests.window.common import Base +import pandas.util.testing as tm + + +# Tests for GH11704 +class TestExpanding(Base): + @pytest.mark.parametrize( + "dataframe,expected,window", + [ + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2])], + 3, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), + ], + 2, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), + ], + 1, + ), + (DataFrame({"A": [1], "B": [4]}), [], 1337), + (DataFrame(), [({}, [])], 1337), + ], + ) + def test_iterator_expanding_dataframe(self, dataframe, expected, window): + expected = [DataFrame(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip( + expected, dataframe.expanding(min_periods=window) + ): + tm.assert_frame_equal(actual, expected) + + @pytest.mark.parametrize( + "series,expected,window", + [ + (Series([1, 2, 3]), [([1, 2, 3], [0, 1, 2])], 3), + (Series([1, 2, 3]), [([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 2), + ( + Series([1, 2, 3]), + [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], + 1, + ), + (Series([1, 2]), [([1, 2], [0, 1])], 1337), + (Series([]), [], 1337), + ], + ) + def test_iterator_expanding_series(self, series, expected, window): + expected = [Series(values, 
index=index) for (values, index) in expected] + + for (expected, actual) in zip(expected, series.expanding(min_periods=window)): + tm.assert_series_equal(actual, expected) + + @pytest.mark.parametrize( + "dataframe,expected,window", + [ + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2])], + 3, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [2, 3], "B": [5, 6]}, [1, 2]), + ], + 2, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [2], "B": [5]}, [1]), + ({"A": [3], "B": [6]}, [2]), + ], + 1, + ), + (DataFrame({"A": [1], "B": [4]}), [], 1337), + (DataFrame(), [({}, [])], 1337), + ], + ) + def test_iterator_rolling_dataframe(self, dataframe, expected, window): + expected = [DataFrame(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip(expected, dataframe.rolling(window)): + tm.assert_frame_equal(actual, expected) + + @pytest.mark.parametrize( + "series,expected,window", + [ + (Series([1, 2, 3]), [([1, 2, 3], [0, 1, 2])], 3), + (Series([1, 2, 3]), [([1, 2], [0, 1]), ([2, 3], [1, 2])], 2), + (Series([1, 2, 3]), [([1], [0]), ([2], [1]), ([3], [2])], 1), + (Series([1, 2]), [([1, 2], [0, 1])], 1337), + (Series([]), [], 1337), + ], + ) + def test_iterator_rolling_series(self, series, expected, window): + expected = [Series(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip(expected, series.rolling(window)): + tm.assert_series_equal(actual, expected) + + @pytest.mark.parametrize( + "expected,window,minp", + [ + ( + [ + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), + ], + 3, + 2, + ), + ([], 3, 4), + ], + ) + def test_iterator_rolling_dataframe_min_periods(self, expected, window, minp): + dataframe = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + + expected = [DataFrame(values, index=index) for (values, 
index) in expected] + + for (expected, actual) in zip( + expected, dataframe.rolling(window, min_periods=minp) + ): + tm.assert_frame_equal(actual, expected) + + @pytest.mark.parametrize( + "expected,window,minp", + [([([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 3, 2), ([], 3, 4)], + ) + def test_iterator_rolling_series_min_periods(self, expected, window, minp): + series = Series([1, 2, 3]) + + expected = [Series(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip( + expected, series.rolling(window, min_periods=minp) + ): + tm.assert_series_equal(actual, expected) + + @pytest.mark.parametrize( + "expected,window,minp", + [ + ([([1.0, np.nan, 3.0], [0, 1, 2]), ([np.nan, 3.0, 4.0], [1, 2, 3])], 3, 2), + ([([1.0, np.nan, 3.0, 4.0], [0, 1, 2, 3])], 4, 3), + ([], 4, 4), + ], + ) + def test_iterator_rolling_series_min_periods_nan(self, expected, window, minp): + series = Series([1, np.nan, 3, 4]) + + expected = [Series(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip( + expected, series.rolling(window, min_periods=minp) + ): + tm.assert_series_equal(actual, expected) + + @pytest.mark.parametrize( + "expected,window,minp", + [ + ([({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2])], 3, 2), + ([], 2, 2), + ([], 3, 4), + ], + ) + def test_iterator_rolling_dataframe_min_periods_nan(self, expected, window, minp): + dataframe = DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}) + + expected = [DataFrame(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip( + expected, dataframe.rolling(window, min_periods=minp) + ): + tm.assert_frame_equal(actual, expected) + + @pytest.mark.parametrize( + "expected,window", + [ + ([([0], [0]), ([1], [1]), ([2], [2]), ([3], [3]), ([4], [4])], "1s"), + ( + [ + ([0], [0]), + ([0, 1], [0, 1]), + ([1, 2], [1, 2]), + ([2, 3], [2, 3]), + ([3, 4], [3, 4]), + ], + "2S", + ), + ( + [ + ([0], [0]), + ([0, 1], [0, 1]), + ([0, 1, 2], [0, 1, 
2]), + ([1, 2, 3], [1, 2, 3]), + ([2, 3, 4], [2, 3, 4]), + ], + "3S", + ), + ], + ) + def test_iterator_rolling_series_time(self, expected, window): + series = Series( + range(5), index=date_range(start="2016-01-01 09:30:00", periods=5, freq="s") + ) + + expected = [ + Series(values, index=series.index[index]) for (values, index) in expected + ] + + for (expected, actual) in zip(expected, series.rolling(window)): + tm.assert_series_equal(actual, expected) + + @pytest.mark.parametrize( + "expected,window,minp", + [ + ([], "1s", 2), + ( + [ + ([0, 1], [0, 1]), + ([1, 2], [1, 2]), + ([2, 3], [2, 3]), + ([3, 4], [3, 4]), + ], + "2S", + 2, + ), + ( + [ + ([0], [0]), + ([0, 1], [0, 1]), + ([0, 1, 2], [0, 1, 2]), + ([1, 2, 3], [1, 2, 3]), + ([2, 3, 4], [2, 3, 4]), + ], + "3S", + 1, + ), + ], + ) + def test_iterator_rolling_series_time_min_periods(self, expected, window, minp): + series = Series( + range(5), index=date_range(start="2016-01-01 09:30:00", periods=5, freq="s") + ) + + expected = [ + Series(values, index=series.index[index]) for (values, index) in expected + ] + + for (expected, actual) in zip( + expected, series.rolling(window, min_periods=minp) + ): + tm.assert_series_equal(actual, expected) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index c7177e1d3914f..d4966706d1731 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -291,14 +291,6 @@ def test_multi_index_names(self): tm.assert_index_equal(result.columns, df.columns) assert result.index.names == [None, "1", "2"] - @pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame]) - def test_iter_raises(self, klass): - # https://github.com/pandas-dev/pandas/issues/11704 - # Iteration over a Window - obj = klass([1, 2, 3, 4]) - with pytest.raises(NotImplementedError): - iter(obj.rolling(2)) - def test_rolling_axis_sum(self, axis_frame): # see gh-23372. df = DataFrame(np.ones((10, 20)))