Skip to content

Initial implementation of rolling iterators #27399

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
0cd558e
Initial implementation of rolling iterators
ThomasKluiters Jul 15, 2019
c33f2a4
Implement simple test cases
ThomasKluiters Jul 15, 2019
285ebcc
Add whatsnew entry
ThomasKluiters Jul 15, 2019
2d782c4
Fix Cython compile error
ThomasKluiters Jul 15, 2019
4271782
Add expanding test cases
ThomasKluiters Jul 15, 2019
68db60a
Implement date time index aware window
ThomasKluiters Jul 15, 2019
3b10a49
Reformat
ThomasKluiters Jul 15, 2019
525bdc6
Format window.pyx
ThomasKluiters Jul 15, 2019
c50a309
Fix failing tests
Jul 16, 2019
ee8f00e
Remove validate from iter
ThomasKluiters Jul 16, 2019
e1cf139
Merge branch 'master' of https://github.com/pandas-dev/pandas into ro…
ThomasKluiters Jul 16, 2019
803a18f
Refactor get_window_indexer to _Window
ThomasKluiters Jul 16, 2019
4c8adff
Merge branch 'master' of https://github.com/pandas-dev/pandas into ro…
ThomasKluiters Jul 17, 2019
1252dfd
Merge branch 'rolling-iterator' of https://www.github.com/ThomasKluit…
ThomasKluiters Jul 17, 2019
e6eb230
Remove enumerate
ThomasKluiters Jul 17, 2019
9833fa6
Add tests for min_periods
ThomasKluiters Jul 17, 2019
0a2b416
Reformat tests
ThomasKluiters Jul 17, 2019
6b559e9
Refactor tests
ThomasKluiters Jul 17, 2019
62f0997
Implement support for nan values
ThomasKluiters Jul 17, 2019
0d2d5b1
Rename duplicate test name
ThomasKluiters Jul 17, 2019
412efa7
Use count() instead of roll_sum()
ThomasKluiters Jul 18, 2019
df5f199
Merge branch 'master' of https://github.com/pandas-dev/pandas into ro…
ThomasKluiters Jul 18, 2019
285b5ba
Update create_blocks and _get_index
ThomasKluiters Jul 18, 2019
33e24cb
Refactor tests into test_iterator
ThomasKluiters Jul 21, 2019
4e2f1a2
Merge branch 'master' of https://github.com/pandas-dev/pandas into ro…
ThomasKluiters Jul 21, 2019
755e0c1
Move whatsnew entry to 1.0.0
ThomasKluiters Jul 21, 2019
d58e897
Add iterators to computation.rst
ThomasKluiters Jul 21, 2019
e3b060c
Add issue #
ThomasKluiters Jul 21, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions doc/source/user_guide/computation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,28 @@ For some windowing functions, additional parameters must be specified:
such that the weights are normalized with respect to each other. Weights
of ``[1, 1, 1]`` and ``[2, 2, 2]`` yield the same result.

Iterating over windows
~~~~~~~~~~~~~~~~~~~~~~

.. versionadded:: 1.0.0

It is also possible to iterate over the individual windows of a ``Series`` or ``DataFrame``. Currently ``rolling`` and ``expanding`` windows are supported.

Iterating over a ``Series`` will yield ``Series`` objects for each window whereas iterating over a ``DataFrame`` object
will yield ``DataFrame`` objects.

.. ipython:: python

   s = pd.Series(np.random.randn(5)).rolling(2)
   for x in s:
       print(x)

.. ipython:: python

   df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
   for x in df.rolling(2):
       print(x)

.. _stats.moments.ts:

Time-aware rolling
Expand Down
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ Plotting
Groupby/resample/rolling
^^^^^^^^^^^^^^^^^^^^^^^^

-
- Improved :class:`pandas.core.window.Rolling` and :class:`pandas.core.window.Window`: it is now possible to iterate over a window object (:issue:`12537`)
-

Reshaping
Expand Down
26 changes: 24 additions & 2 deletions pandas/core/window.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,30 @@ def __repr__(self) -> str:
)

def __iter__(self):
    """
    Iterate over the individual windows of the underlying object.

    Yields
    ------
    Series or DataFrame
        One positional slice (``.iloc[start:end]``) per window position,
        taken from each block returned by ``self._create_blocks()``.
        A window is skipped when the smallest non-NA count in it is below
        the ``minp`` value reported by ``libwindow.get_window_indexer``.
    """
    closed = self.closed
    window = self._get_window()
    minp = _use_window(self.min_periods, window)
    offset = _offset(window, self.center)

    # Only the blocks are iterated; the second return value is not needed.
    blocks, _ = self._create_blocks()
    index = self._get_index()

    for values in blocks:
        start, end, N, win, _minp, is_variable = libwindow.get_window_indexer(
            np.asarray(values), window, minp, index, closed
        )

        for i in range(N):
            if is_variable:
                # Variable-size windows: per-position bounds come straight
                # from the indexer.
                s, e = start[i], end[i]
            else:
                # Fixed-size windows: derive the bounds from the position,
                # shifted by ``offset`` and clipped to the range [0, N].
                s = max(i - win + offset + 1, 0)
                e = min(i + offset + 1, N)

            result = values.iloc[s:e]
            if result.count().min() >= _minp:
                yield result

def _get_index(self) -> Optional[np.ndarray]:
"""
Expand Down
8 changes: 0 additions & 8 deletions pandas/tests/window/test_expanding.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,14 +90,6 @@ def test_missing_minp_zero(self):
expected = pd.Series([np.nan])
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame])
def test_iter_raises(self, klass):
    """
    Iterating over an expanding window raises ``NotImplementedError``.

    See https://github.com/pandas-dev/pandas/issues/11704
    """
    data = klass([1, 2, 3, 4])
    with pytest.raises(NotImplementedError):
        expanding_window = data.expanding(2)
        iter(expanding_window)

def test_expanding_axis(self, axis_frame):
# see gh-23372.
df = DataFrame(np.ones((10, 20)))
Expand Down
270 changes: 270 additions & 0 deletions pandas/tests/window/test_iterator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,270 @@
import numpy as np
import pytest

from pandas import DataFrame, Series, date_range
from pandas.tests.window.common import Base
import pandas.util.testing as tm


# Tests for GH11704
def _assert_windows_equal(windows, expected, assert_equal):
    """
    Check that iterating ``windows`` yields exactly the objects in ``expected``.

    Parameters
    ----------
    windows : iterable
        Window object to iterate over, e.g. the result of ``.rolling()``.
    expected : list
        Expected objects, in iteration order.
    assert_equal : callable
        Pairwise comparison, e.g. ``tm.assert_series_equal``.
    """
    actual = list(windows)
    # zip() stops at the shorter input, so a missing or an extra window would
    # go unnoticed; compare the number of windows explicitly first.
    assert len(actual) == len(expected)
    for got, want in zip(actual, expected):
        assert_equal(got, want)


class TestExpanding(Base):
    """Tests for iterating over rolling and expanding window objects."""

    @pytest.mark.parametrize(
        "dataframe,expected,window",
        [
            (
                DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
                [({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2])],
                3,
            ),
            (
                DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
                [
                    ({"A": [1, 2], "B": [4, 5]}, [0, 1]),
                    ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]),
                ],
                2,
            ),
            (
                DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
                [
                    ({"A": [1], "B": [4]}, [0]),
                    ({"A": [1, 2], "B": [4, 5]}, [0, 1]),
                    ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]),
                ],
                1,
            ),
            (DataFrame({"A": [1], "B": [4]}), [], 1337),
            # NOTE(review): this case used to expect one empty frame, but
            # zip() truncation meant it was never compared. An empty frame
            # presumably yields no windows at all -- confirm.
            (DataFrame(), [], 1337),
        ],
    )
    def test_iterator_expanding_dataframe(self, dataframe, expected, window):
        """Expanding iteration over a DataFrame yields the expected frames."""
        expected = [DataFrame(values, index=index) for (values, index) in expected]

        _assert_windows_equal(
            dataframe.expanding(min_periods=window), expected, tm.assert_frame_equal
        )

    @pytest.mark.parametrize(
        "series,expected,window",
        [
            (Series([1, 2, 3]), [([1, 2, 3], [0, 1, 2])], 3),
            (Series([1, 2, 3]), [([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 2),
            (
                Series([1, 2, 3]),
                [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])],
                1,
            ),
            # NOTE(review): this case used to expect ``[1, 2]``, but zip()
            # truncation meant it was never compared. With min_periods larger
            # than the data, no window should pass the min_periods filter
            # (as in the single-row DataFrame case) -- confirm.
            (Series([1, 2]), [], 1337),
            (Series([]), [], 1337),
        ],
    )
    def test_iterator_expanding_series(self, series, expected, window):
        """Expanding iteration over a Series yields the expected series."""
        expected = [Series(values, index=index) for (values, index) in expected]

        _assert_windows_equal(
            series.expanding(min_periods=window), expected, tm.assert_series_equal
        )

    @pytest.mark.parametrize(
        "dataframe,expected,window",
        [
            (
                DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
                [({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2])],
                3,
            ),
            (
                DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
                [
                    ({"A": [1, 2], "B": [4, 5]}, [0, 1]),
                    ({"A": [2, 3], "B": [5, 6]}, [1, 2]),
                ],
                2,
            ),
            (
                DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
                [
                    ({"A": [1], "B": [4]}, [0]),
                    ({"A": [2], "B": [5]}, [1]),
                    ({"A": [3], "B": [6]}, [2]),
                ],
                1,
            ),
            (DataFrame({"A": [1], "B": [4]}), [], 1337),
            # NOTE(review): this case used to expect one empty frame, but
            # zip() truncation meant it was never compared. An empty frame
            # presumably yields no windows at all -- confirm.
            (DataFrame(), [], 1337),
        ],
    )
    def test_iterator_rolling_dataframe(self, dataframe, expected, window):
        """Rolling iteration over a DataFrame yields the expected frames."""
        expected = [DataFrame(values, index=index) for (values, index) in expected]

        _assert_windows_equal(
            dataframe.rolling(window), expected, tm.assert_frame_equal
        )

    @pytest.mark.parametrize(
        "series,expected,window",
        [
            (Series([1, 2, 3]), [([1, 2, 3], [0, 1, 2])], 3),
            (Series([1, 2, 3]), [([1, 2], [0, 1]), ([2, 3], [1, 2])], 2),
            (Series([1, 2, 3]), [([1], [0]), ([2], [1]), ([3], [2])], 1),
            # NOTE(review): this case used to expect ``[1, 2]``, but zip()
            # truncation meant it was never compared. With a window (and so
            # presumably the default min_periods) larger than the data, no
            # window should pass the min_periods filter -- confirm.
            (Series([1, 2]), [], 1337),
            (Series([]), [], 1337),
        ],
    )
    def test_iterator_rolling_series(self, series, expected, window):
        """Rolling iteration over a Series yields the expected series."""
        expected = [Series(values, index=index) for (values, index) in expected]

        _assert_windows_equal(series.rolling(window), expected, tm.assert_series_equal)

    @pytest.mark.parametrize(
        "expected,window,minp",
        [
            (
                [
                    ({"A": [1, 2], "B": [4, 5]}, [0, 1]),
                    ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]),
                ],
                3,
                2,
            ),
            ([], 3, 4),
        ],
    )
    def test_iterator_rolling_dataframe_min_periods(self, expected, window, minp):
        """Windows with fewer than ``min_periods`` rows are not yielded."""
        dataframe = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})

        expected = [DataFrame(values, index=index) for (values, index) in expected]

        _assert_windows_equal(
            dataframe.rolling(window, min_periods=minp),
            expected,
            tm.assert_frame_equal,
        )

    @pytest.mark.parametrize(
        "expected,window,minp",
        [([([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 3, 2), ([], 3, 4)],
    )
    def test_iterator_rolling_series_min_periods(self, expected, window, minp):
        """Windows with fewer than ``min_periods`` values are not yielded."""
        series = Series([1, 2, 3])

        expected = [Series(values, index=index) for (values, index) in expected]

        _assert_windows_equal(
            series.rolling(window, min_periods=minp),
            expected,
            tm.assert_series_equal,
        )

    @pytest.mark.parametrize(
        "expected,window,minp",
        [
            ([([1.0, np.nan, 3.0], [0, 1, 2]), ([np.nan, 3.0, 4.0], [1, 2, 3])], 3, 2),
            ([([1.0, np.nan, 3.0, 4.0], [0, 1, 2, 3])], 4, 3),
            ([], 4, 4),
        ],
    )
    def test_iterator_rolling_series_min_periods_nan(self, expected, window, minp):
        """NaN values do not count towards ``min_periods`` for a Series."""
        series = Series([1, np.nan, 3, 4])

        expected = [Series(values, index=index) for (values, index) in expected]

        _assert_windows_equal(
            series.rolling(window, min_periods=minp),
            expected,
            tm.assert_series_equal,
        )

    @pytest.mark.parametrize(
        "expected,window,minp",
        [
            ([({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2])], 3, 2),
            ([], 2, 2),
            ([], 3, 4),
        ],
    )
    def test_iterator_rolling_dataframe_min_periods_nan(self, expected, window, minp):
        """NaN values do not count towards ``min_periods`` for a DataFrame."""
        dataframe = DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]})

        expected = [DataFrame(values, index=index) for (values, index) in expected]

        _assert_windows_equal(
            dataframe.rolling(window, min_periods=minp),
            expected,
            tm.assert_frame_equal,
        )

    @pytest.mark.parametrize(
        "expected,window",
        [
            ([([0], [0]), ([1], [1]), ([2], [2]), ([3], [3]), ([4], [4])], "1s"),
            (
                [
                    ([0], [0]),
                    ([0, 1], [0, 1]),
                    ([1, 2], [1, 2]),
                    ([2, 3], [2, 3]),
                    ([3, 4], [3, 4]),
                ],
                "2S",
            ),
            (
                [
                    ([0], [0]),
                    ([0, 1], [0, 1]),
                    ([0, 1, 2], [0, 1, 2]),
                    ([1, 2, 3], [1, 2, 3]),
                    ([2, 3, 4], [2, 3, 4]),
                ],
                "3S",
            ),
        ],
    )
    def test_iterator_rolling_series_time(self, expected, window):
        """Offset-based windows over a datetime-indexed Series."""
        series = Series(
            range(5), index=date_range(start="2016-01-01 09:30:00", periods=5, freq="s")
        )

        # ``index`` holds positions into the datetime index of ``series``.
        expected = [
            Series(values, index=series.index[index]) for (values, index) in expected
        ]

        _assert_windows_equal(series.rolling(window), expected, tm.assert_series_equal)

    @pytest.mark.parametrize(
        "expected,window,minp",
        [
            ([], "1s", 2),
            (
                [
                    ([0, 1], [0, 1]),
                    ([1, 2], [1, 2]),
                    ([2, 3], [2, 3]),
                    ([3, 4], [3, 4]),
                ],
                "2S",
                2,
            ),
            (
                [
                    ([0], [0]),
                    ([0, 1], [0, 1]),
                    ([0, 1, 2], [0, 1, 2]),
                    ([1, 2, 3], [1, 2, 3]),
                    ([2, 3, 4], [2, 3, 4]),
                ],
                "3S",
                1,
            ),
        ],
    )
    def test_iterator_rolling_series_time_min_periods(self, expected, window, minp):
        """Offset-based windows combined with an explicit ``min_periods``."""
        series = Series(
            range(5), index=date_range(start="2016-01-01 09:30:00", periods=5, freq="s")
        )

        # ``index`` holds positions into the datetime index of ``series``.
        expected = [
            Series(values, index=series.index[index]) for (values, index) in expected
        ]

        _assert_windows_equal(
            series.rolling(window, min_periods=minp),
            expected,
            tm.assert_series_equal,
        )
8 changes: 0 additions & 8 deletions pandas/tests/window/test_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,14 +291,6 @@ def test_multi_index_names(self):
tm.assert_index_equal(result.columns, df.columns)
assert result.index.names == [None, "1", "2"]

@pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame])
def test_iter_raises(self, klass):
    """
    Iterating over a rolling window raises ``NotImplementedError``.

    See https://github.com/pandas-dev/pandas/issues/11704
    """
    data = klass([1, 2, 3, 4])
    with pytest.raises(NotImplementedError):
        rolling_window = data.rolling(2)
        iter(rolling_window)

def test_rolling_axis_sum(self, axis_frame):
# see gh-23372.
df = DataFrame(np.ones((10, 20)))
Expand Down