Skip to content

ENH: Implement __iter__ for Rolling and Expanding #34201

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 23 commits into from
May 17, 2020
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ Other enhancements
:class:`~pandas.io.stata.StataWriter`, :class:`~pandas.io.stata.StataWriter117`,
and :class:`~pandas.io.stata.StataWriterUTF8` (:issue:`26599`).
- :meth:`HDFStore.put` now accepts `track_times` parameter. Parameter is passed to ``create_table`` method of ``PyTables`` (:issue:`32682`).
- Make :class:`pandas.core.window.Rolling` and :class:`pandas.core.window.Expanding` iterable (:issue:`11704`).

.. ---------------------------------------------------------------------------
Expand Down
29 changes: 27 additions & 2 deletions pandas/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,8 +247,33 @@ def __repr__(self) -> str:
return f"{self._window_type} [{attrs}]"

def __iter__(self):
    """
    Iterate over the windows of the underlying data.

    For every block returned by ``self._create_blocks()``, the window
    bounds are computed with ``get_window_bounds`` and the matching
    positional slice of the block is taken with ``.iloc``. A slice is
    yielded only when the smallest value of its ``count()`` reaches the
    threshold, which is ``window`` when ``min_periods`` is None and
    ``min(window, min_periods)`` otherwise.

    Yields
    ------
    object
        The ``block.iloc[start:end]`` slice for each qualifying window.
    """
    window = self._get_window(win_type=None)

    # Only the blocks are needed; the second element is not used here.
    blocks, _ = self._create_blocks()
    index = self._get_window_indexer(window=window)

    # Choose the min between min_periods and window to determine the output size
    if self.min_periods is None:
        iter_threshold = window
    else:
        iter_threshold = min(window, self.min_periods)

    for block in blocks:
        start, end = index.get_window_bounds(
            num_values=len(block),
            min_periods=self.min_periods,
            center=self.center,
            closed=self.closed,
        )
        # From get_window_bounds, those two should be equal in length of array
        assert len(start) == len(end)

        # Walk the bounds pairwise instead of indexing both arrays by position.
        for lower, upper in zip(start, end):
            result = block.iloc[slice(lower, upper)]
            if result.count().min() >= iter_threshold:
                yield result

def _prep_values(self, values: Optional[np.ndarray] = None) -> np.ndarray:
"""Convert input to numpy arrays for Cython routines"""
Expand Down
83 changes: 74 additions & 9 deletions pandas/tests/window/test_expanding.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,15 +88,6 @@ def test_missing_minp_zero():
tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame])
def test_iter_raises(klass):
    """Check that ``iter`` on an expanding window raises NotImplementedError."""
    # Iteration over a Window, see
    # https://github.com/pandas-dev/pandas/issues/11704
    data = klass([1, 2, 3, 4])
    with pytest.raises(NotImplementedError):
        iter(data.expanding(2))


def test_expanding_axis(axis_frame):
# see gh-23372.
df = DataFrame(np.ones((10, 20)))
Expand Down Expand Up @@ -131,3 +122,77 @@ def test_expanding_count_default_min_periods_with_null_values(constructor):
result = constructor(values).expanding().count()
expected = constructor(expected_counts)
tm.assert_equal(result, expected)


@pytest.mark.parametrize(
    "df,expected,min_periods",
    [
        (
            DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
            [({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2])],
            3,
        ),
        (
            DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
            [
                ({"A": [1, 2], "B": [4, 5]}, [0, 1]),
                ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]),
            ],
            2,
        ),
        (
            DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
            [
                ({"A": [1], "B": [4]}, [0]),
                ({"A": [1, 2], "B": [4, 5]}, [0, 1]),
                ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]),
            ],
            1,
        ),
        (DataFrame({"A": [1], "B": [4]}), [], 2),
        (DataFrame(), [({}, [])], 1),
        (
            DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}),
            [({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2])],
            3,
        ),
        (
            DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}),
            [({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2])],
            2,
        ),
        (
            DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}),
            [
                ({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]),
                ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]),
            ],
            1,
        ),
    ],
)
def test_iter_expanding_dataframe(df, expected, min_periods):
    """Compare each window yielded by ``df.expanding(min_periods)`` to a frame.

    ``expected`` is a list of ``(values, index)`` pairs; each pair is turned
    into a DataFrame and compared with the window at the same position.
    """
    # GH 11704
    expected_frames = []
    for values, index in expected:
        expected_frames.append(DataFrame(values, index=index))

    windows = df.expanding(min_periods)
    # NOTE(review): zip stops at the shorter input, so a different number of
    # yielded windows than expected frames is not detected by this loop.
    for wanted, actual in zip(expected_frames, windows):
        tm.assert_frame_equal(actual, wanted)


@pytest.mark.parametrize(
    "ser,expected,min_periods",
    [
        (Series([1, 2, 3]), [([1, 2, 3], [0, 1, 2])], 3),
        (Series([1, 2, 3]), [([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 2),
        (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 1),
        (Series([1, 2]), [([1, 2], [0, 1])], 2),
        (Series([np.nan, 2]), [([np.nan, 2], [0, np.nan])], 2),
        (Series([], dtype="int64"), [], 2),
    ],
)
def test_iter_expanding_series(ser, expected, min_periods):
    """Compare each window yielded by ``ser.expanding(min_periods)`` to a Series.

    ``expected`` is a list of ``(values, index)`` pairs; each pair is turned
    into a Series and compared with the window at the same position.
    """
    # GH 11704
    expected_series = []
    for values, index in expected:
        expected_series.append(Series(values, index=index))

    windows = ser.expanding(min_periods)
    # NOTE(review): zip stops at the shorter input, so a different number of
    # yielded windows than expected Series is not detected by this loop.
    for wanted, actual in zip(expected_series, windows):
        tm.assert_series_equal(actual, wanted)
96 changes: 84 additions & 12 deletions pandas/tests/window/test_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,18 +310,6 @@ def test_multi_index_names():
assert result.index.names == [None, "1", "2"]


@pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame])
def test_iter_raises(klass):
    """Check that ``iter`` on a rolling window raises with the issue message."""
    # Iteration over a Window, see
    # https://github.com/pandas-dev/pandas/issues/11704
    expected_message = (
        "See issue #11704 https://github.com/pandas-dev/pandas/issues/11704"
    )
    data = klass([1, 2, 3, 4])

    with pytest.raises(NotImplementedError, match=expected_message):
        iter(data.rolling(2))


def test_rolling_axis_sum(axis_frame):
# see gh-23372.
df = DataFrame(np.ones((10, 20)))
Expand Down Expand Up @@ -470,3 +458,87 @@ def test_rolling_count_default_min_periods_with_null_values(constructor):
result = constructor(values).rolling(3).count()
expected = constructor(expected_counts)
tm.assert_equal(result, expected)


@pytest.mark.parametrize(
    "df,expected,window,min_periods",
    [
        (
            DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
            [({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2])],
            3,
            None,
        ),
        (
            DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
            [
                ({"A": [1], "B": [4]}, [0]),
                ({"A": [1, 2], "B": [4, 5]}, [0, 1]),
                ({"A": [2, 3], "B": [5, 6]}, [1, 2]),
            ],
            2,
            1,
        ),
        (
            DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
            [
                ({"A": [1, 2], "B": [4, 5]}, [0, 1]),
                ({"A": [2, 3], "B": [5, 6]}, [1, 2]),
            ],
            2,
            3,
        ),
        (
            DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
            [
                ({"A": [1], "B": [4]}, [0]),
                ({"A": [2], "B": [5]}, [1]),
                ({"A": [3], "B": [6]}, [2]),
            ],
            1,
            1,
        ),
        (DataFrame({"A": [1], "B": [4]}), [], 2, None),
        (DataFrame({"A": [1], "B": [4]}), [], 2, 1),
        (DataFrame(), [({}, [])], 2, None),
        (DataFrame(), [({}, [])], 1, 2),
        (
            DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}),
            [({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2])],
            3,
            2,
        ),
    ],
)
def test_iter_rolling_dataframe(df, expected, window, min_periods):
    """Compare each window yielded by ``df.rolling(...)`` to an expected frame.

    ``expected`` is a list of ``(values, index)`` pairs; each pair is turned
    into a DataFrame and compared with the window at the same position.
    """
    # GH 11704
    expected_frames = []
    for values, index in expected:
        expected_frames.append(DataFrame(values, index=index))

    windows = df.rolling(window, min_periods=min_periods)
    # NOTE(review): zip stops at the shorter input, so a different number of
    # yielded windows than expected frames is not detected by this loop.
    for wanted, actual in zip(expected_frames, windows):
        tm.assert_frame_equal(actual, wanted)


@pytest.mark.parametrize(
    "ser,expected,window, min_periods",
    [
        (Series([1, 2, 3]), [([1, 2, 3], [0, 1, 2])], 3, None),
        (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([2, 3], [1, 2])], 2, 1),
        (Series([1, 2, 3]), [([1, 2], [0, 1]), ([2, 3], [1, 2])], 2, 3),
        (Series([1, 2, 3]), [([1], [0]), ([2], [1]), ([3], [2])], 1, 0),
        (Series([1, 2, 3]), [([1], [0]), ([2], [1]), ([3], [2])], 1, 2),
        (Series([1, 2]), [([1], [0]), ([1, 2], [0, 1])], 2, 0),
        (Series([1, 2]), [([1, 2], [0, 1])], 2, 3),
        (Series([], dtype="int64"), [], 2, 1),
        (Series([], dtype="int64"), [], 2, 3),
    ],
)
def test_iter_rolling_series(ser, expected, window, min_periods):
    """Compare each window yielded by ``ser.rolling(...)`` to an expected Series.

    ``expected`` is a list of ``(values, index)`` pairs; each pair is turned
    into a Series and compared with the window at the same position.
    """
    # GH 11704
    expected_series = []
    for values, index in expected:
        expected_series.append(Series(values, index=index))

    windows = ser.rolling(window, min_periods=min_periods)
    # NOTE(review): zip stops at the shorter input, so a different number of
    # yielded windows than expected Series is not detected by this loop.
    for wanted, actual in zip(expected_series, windows):
        tm.assert_series_equal(actual, wanted)