Skip to content

Initial implementation of rolling iterators #27399

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
0cd558e
Initial implementation of rolling iterators
ThomasKluiters Jul 15, 2019
c33f2a4
Implement simple test cases
ThomasKluiters Jul 15, 2019
285ebcc
Add whatsnew entry
ThomasKluiters Jul 15, 2019
2d782c4
Fix Cython compile error
ThomasKluiters Jul 15, 2019
4271782
Add expanding test cases
ThomasKluiters Jul 15, 2019
68db60a
Implement date time index aware window
ThomasKluiters Jul 15, 2019
3b10a49
Reformat
ThomasKluiters Jul 15, 2019
525bdc6
Format window.pyx
ThomasKluiters Jul 15, 2019
c50a309
Fix failing tests
Jul 16, 2019
ee8f00e
Remove validate from iter
ThomasKluiters Jul 16, 2019
e1cf139
Merge branch 'master' of https://github.com/pandas-dev/pandas into ro…
ThomasKluiters Jul 16, 2019
803a18f
Refactor get_window_indexer to _Window
ThomasKluiters Jul 16, 2019
4c8adff
Merge branch 'master' of https://github.com/pandas-dev/pandas into ro…
ThomasKluiters Jul 17, 2019
1252dfd
Merge branch 'rolling-iterator' of https://www.github.com/ThomasKluit…
ThomasKluiters Jul 17, 2019
e6eb230
Remove enumerate
ThomasKluiters Jul 17, 2019
9833fa6
Add tests for min_periods
ThomasKluiters Jul 17, 2019
0a2b416
Reformat tests
ThomasKluiters Jul 17, 2019
6b559e9
Refactor tests
ThomasKluiters Jul 17, 2019
62f0997
Implement support for nan values
ThomasKluiters Jul 17, 2019
0d2d5b1
Rename duplicate test name
ThomasKluiters Jul 17, 2019
412efa7
Use count() instead of roll_sum()
ThomasKluiters Jul 18, 2019
df5f199
Merge branch 'master' of https://github.com/pandas-dev/pandas into ro…
ThomasKluiters Jul 18, 2019
285b5ba
Update create_blocks and _get_index
ThomasKluiters Jul 18, 2019
33e24cb
Refactor tests into test_iterator
ThomasKluiters Jul 21, 2019
4e2f1a2
Merge branch 'master' of https://github.com/pandas-dev/pandas into ro…
ThomasKluiters Jul 21, 2019
755e0c1
Move whatsnew entry to 1.0.0
ThomasKluiters Jul 21, 2019
d58e897
Add iterators to computation.rst
ThomasKluiters Jul 21, 2019
e3b060c
Add issue #
ThomasKluiters Jul 21, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1206,6 +1206,7 @@ Groupby/resample/rolling
- Improved :class:`pandas.core.window.Rolling`, :class:`pandas.core.window.Window` and :class:`pandas.core.window.EWM` functions to exclude nuisance columns from results instead of raising errors and raise a ``DataError`` only if all columns are nuisance (:issue:`12537`)
- Bug in :meth:`pandas.core.window.Rolling.max` and :meth:`pandas.core.window.Rolling.min` where incorrect results are returned with an empty variable window (:issue:`26005`)
- Raise a helpful exception when an unsupported weighted window function is used as an argument of :meth:`pandas.core.window.Window.aggregate` (:issue:`26597`)
- Improved :class:`pandas.core.window.Rolling` and :class:`pandas.core.window.Window`: it is now possible to iterate over a window object (:issue:`11704`)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

move to 1.0 whatsnew

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also provide a reference to the new doc section.


Reshaping
^^^^^^^^^
Expand Down
26 changes: 24 additions & 2 deletions pandas/core/window.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,30 @@ def __repr__(self) -> str:
)

def __iter__(self):
    """
    Iterate over the windows of the underlying object.

    Each block returned by ``self._create_blocks()`` is walked position by
    position; for every position the corresponding window is sliced out
    with ``.iloc`` and yielded.

    Yields
    ------
    The ``.iloc`` slice of the current block covering one window. A window
    is skipped when the smallest non-null count reported by ``count()``
    is below the minimum number of periods returned by the window indexer.
    """
    closed = self.closed
    window = self._get_window()
    minp = _use_window(self.min_periods, window)
    offset = _offset(window, self.center)

    # Only the blocks are needed here; the second return value is unused.
    blocks, _ = self._create_blocks()
    index = self._get_index()

    for values in blocks:
        start, end, N, win, _minp, is_variable = libwindow.get_window_indexer(
            np.asarray(values), window, minp, index, closed
        )

        for i in range(N):
            if is_variable:
                # Variable-size windows: bounds are precomputed by the indexer.
                lo, hi = start[i], end[i]
            else:
                # Fixed-size windows: derive the bounds from the position,
                # clipped to the valid range [0, N].
                lo = max(i - win + offset + 1, 0)
                hi = min(i + offset + 1, N)

            result = values.iloc[lo:hi]
            if result.count().min() >= _minp:
                yield result

def _get_index(self) -> Optional[np.ndarray]:
"""
Expand Down
63 changes: 56 additions & 7 deletions pandas/tests/window/test_expanding.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,13 +90,62 @@ def test_missing_minp_zero(self):
expected = pd.Series([np.nan])
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame])
def test_iter_raises(self, klass):
    """Check that calling ``iter`` on an expanding window raises."""
    # Iteration over a Window, see
    # https://github.com/pandas-dev/pandas/issues/11704
    data = klass([1, 2, 3, 4])
    with pytest.raises(NotImplementedError):
        iter(data.expanding(2))
@pytest.mark.parametrize(
    "dataframe,expected,window",
    [
        (
            DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
            [({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2])],
            3,
        ),
        (
            DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
            [
                ({"A": [1, 2], "B": [4, 5]}, [0, 1]),
                ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]),
            ],
            2,
        ),
        (
            DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
            [
                ({"A": [1], "B": [4]}, [0]),
                ({"A": [1, 2], "B": [4, 5]}, [0, 1]),
                ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]),
            ],
            1,
        ),
        (DataFrame({"A": [1], "B": [4]}), [], 1337),
        (DataFrame(), [({}, [])], 1337),
    ],
)
def test_iterator_dataframe(self, dataframe, expected, window):
    """Compare frames yielded by iterating ``expanding`` with expectations."""
    # GH 11704
    frames = [DataFrame(data, index=labels) for data, labels in expected]
    windows = dataframe.expanding(min_periods=window)

    # NOTE: zip() stops at the shorter input, so a difference in the number
    # of windows (including an empty ``expected``) is not detected here.
    for want, got in zip(frames, windows):
        tm.assert_frame_equal(got, want)

@pytest.mark.parametrize(
    "series,expected,window",
    [
        (Series([1, 2, 3]), [([1, 2, 3], [0, 1, 2])], 3),
        (Series([1, 2, 3]), [([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 2),
        (
            Series([1, 2, 3]),
            [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])],
            1,
        ),
        (Series([1, 2]), [([1, 2], [0, 1])], 1337),
        (Series([]), [], 1337),
    ],
)
def test_iterator_series(self, series, expected, window):
    """Compare series yielded by iterating ``expanding`` with expectations."""
    # GH 11704
    pieces = [Series(data, index=labels) for data, labels in expected]
    windows = series.expanding(min_periods=window)

    # NOTE: zip() stops at the shorter input, so a difference in the number
    # of windows (including an empty ``expected``) is not detected here.
    for want, got in zip(pieces, windows):
        tm.assert_series_equal(got, want)

def test_expanding_axis(self, axis_frame):
# see gh-23372.
Expand Down
133 changes: 125 additions & 8 deletions pandas/tests/window/test_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,131 @@ def test_constructor_timedelta_window_and_minperiods(self, window, raw):
tm.assert_frame_equal(result_roll_sum, expected)
tm.assert_frame_equal(result_roll_generic, expected)

@pytest.mark.parametrize(
    "dataframe,expected,window",
    [
        (
            DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
            [({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2])],
            3,
        ),
        (
            DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
            [
                ({"A": [1, 2], "B": [4, 5]}, [0, 1]),
                ({"A": [2, 3], "B": [5, 6]}, [1, 2]),
            ],
            2,
        ),
        (
            DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
            [
                ({"A": [1], "B": [4]}, [0]),
                ({"A": [2], "B": [5]}, [1]),
                ({"A": [3], "B": [6]}, [2]),
            ],
            1,
        ),
        (DataFrame({"A": [1], "B": [4]}), [], 1337),
        (DataFrame(), [({}, [])], 1337),
    ],
)
def test_iterator_dataframe(self, dataframe, expected, window):
    """Compare frames yielded by iterating ``rolling`` with expectations."""
    frames = [DataFrame(data, index=labels) for data, labels in expected]
    windows = dataframe.rolling(window)

    # NOTE: zip() stops at the shorter input, so a difference in the number
    # of windows (including an empty ``expected``) is not detected here.
    for want, got in zip(frames, windows):
        tm.assert_frame_equal(got, want)

@pytest.mark.parametrize(
    "series,expected,window",
    [
        (Series([1, 2, 3]), [([1, 2, 3], [0, 1, 2])], 3),
        (Series([1, 2, 3]), [([1, 2], [0, 1]), ([2, 3], [1, 2])], 2),
        (Series([1, 2, 3]), [([1], [0]), ([2], [1]), ([3], [2])], 1),
        (Series([1, 2]), [([1, 2], [0, 1])], 1337),
        (Series([]), [], 1337),
    ],
)
def test_iterator_series(self, series, expected, window):
    """Compare series yielded by iterating ``rolling`` with expectations."""
    pieces = [Series(data, index=labels) for data, labels in expected]
    windows = series.rolling(window)

    # NOTE: zip() stops at the shorter input, so a difference in the number
    # of windows (including an empty ``expected``) is not detected here.
    for want, got in zip(pieces, windows):
        tm.assert_series_equal(got, want)

@pytest.mark.parametrize(
    "expected,window,minp",
    [
        (
            [
                ({"A": [1, 2], "B": [4, 5]}, [0, 1]),
                ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]),
            ],
            3,
            2,
        ),
        ([], 3, 4),
    ],
)
def test_iterator_dataframe_min_periods(self, expected, window, minp):
    """Compare frames yielded by ``rolling`` with ``min_periods`` set."""
    dataframe = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
    frames = [DataFrame(data, index=labels) for data, labels in expected]
    windows = dataframe.rolling(window, min_periods=minp)

    # NOTE: zip() stops at the shorter input, so a difference in the number
    # of windows (including an empty ``expected``) is not detected here.
    for want, got in zip(frames, windows):
        tm.assert_frame_equal(got, want)

@pytest.mark.parametrize(
    "expected,window,minp",
    [([([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 3, 2), ([], 3, 4)],
)
def test_iterator_series_min_periods(self, expected, window, minp):
    """Compare series yielded by ``rolling`` with ``min_periods`` set."""
    series = Series([1, 2, 3])
    pieces = [Series(data, index=labels) for data, labels in expected]
    windows = series.rolling(window, min_periods=minp)

    # NOTE: zip() stops at the shorter input, so a difference in the number
    # of windows (including an empty ``expected``) is not detected here.
    for want, got in zip(pieces, windows):
        tm.assert_series_equal(got, want)

@pytest.mark.parametrize(
    "expected,window,minp",
    [
        ([([1.0, np.nan, 3.0], [0, 1, 2]), ([np.nan, 3.0, 4.0], [1, 2, 3])], 3, 2),
        ([([1.0, np.nan, 3.0, 4.0], [0, 1, 2, 3])], 4, 3),
        ([], 4, 4),
    ],
)
def test_iterator_series_min_periods_nan(self, expected, window, minp):
    """Compare yielded series when the input series contains a NaN."""
    series = Series([1, np.nan, 3, 4])
    pieces = [Series(data, index=labels) for data, labels in expected]
    windows = series.rolling(window, min_periods=minp)

    # NOTE: zip() stops at the shorter input, so a difference in the number
    # of windows (including an empty ``expected``) is not detected here.
    for want, got in zip(pieces, windows):
        tm.assert_series_equal(got, want)

@pytest.mark.parametrize(
    "expected,window,minp",
    [
        ([({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2])], 3, 2),
        ([], 2, 2),
        ([], 3, 4),
    ],
)
def test_iterator_dataframe_min_periods_nan(self, expected, window, minp):
    """Compare yielded frames when the input frame contains NaNs."""
    dataframe = DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]})
    frames = [DataFrame(data, index=labels) for data, labels in expected]
    windows = dataframe.rolling(window, min_periods=minp)

    # NOTE: zip() stops at the shorter input, so a difference in the number
    # of windows (including an empty ``expected``) is not detected here.
    for want, got in zip(frames, windows):
        tm.assert_frame_equal(got, want)

@pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"])
def test_numpy_compat(self, method):
# see gh-12811
Expand Down Expand Up @@ -291,14 +416,6 @@ def test_multi_index_names(self):
tm.assert_index_equal(result.columns, df.columns)
assert result.index.names == [None, "1", "2"]

@pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame])
def test_iter_raises(self, klass):
    """Check that calling ``iter`` on a rolling window raises."""
    # Iteration over a Window, see
    # https://github.com/pandas-dev/pandas/issues/11704
    data = klass([1, 2, 3, 4])
    with pytest.raises(NotImplementedError):
        iter(data.rolling(2))

def test_rolling_axis_sum(self, axis_frame):
# see gh-23372.
df = DataFrame(np.ones((10, 20)))
Expand Down
79 changes: 79 additions & 0 deletions pandas/tests/window/test_timeseries_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -690,3 +690,82 @@ def test_rolling_cov_offset(self):

expected2 = ss.rolling(3, min_periods=1).cov()
tm.assert_series_equal(result, expected2)

@pytest.mark.parametrize(
    "expected,window",
    [
        ([([0], [0]), ([1], [1]), ([2], [2]), ([3], [3]), ([4], [4])], "1s"),
        (
            [
                ([0], [0]),
                ([0, 1], [0, 1]),
                ([1, 2], [1, 2]),
                ([2, 3], [2, 3]),
                ([3, 4], [3, 4]),
            ],
            "2S",
        ),
        (
            [
                ([0], [0]),
                ([0, 1], [0, 1]),
                ([0, 1, 2], [0, 1, 2]),
                ([1, 2, 3], [1, 2, 3]),
                ([2, 3, 4], [2, 3, 4]),
            ],
            "3S",
        ),
    ],
)
def test_iterator_series_rolling(self, expected, window):
    """Compare series yielded by an offset-based ``rolling`` window."""
    stamps = date_range(start="2016-01-01 09:30:00", periods=5, freq="s")
    series = Series(range(5), index=stamps)

    # ``expected`` holds (values, positions); positions select index labels.
    pieces = [
        Series(data, index=series.index[positions]) for data, positions in expected
    ]

    # NOTE: zip() stops at the shorter input, so a difference in the number
    # of windows is not detected here.
    for want, got in zip(pieces, series.rolling(window)):
        tm.assert_series_equal(got, want)

@pytest.mark.parametrize(
    "expected,window,minp",
    [
        ([], "1s", 2),
        (
            [
                ([0, 1], [0, 1]),
                ([1, 2], [1, 2]),
                ([2, 3], [2, 3]),
                ([3, 4], [3, 4]),
            ],
            "2S",
            2,
        ),
        (
            [
                ([0], [0]),
                ([0, 1], [0, 1]),
                ([0, 1, 2], [0, 1, 2]),
                ([1, 2, 3], [1, 2, 3]),
                ([2, 3, 4], [2, 3, 4]),
            ],
            "3S",
            1,
        ),
    ],
)
def test_iterator_series_rolling_min_periods(self, expected, window, minp):
    """Compare series yielded by an offset window with ``min_periods``."""
    stamps = date_range(start="2016-01-01 09:30:00", periods=5, freq="s")
    series = Series(range(5), index=stamps)

    # ``expected`` holds (values, positions); positions select index labels.
    pieces = [
        Series(data, index=series.index[positions]) for data, positions in expected
    ]
    windows = series.rolling(window, min_periods=minp)

    # NOTE: zip() stops at the shorter input, so a difference in the number
    # of windows (including an empty ``expected``) is not detected here.
    for want, got in zip(pieces, windows):
        tm.assert_series_equal(got, want)