From 0cd558eb8ddf6a91789554ed0fcca63137866d14 Mon Sep 17 00:00:00 2001 From: thomas Date: Mon, 15 Jul 2019 17:40:44 +0200 Subject: [PATCH 01/23] Initial implementation of rolling iterators --- pandas/_libs/window.pyx | 34 ++++++++++++++++++++++++++++++++++ pandas/core/window.py | 18 ++++++++++++++++-- 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index 46e4b17b8164c..ba6a5c0274f82 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -354,6 +354,40 @@ def get_window_indexer(values, win, minp, index, closed, return indexer.get_data() +cdef class WindowIterator: + cdef: + int64_t i, s, e, N, win, minp + float64_t[:,:] values + int64_t[:] start, end + object index, is_variable + + def __init__(self, ndarray[float64_t, ndim=2] values, int64_t win, object index, object closed, int64_t minp): + self.values = values + self.index = index + self.i = 0 + + self.start, self.end, self.N, self.win, _, self.is_variable = get_window_indexer( + values, win, minp, None, closed + ) + + def __iter__(self): + return self + + def __next__(self): + if self.i >= self.N: + raise StopIteration + + if self.is_variable: + s = self.start[self.i] + e = self.end[self.i] + else: + s = int_max(self.i - self.win + 1, 0) + e = int_min(self.i + 1, self.N) + + self.i = self.i + 1 + + return self.values[slice(s, e),:] + # ---------------------------------------------------------------------- # Rolling count # this is only an impl for index not None, IOW, freq aware diff --git a/pandas/core/window.py b/pandas/core/window.py index 0c1f6a1a6dace..29a8bb5815bc9 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -204,8 +204,22 @@ def __repr__(self): ) def __iter__(self): - url = "https://github.com/pandas-dev/pandas/issues/11704" - raise NotImplementedError("See issue #11704 {url}".format(url=url)) + window = self._get_window() + minp = _use_window(self.min_periods, window) + + blocks, obj, index = 
self._create_blocks() + + iterators = [] + + for i, b in enumerate(blocks): + values = self._prep_values(b.values) + + if values.ndim == 1: + values = np.expand_dims(values, axis=1) + + iterators.append(libwindow.WindowIterator(values, window, b.index, self.closed, minp)) + + return (elem for iterator in iterators for elem in iterator) def _get_index(self, index=None): """ From c33f2a451dccb13a46afc874cb22ec507359b35b Mon Sep 17 00:00:00 2001 From: thomas Date: Mon, 15 Jul 2019 18:57:59 +0200 Subject: [PATCH 02/23] Implement simple test cases --- pandas/_libs/window.pyx | 15 ++++--- pandas/core/window.py | 11 ++--- pandas/tests/window/test_rolling.py | 68 +++++++++++++++++++++++++---- 3 files changed, 72 insertions(+), 22 deletions(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index ba6a5c0274f82..213cf152de9cc 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -357,17 +357,15 @@ def get_window_indexer(values, win, minp, index, closed, cdef class WindowIterator: cdef: int64_t i, s, e, N, win, minp - float64_t[:,:] values int64_t[:] start, end - object index, is_variable + object values, is_variable - def __init__(self, ndarray[float64_t, ndim=2] values, int64_t win, object index, object closed, int64_t minp): + def __init__(self, object values, int64_t win, object closed, int64_t minp): self.values = values - self.index = index self.i = 0 - self.start, self.end, self.N, self.win, _, self.is_variable = get_window_indexer( - values, win, minp, None, closed + self.start, self.end, self.N, self.win, self.minp, self.is_variable = get_window_indexer( + np.asarray(values), win, minp, None, closed ) def __iter__(self): @@ -386,7 +384,10 @@ cdef class WindowIterator: self.i = self.i + 1 - return self.values[slice(s, e),:] + if e - s < minp: + return self.__next__() + + return self.values.iloc[slice(s, e)] # ---------------------------------------------------------------------- # Rolling count diff --git a/pandas/core/window.py 
b/pandas/core/window.py index 29a8bb5815bc9..7c068115dff61 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -211,14 +211,11 @@ def __iter__(self): iterators = [] - for i, b in enumerate(blocks): - values = self._prep_values(b.values) - - if values.ndim == 1: - values = np.expand_dims(values, axis=1) + for i, values in enumerate(blocks): + iterators.append( + libwindow.WindowIterator(values, window, self.closed, minp) + ) - iterators.append(libwindow.WindowIterator(values, window, b.index, self.closed, minp)) - return (elem for iterator in iterators for elem in iterator) def _get_index(self, index=None): diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index c7177e1d3914f..2870d54f1ec3b 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -98,6 +98,66 @@ def test_constructor_timedelta_window_and_minperiods(self, window, raw): tm.assert_frame_equal(result_roll_sum, expected) tm.assert_frame_equal(result_roll_generic, expected) + @pytest.mark.parametrize( + "dataframe,expected,window", + [ + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), + ], + 3, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [2, 3], "B": [5, 6]}, [1, 2]), + ], + 2, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [2], "B": [5]}, [1]), + ({"A": [3], "B": [6]}, [2]), + ], + 1, + ), + (DataFrame({"A": [1], "B": [4]}), [({"A": [1], "B": [4]}, [0])], 1337), + (DataFrame(), [({}, [])], 1337), + ], + ) + def test_iterator_dataframe(self, dataframe, expected, window): + expected = [DataFrame(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip(expected, dataframe.rolling(window)): + tm.assert_frame_equal(actual, 
expected) + + @pytest.mark.parametrize( + "series,expected,window", + [ + ( + Series([1, 2, 3]), + [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], + 3, + ), + (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([2, 3], [1, 2])], 2), + (Series([1, 2, 3]), [([1], [0]), ([2], [1]), ([3], [2])], 1), + (Series([1]), [([1], [0])], 1337), + (Series([]), [], 1337), + ], + ) + def test_iterator_series(self, series, expected, window): + expected = [Series(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip(expected, series.rolling(window)): + tm.assert_series_equal(actual, expected) + @pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"]) def test_numpy_compat(self, method): # see gh-12811 @@ -291,14 +351,6 @@ def test_multi_index_names(self): tm.assert_index_equal(result.columns, df.columns) assert result.index.names == [None, "1", "2"] - @pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame]) - def test_iter_raises(self, klass): - # https://github.com/pandas-dev/pandas/issues/11704 - # Iteration over a Window - obj = klass([1, 2, 3, 4]) - with pytest.raises(NotImplementedError): - iter(obj.rolling(2)) - def test_rolling_axis_sum(self, axis_frame): # see gh-23372. 
df = DataFrame(np.ones((10, 20))) From 285ebcce90bfbdb6d4e0fb4bcc0aebe2e64ad1e4 Mon Sep 17 00:00:00 2001 From: thomas Date: Mon, 15 Jul 2019 18:58:10 +0200 Subject: [PATCH 03/23] Add whatsnew entry --- doc/source/whatsnew/v0.25.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index eeaafd7ad7d51..6b1ce3cbf05af 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -1140,6 +1140,7 @@ Groupby/resample/rolling - Improved :class:`pandas.core.window.Rolling`, :class:`pandas.core.window.Window` and :class:`pandas.core.window.EWM` functions to exclude nuisance columns from results instead of raising errors and raise a ``DataError`` only if all columns are nuisance (:issue:`12537`) - Bug in :meth:`pandas.core.window.Rolling.max` and :meth:`pandas.core.window.Rolling.min` where incorrect results are returned with an empty variable window (:issue:`26005`) - Raise a helpful exception when an unsupported weighted window function is used as an argument of :meth:`pandas.core.window.Window.aggregate` (:issue:`26597`) +- Added support for iterating over :class:`pandas.core.window.Rolling` and :class:`pandas.core.window.Window` objects (:issue:`11704`) Reshaping ^^^^^^^^^ From 2d782c44e62151a650a3d38b021544d38e1189c3 Mon Sep 17 00:00:00 2001 From: thomas Date: Mon, 15 Jul 2019 19:33:12 +0200 Subject: [PATCH 04/23] Fix Cython compile error --- pandas/_libs/window.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index 213cf152de9cc..6014b8f7cda7c 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -384,7 +384,7 @@ cdef class WindowIterator: self.i = self.i + 1 - if e - s < minp: + if e - s < self.minp: return self.__next__() return self.values.iloc[slice(s, e)] From 42717823b7304e53a8be0c7ec9e0551bfb56ec10 Mon Sep 17 00:00:00 2001 From: thomas Date: Mon, 15 Jul 2019 19:33:21
+0200 Subject: [PATCH 05/23] Add expanding test cases --- pandas/tests/window/test_expanding.py | 59 +++++++++++++++++++++++---- pandas/tests/window/test_rolling.py | 15 ++----- 2 files changed, 56 insertions(+), 18 deletions(-) diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index 1e92c981964c5..7e2da8f6a6a95 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -90,13 +90,58 @@ def test_missing_minp_zero(self): expected = pd.Series([np.nan]) tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame]) - def test_iter_raises(self, klass): - # https://github.com/pandas-dev/pandas/issues/11704 - # Iteration over a Window - obj = klass([1, 2, 3, 4]) - with pytest.raises(NotImplementedError): - iter(obj.expanding(2)) + @pytest.mark.parametrize( + "dataframe,expected,window", + [ + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), + ], + 3, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), + ], + 2, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), + ], + 1, + ), + (DataFrame({"A": [1], "B": [4]}), [], 1337), + (DataFrame(), [({}, [])], 1337), + ], + ) + def test_iterator_dataframe(self, dataframe, expected, window): + expected = [DataFrame(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip(expected, dataframe.expanding(window)): + tm.assert_frame_equal(actual, expected) + + @pytest.mark.parametrize( + "series,expected,window", + [ + (Series([1, 2, 3]), [([1, 2, 3], [0, 1, 2])], 3), + (Series([1, 2, 3]), [([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 2), + (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 
1), + (Series([1, 2]), [([1, 2], [0, 1])], 1337), + (Series([]), [], 1337), + ], + ) + def test_iterator_series(self, series, expected, window): + expected = [Series(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip(expected, series.expanding(window)): + tm.assert_series_equal(actual, expected) def test_expanding_axis(self, axis_frame): # see gh-23372. diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 2870d54f1ec3b..35300c3bc7e0c 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -104,8 +104,6 @@ def test_constructor_timedelta_window_and_minperiods(self, window, raw): ( DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), [ - ({"A": [1], "B": [4]}, [0]), - ({"A": [1, 2], "B": [4, 5]}, [0, 1]), ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), ], 3, @@ -113,7 +111,6 @@ def test_constructor_timedelta_window_and_minperiods(self, window, raw): ( DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), [ - ({"A": [1], "B": [4]}, [0]), ({"A": [1, 2], "B": [4, 5]}, [0, 1]), ({"A": [2, 3], "B": [5, 6]}, [1, 2]), ], @@ -128,7 +125,7 @@ def test_constructor_timedelta_window_and_minperiods(self, window, raw): ], 1, ), - (DataFrame({"A": [1], "B": [4]}), [({"A": [1], "B": [4]}, [0])], 1337), + (DataFrame({"A": [1], "B": [4]}), [], 1337), (DataFrame(), [({}, [])], 1337), ], ) @@ -141,14 +138,10 @@ def test_iterator_dataframe(self, dataframe, expected, window): @pytest.mark.parametrize( "series,expected,window", [ - ( - Series([1, 2, 3]), - [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], - 3, - ), - (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([2, 3], [1, 2])], 2), + (Series([1, 2, 3]), [([1, 2, 3], [0, 1, 2])], 3), + (Series([1, 2, 3]), [([1, 2], [0, 1]), ([2, 3], [1, 2])], 2), (Series([1, 2, 3]), [([1], [0]), ([2], [1]), ([3], [2])], 1), - (Series([1]), [([1], [0])], 1337), + (Series([1, 2]), [([1, 2], [0, 1])], 1337), (Series([]), [], 1337), ], ) From 
68db60a244288db889fff635e1dc3bd40ab28039 Mon Sep 17 00:00:00 2001 From: thomas Date: Mon, 15 Jul 2019 20:27:02 +0200 Subject: [PATCH 06/23] Implement date time index aware window --- pandas/_libs/window.pyx | 4 +- pandas/core/window.py | 4 +- pandas/tests/window/test_timeseries_window.py | 43 +++++++++++++++++++ 3 files changed, 48 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index 6014b8f7cda7c..77b21be7a332f 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -360,12 +360,12 @@ cdef class WindowIterator: int64_t[:] start, end object values, is_variable - def __init__(self, object values, int64_t win, object closed, int64_t minp): + def __init__(self, object values, object index, int64_t win, object closed, int64_t minp): self.values = values self.i = 0 self.start, self.end, self.N, self.win, self.minp, self.is_variable = get_window_indexer( - np.asarray(values), win, minp, None, closed + np.asarray(values), win, minp, index, closed ) def __iter__(self): diff --git a/pandas/core/window.py b/pandas/core/window.py index 7c068115dff61..2697a6779028a 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -204,16 +204,18 @@ def __repr__(self): ) def __iter__(self): + self.validate() window = self._get_window() minp = _use_window(self.min_periods, window) blocks, obj, index = self._create_blocks() + index, indexi = self._get_index(index) iterators = [] for i, values in enumerate(blocks): iterators.append( - libwindow.WindowIterator(values, window, self.closed, minp) + libwindow.WindowIterator(values, indexi, window, self.closed, minp) ) return (elem for iterator in iterators for elem in iterator) diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py index e057eadae9da8..b6b60fbe1674f 100644 --- a/pandas/tests/window/test_timeseries_window.py +++ b/pandas/tests/window/test_timeseries_window.py @@ -690,3 +690,46 @@ def 
test_rolling_cov_offset(self): expected2 = ss.rolling(3, min_periods=1).cov() tm.assert_series_equal(result, expected2) + + @pytest.mark.parametrize( + "series,expected,window", + [ + ( + Series(range(5), index=date_range(start="2016-01-01 09:30:00", periods=5, freq="s")), + [ + ([0], [0]), + ([1], [1]), + ([2], [2]), + ([3], [3]), + ([4], [4])], + "1s" + ), + ( + Series(range(5), index=date_range(start="2016-01-01 09:30:00", periods=5, freq="s")), + [ + ([0], [0]), + ([0, 1], [0, 1]), + ([1, 2], [1, 2]), + ([2, 3], [2, 3]), + ([3, 4], [3, 4]) + ], + "2S" + ), + ( + Series(range(5), index=date_range(start="2016-01-01 09:30:00", periods=5, freq="s")), + [ + ([0], [0]), + ([0, 1], [0, 1]), + ([0, 1, 2], [0, 1, 2]), + ([1, 2, 3], [1, 2, 3]), + ([2, 3, 4], [2, 3, 4]), + ], + "3S" + ) + ], + ) + def test_iterator_series_rolling(self, series, expected, window): + expected = [Series(values, index=series.index[index]) for (values, index) in expected] + + for (expected, actual) in zip(expected, series.rolling(window)): + tm.assert_series_equal(actual, expected) \ No newline at end of file From 3b10a49ddf24d188bbe6182d249fa42589ba4b88 Mon Sep 17 00:00:00 2001 From: thomas Date: Mon, 15 Jul 2019 20:29:12 +0200 Subject: [PATCH 07/23] Reformat --- pandas/tests/window/test_expanding.py | 10 ++-- pandas/tests/window/test_rolling.py | 4 +- pandas/tests/window/test_timeseries_window.py | 46 +++++++++++-------- 3 files changed, 33 insertions(+), 27 deletions(-) diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index 7e2da8f6a6a95..78c0b85a6c1cb 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -95,9 +95,7 @@ def test_missing_minp_zero(self): [ ( DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), - [ - ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), - ], + [({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2])], 3, ), ( @@ -132,7 +130,11 @@ def test_iterator_dataframe(self, dataframe, expected, window): [ 
(Series([1, 2, 3]), [([1, 2, 3], [0, 1, 2])], 3), (Series([1, 2, 3]), [([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 2), - (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 1), + ( + Series([1, 2, 3]), + [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], + 1, + ), (Series([1, 2]), [([1, 2], [0, 1])], 1337), (Series([]), [], 1337), ], diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 35300c3bc7e0c..d06c4f4f98a86 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -103,9 +103,7 @@ def test_constructor_timedelta_window_and_minperiods(self, window, raw): [ ( DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), - [ - ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), - ], + [({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2])], 3, ), ( diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py index b6b60fbe1674f..e33122558f045 100644 --- a/pandas/tests/window/test_timeseries_window.py +++ b/pandas/tests/window/test_timeseries_window.py @@ -695,41 +695,47 @@ def test_rolling_cov_offset(self): "series,expected,window", [ ( - Series(range(5), index=date_range(start="2016-01-01 09:30:00", periods=5, freq="s")), - [ - ([0], [0]), - ([1], [1]), - ([2], [2]), - ([3], [3]), - ([4], [4])], - "1s" + Series( + range(5), + index=date_range(start="2016-01-01 09:30:00", periods=5, freq="s"), + ), + [([0], [0]), ([1], [1]), ([2], [2]), ([3], [3]), ([4], [4])], + "1s", ), ( - Series(range(5), index=date_range(start="2016-01-01 09:30:00", periods=5, freq="s")), + Series( + range(5), + index=date_range(start="2016-01-01 09:30:00", periods=5, freq="s"), + ), [ - ([0], [0]), - ([0, 1], [0, 1]), + ([0], [0]), + ([0, 1], [0, 1]), ([1, 2], [1, 2]), ([2, 3], [2, 3]), - ([3, 4], [3, 4]) + ([3, 4], [3, 4]), ], - "2S" + "2S", ), ( - Series(range(5), index=date_range(start="2016-01-01 09:30:00", periods=5, freq="s")), + Series( + range(5), + 
index=date_range(start="2016-01-01 09:30:00", periods=5, freq="s"), + ), [ ([0], [0]), - ([0, 1], [0, 1]), - ([0, 1, 2], [0, 1, 2]), + ([0, 1], [0, 1]), + ([0, 1, 2], [0, 1, 2]), ([1, 2, 3], [1, 2, 3]), ([2, 3, 4], [2, 3, 4]), ], - "3S" - ) + "3S", + ), ], ) def test_iterator_series_rolling(self, series, expected, window): - expected = [Series(values, index=series.index[index]) for (values, index) in expected] + expected = [ + Series(values, index=series.index[index]) for (values, index) in expected + ] for (expected, actual) in zip(expected, series.rolling(window)): - tm.assert_series_equal(actual, expected) \ No newline at end of file + tm.assert_series_equal(actual, expected) From 525bdc6c5207553d7cf09e671b0a54b1a531ad14 Mon Sep 17 00:00:00 2001 From: thomas Date: Mon, 15 Jul 2019 21:15:06 +0200 Subject: [PATCH 08/23] Format window.pyx --- pandas/_libs/window.pyx | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index 77b21be7a332f..e3fc8a97d7643 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -360,11 +360,13 @@ cdef class WindowIterator: int64_t[:] start, end object values, is_variable - def __init__(self, object values, object index, int64_t win, object closed, int64_t minp): + def __init__(self, object values, object index, + int64_t win, object closed, int64_t minp): self.values = values self.i = 0 - self.start, self.end, self.N, self.win, self.minp, self.is_variable = get_window_indexer( + self.start, self.end, self.N, + self.win, self.minp, self.is_variable = get_window_indexer( np.asarray(values), win, minp, index, closed ) @@ -374,7 +376,6 @@ cdef class WindowIterator: def __next__(self): if self.i >= self.N: raise StopIteration - if self.is_variable: s = self.start[self.i] e = self.end[self.i] From c50a30904e21d213fad2a943190e78225e29ea15 Mon Sep 17 00:00:00 2001 From: Thomas Kluiters Date: Tue, 16 Jul 2019 14:36:04 +0000 Subject: [PATCH 09/23] Fix failing 
tests --- pandas/_libs/window.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index e3fc8a97d7643..73ec06a230500 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -365,8 +365,8 @@ cdef class WindowIterator: self.values = values self.i = 0 - self.start, self.end, self.N, - self.win, self.minp, self.is_variable = get_window_indexer( + self.start, self.end, self.N, self.win, \ + self.minp, self.is_variable = get_window_indexer( np.asarray(values), win, minp, index, closed ) From ee8f00e67f22cf5ac35328c60aa4d7fe3d33b4c1 Mon Sep 17 00:00:00 2001 From: Thomas Kluiters Date: Tue, 16 Jul 2019 14:38:51 +0000 Subject: [PATCH 10/23] Remove validate from iter --- pandas/core/window.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index 2697a6779028a..c2377dfd40cb5 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -204,7 +204,6 @@ def __repr__(self): ) def __iter__(self): - self.validate() window = self._get_window() minp = _use_window(self.min_periods, window) From 803a18f23956d8667c899a141b4e545dd986cfca Mon Sep 17 00:00:00 2001 From: thomas Date: Tue, 16 Jul 2019 20:18:21 +0200 Subject: [PATCH 11/23] Refactor get_window_indexer to _Window --- pandas/_libs/window.pyx | 36 ------------------------------------ pandas/core/window.py | 21 ++++++++++++++------- 2 files changed, 14 insertions(+), 43 deletions(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index 310322b02532d..0a986942d2a09 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -354,42 +354,6 @@ def get_window_indexer(values, win, minp, index, closed, return indexer.get_data() -cdef class WindowIterator: - cdef: - int64_t i, s, e, N, win, minp - int64_t[:] start, end - object values, is_variable - - def __init__(self, object values, object index, - int64_t win, object closed, int64_t minp): - self.values = values - self.i = 
0 - - self.start, self.end, self.N, self.win, \ - self.minp, self.is_variable = get_window_indexer( - np.asarray(values), win, minp, index, closed - ) - - def __iter__(self): - return self - - def __next__(self): - if self.i >= self.N: - raise StopIteration - if self.is_variable: - s = self.start[self.i] - e = self.end[self.i] - else: - s = int_max(self.i - self.win + 1, 0) - e = int_min(self.i + 1, self.N) - - self.i = self.i + 1 - - if e - s < self.minp: - return self.__next__() - - return self.values.iloc[slice(s, e)] - # ---------------------------------------------------------------------- # Rolling count # this is only an impl for index not None, IOW, freq aware diff --git a/pandas/core/window.py b/pandas/core/window.py index c2377dfd40cb5..4423a2a6bc556 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -208,16 +208,23 @@ def __iter__(self): minp = _use_window(self.min_periods, window) blocks, obj, index = self._create_blocks() - index, indexi = self._get_index(index) + _, indexi = self._get_index(index) - iterators = [] - - for i, values in enumerate(blocks): - iterators.append( - libwindow.WindowIterator(values, indexi, window, self.closed, minp) + for _, values in enumerate(blocks): + start, end, N, win, _minp, is_variable = libwindow.get_window_indexer( + np.asarray(values), window, minp, indexi, self.closed ) - return (elem for iterator in iterators for elem in iterator) + for i in range(N): + if is_variable: + s = start[i] + e = end[i] + else: + s = max(i - win + 1, 0) + e = min(i + 1, N) + + if e - s >= _minp: + yield values.iloc[slice(s, e)] def _get_index(self, index=None): """ From e6eb23032171c6f213443a33b19bb8ada463bf33 Mon Sep 17 00:00:00 2001 From: thomas Date: Wed, 17 Jul 2019 18:44:03 +0200 Subject: [PATCH 12/23] Remove enumerate --- pandas/core/window.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index 4423a2a6bc556..6be3e5e6342be 100644 --- 
a/pandas/core/window.py +++ b/pandas/core/window.py @@ -210,7 +210,7 @@ def __iter__(self): blocks, obj, index = self._create_blocks() _, indexi = self._get_index(index) - for _, values in enumerate(blocks): + for values in blocks: start, end, N, win, _minp, is_variable = libwindow.get_window_indexer( np.asarray(values), window, minp, indexi, self.closed ) From 9833fa64e6eb851172966244f0416c2cdd52ea92 Mon Sep 17 00:00:00 2001 From: thomas Date: Wed, 17 Jul 2019 19:06:56 +0200 Subject: [PATCH 13/23] Add tests for min_periods --- pandas/tests/window/test_expanding.py | 4 +- pandas/tests/window/test_rolling.py | 39 ++++++++++++++ pandas/tests/window/test_timeseries_window.py | 51 +++++++++++++++++++ 3 files changed, 92 insertions(+), 2 deletions(-) diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index 78c0b85a6c1cb..a0fe1dbe56098 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -122,7 +122,7 @@ def test_missing_minp_zero(self): def test_iterator_dataframe(self, dataframe, expected, window): expected = [DataFrame(values, index=index) for (values, index) in expected] - for (expected, actual) in zip(expected, dataframe.expanding(window)): + for (expected, actual) in zip(expected, dataframe.expanding(min_periods=window)): tm.assert_frame_equal(actual, expected) @pytest.mark.parametrize( @@ -142,7 +142,7 @@ def test_iterator_dataframe(self, dataframe, expected, window): def test_iterator_series(self, series, expected, window): expected = [Series(values, index=index) for (values, index) in expected] - for (expected, actual) in zip(expected, series.expanding(window)): + for (expected, actual) in zip(expected, series.expanding(min_periods=window)): tm.assert_series_equal(actual, expected) def test_expanding_axis(self, axis_frame): diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index d06c4f4f98a86..5cb332ca6b55a 100644 --- 
a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -149,6 +149,45 @@ def test_iterator_series(self, series, expected, window): for (expected, actual) in zip(expected, series.rolling(window)): tm.assert_series_equal(actual, expected) + @pytest.mark.parametrize( + "dataframe,expected,window,minp", + [ + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]) + ], + 3, + 2 + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [], + 3, + 4 + ), + ], + ) + def test_iterator_dataframe_min_periods(self, dataframe, expected, window, minp): + expected = [DataFrame(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip(expected, dataframe.rolling(window, min_periods=minp)): + tm.assert_frame_equal(actual, expected) + + @pytest.mark.parametrize( + "series,expected,window,minp", + [ + (Series([1, 2, 3]), [([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 3, 2), + (Series([1, 2, 3]), [], 3, 4) + ], + ) + def test_iterator_series_min_periods(self, series, expected, window, minp): + expected = [Series(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip(expected, series.rolling(window, min_periods=minp)): + tm.assert_series_equal(actual, expected) + @pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"]) def test_numpy_compat(self, method): # see gh-12811 diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py index e33122558f045..7e6471c2876e9 100644 --- a/pandas/tests/window/test_timeseries_window.py +++ b/pandas/tests/window/test_timeseries_window.py @@ -739,3 +739,54 @@ def test_iterator_series_rolling(self, series, expected, window): for (expected, actual) in zip(expected, series.rolling(window)): tm.assert_series_equal(actual, expected) + + @pytest.mark.parametrize( + "series,expected,window,minp", + [ + ( + Series( + 
range(5), + index=date_range(start="2016-01-01 09:30:00", periods=5, freq="s"), + ), + [], + "1s", + 2 + ), + ( + Series( + range(5), + index=date_range(start="2016-01-01 09:30:00", periods=5, freq="s"), + ), + [ + ([0, 1], [0, 1]), + ([1, 2], [1, 2]), + ([2, 3], [2, 3]), + ([3, 4], [3, 4]), + ], + "2S", + 2 + ), + ( + Series( + range(5), + index=date_range(start="2016-01-01 09:30:00", periods=5, freq="s"), + ), + [ + ([0], [0]), + ([0, 1], [0, 1]), + ([0, 1, 2], [0, 1, 2]), + ([1, 2, 3], [1, 2, 3]), + ([2, 3, 4], [2, 3, 4]), + ], + "3S", + 1 + ), + ], + ) + def test_iterator_series_rolling_min_periods(self, series, expected, window, minp): + expected = [ + Series(values, index=series.index[index]) for (values, index) in expected + ] + + for (expected, actual) in zip(expected, series.rolling(window, min_periods=minp)): + tm.assert_series_equal(actual, expected) \ No newline at end of file From 0a2b41683cb5e8a6a2ce26d78e0cd46a63de0d9a Mon Sep 17 00:00:00 2001 From: thomas Date: Wed, 17 Jul 2019 19:10:04 +0200 Subject: [PATCH 14/23] Reformat tests --- pandas/tests/window/test_expanding.py | 4 +++- pandas/tests/window/test_rolling.py | 21 +++++++++---------- pandas/tests/window/test_timeseries_window.py | 12 ++++++----- 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index a0fe1dbe56098..2a32e5448a627 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -122,7 +122,9 @@ def test_missing_minp_zero(self): def test_iterator_dataframe(self, dataframe, expected, window): expected = [DataFrame(values, index=index) for (values, index) in expected] - for (expected, actual) in zip(expected, dataframe.expanding(min_periods=window)): + for (expected, actual) in zip( + expected, dataframe.expanding(min_periods=window) + ): tm.assert_frame_equal(actual, expected) @pytest.mark.parametrize( diff --git a/pandas/tests/window/test_rolling.py 
b/pandas/tests/window/test_rolling.py index 5cb332ca6b55a..46c3770928563 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -156,36 +156,35 @@ def test_iterator_series(self, series, expected, window): DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), [ ({"A": [1, 2], "B": [4, 5]}, [0, 1]), - ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]) + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), ], 3, - 2 - ), - ( - DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), - [], - 3, - 4 + 2, ), + (DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), [], 3, 4), ], ) def test_iterator_dataframe_min_periods(self, dataframe, expected, window, minp): expected = [DataFrame(values, index=index) for (values, index) in expected] - for (expected, actual) in zip(expected, dataframe.rolling(window, min_periods=minp)): + for (expected, actual) in zip( + expected, dataframe.rolling(window, min_periods=minp) + ): tm.assert_frame_equal(actual, expected) @pytest.mark.parametrize( "series,expected,window,minp", [ (Series([1, 2, 3]), [([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 3, 2), - (Series([1, 2, 3]), [], 3, 4) + (Series([1, 2, 3]), [], 3, 4), ], ) def test_iterator_series_min_periods(self, series, expected, window, minp): expected = [Series(values, index=index) for (values, index) in expected] - for (expected, actual) in zip(expected, series.rolling(window, min_periods=minp)): + for (expected, actual) in zip( + expected, series.rolling(window, min_periods=minp) + ): tm.assert_series_equal(actual, expected) @pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"]) diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py index 7e6471c2876e9..2217c0d2fa8aa 100644 --- a/pandas/tests/window/test_timeseries_window.py +++ b/pandas/tests/window/test_timeseries_window.py @@ -750,7 +750,7 @@ def test_iterator_series_rolling(self, series, expected, window): ), [], "1s", - 2 + 2, ), ( Series( @@ -764,7 +764,7 @@ 
def test_iterator_series_rolling(self, series, expected, window): ([3, 4], [3, 4]), ], "2S", - 2 + 2, ), ( Series( @@ -779,7 +779,7 @@ def test_iterator_series_rolling(self, series, expected, window): ([2, 3, 4], [2, 3, 4]), ], "3S", - 1 + 1, ), ], ) @@ -788,5 +788,7 @@ def test_iterator_series_rolling_min_periods(self, series, expected, window, min Series(values, index=series.index[index]) for (values, index) in expected ] - for (expected, actual) in zip(expected, series.rolling(window, min_periods=minp)): - tm.assert_series_equal(actual, expected) \ No newline at end of file + for (expected, actual) in zip( + expected, series.rolling(window, min_periods=minp) + ): + tm.assert_series_equal(actual, expected) From 6b559e930fc8e05ad7fa896abd4e669432452de9 Mon Sep 17 00:00:00 2001 From: thomas Date: Wed, 17 Jul 2019 19:31:00 +0200 Subject: [PATCH 15/23] Refactor tests --- pandas/tests/window/test_rolling.py | 20 ++++---- pandas/tests/window/test_timeseries_window.py | 51 +++++-------------- 2 files changed, 24 insertions(+), 47 deletions(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 46c3770928563..4242b3076dfdf 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -150,10 +150,9 @@ def test_iterator_series(self, series, expected, window): tm.assert_series_equal(actual, expected) @pytest.mark.parametrize( - "dataframe,expected,window,minp", + "expected,window,minp", [ ( - DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), [ ({"A": [1, 2], "B": [4, 5]}, [0, 1]), ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), @@ -161,10 +160,12 @@ def test_iterator_series(self, series, expected, window): 3, 2, ), - (DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), [], 3, 4), + ([], 3, 4), ], ) - def test_iterator_dataframe_min_periods(self, dataframe, expected, window, minp): + def test_iterator_dataframe_min_periods(self, expected, window, minp): + dataframe = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + 
expected = [DataFrame(values, index=index) for (values, index) in expected] for (expected, actual) in zip( @@ -173,13 +174,12 @@ def test_iterator_dataframe_min_periods(self, dataframe, expected, window, minp) tm.assert_frame_equal(actual, expected) @pytest.mark.parametrize( - "series,expected,window,minp", - [ - (Series([1, 2, 3]), [([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 3, 2), - (Series([1, 2, 3]), [], 3, 4), - ], + "expected,window,minp", + [([([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 3, 2), ([], 3, 4)], ) - def test_iterator_series_min_periods(self, series, expected, window, minp): + def test_iterator_series_min_periods(self, expected, window, minp): + series = Series([1, 2, 3]) + expected = [Series(values, index=index) for (values, index) in expected] for (expected, actual) in zip( diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py index 2217c0d2fa8aa..63e7015c09f70 100644 --- a/pandas/tests/window/test_timeseries_window.py +++ b/pandas/tests/window/test_timeseries_window.py @@ -692,21 +692,10 @@ def test_rolling_cov_offset(self): tm.assert_series_equal(result, expected2) @pytest.mark.parametrize( - "series,expected,window", + "expected,window", [ + ([([0], [0]), ([1], [1]), ([2], [2]), ([3], [3]), ([4], [4])], "1s"), ( - Series( - range(5), - index=date_range(start="2016-01-01 09:30:00", periods=5, freq="s"), - ), - [([0], [0]), ([1], [1]), ([2], [2]), ([3], [3]), ([4], [4])], - "1s", - ), - ( - Series( - range(5), - index=date_range(start="2016-01-01 09:30:00", periods=5, freq="s"), - ), [ ([0], [0]), ([0, 1], [0, 1]), @@ -717,10 +706,6 @@ def test_rolling_cov_offset(self): "2S", ), ( - Series( - range(5), - index=date_range(start="2016-01-01 09:30:00", periods=5, freq="s"), - ), [ ([0], [0]), ([0, 1], [0, 1]), @@ -732,7 +717,11 @@ def test_rolling_cov_offset(self): ), ], ) - def test_iterator_series_rolling(self, series, expected, window): + def test_iterator_series_rolling(self, expected, 
window): + series = Series( + range(5), index=date_range(start="2016-01-01 09:30:00", periods=5, freq="s") + ) + expected = [ Series(values, index=series.index[index]) for (values, index) in expected ] @@ -741,22 +730,10 @@ def test_iterator_series_rolling(self, series, expected, window): tm.assert_series_equal(actual, expected) @pytest.mark.parametrize( - "series,expected,window,minp", + "expected,window,minp", [ + ([], "1s", 2), ( - Series( - range(5), - index=date_range(start="2016-01-01 09:30:00", periods=5, freq="s"), - ), - [], - "1s", - 2, - ), - ( - Series( - range(5), - index=date_range(start="2016-01-01 09:30:00", periods=5, freq="s"), - ), [ ([0, 1], [0, 1]), ([1, 2], [1, 2]), @@ -767,10 +744,6 @@ def test_iterator_series_rolling(self, series, expected, window): 2, ), ( - Series( - range(5), - index=date_range(start="2016-01-01 09:30:00", periods=5, freq="s"), - ), [ ([0], [0]), ([0, 1], [0, 1]), @@ -783,7 +756,11 @@ def test_iterator_series_rolling(self, series, expected, window): ), ], ) - def test_iterator_series_rolling_min_periods(self, series, expected, window, minp): + def test_iterator_series_rolling_min_periods(self, expected, window, minp): + series = Series( + range(5), index=date_range(start="2016-01-01 09:30:00", periods=5, freq="s") + ) + expected = [ Series(values, index=series.index[index]) for (values, index) in expected ] From 62f099712846bf6e3e6f0d0b14713b61a2bf461f Mon Sep 17 00:00:00 2001 From: thomas Date: Wed, 17 Jul 2019 20:23:22 +0200 Subject: [PATCH 16/23] Implement support for nan values --- pandas/core/window.py | 36 ++++++++++++++++++++++------- pandas/tests/window/test_rolling.py | 36 +++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 8 deletions(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index 6be3e5e6342be..8272e788051b9 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -204,26 +204,46 @@ def __repr__(self): ) def __iter__(self): + closed = self.closed window = 
self._get_window() minp = _use_window(self.min_periods, window) + offset = _offset(window, self.center) blocks, obj, index = self._create_blocks() _, indexi = self._get_index(index) for values in blocks: + arr = np.asarray(values) + start, end, N, win, _minp, is_variable = libwindow.get_window_indexer( - np.asarray(values), window, minp, indexi, self.closed + arr, window, minp, indexi, closed ) + if arr.ndim == 1: + arr = np.expand_dims(arr, axis=1) + + counts = libwindow.roll_sum( + np.concatenate( + [ + np.isfinite(arr).all(axis=1).astype(float), + np.array([0.0] * offset), + ] + ), + win, + minp, + index, + closed, + )[offset:] + for i in range(N): - if is_variable: - s = start[i] - e = end[i] - else: - s = max(i - win + 1, 0) - e = min(i + 1, N) + if counts[i] >= _minp: + if is_variable: + s = start[i] + e = end[i] + else: + s = max(i - win + offset + 1, 0) + e = min(i + offset + 1, N) - if e - s >= _minp: yield values.iloc[slice(s, e)] def _get_index(self, index=None): diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 4242b3076dfdf..747d290359d0c 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -187,6 +187,42 @@ def test_iterator_series_min_periods(self, expected, window, minp): ): tm.assert_series_equal(actual, expected) + @pytest.mark.parametrize( + "expected,window,minp", + [ + ([([1.0, np.nan, 3.0], [0, 1, 2]), ([np.nan, 3.0, 4.0], [1, 2, 3])], 3, 2), + ([([1.0, np.nan, 3.0, 4.0], [0, 1, 2, 3])], 4, 3), + ([], 4, 4), + ], + ) + def test_iterator_series_min_periods_nan(self, expected, window, minp): + series = Series([1, np.nan, 3, 4]) + + expected = [Series(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip( + expected, series.rolling(window, min_periods=minp) + ): + tm.assert_series_equal(actual, expected) + + @pytest.mark.parametrize( + "expected,window,minp", + [ + ([({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2])], 3, 2), 
+ ([], 2, 2), + ([], 3, 4), + ], + ) + def test_iterator_series_min_periods_nan(self, expected, window, minp): + dataframe = DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}) + + expected = [DataFrame(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip( + expected, dataframe.rolling(window, min_periods=minp) + ): + tm.assert_frame_equal(actual, expected) + @pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"]) def test_numpy_compat(self, method): # see gh-12811 From 0d2d5b1ab94292d40f949109b6dceb333b6e950d Mon Sep 17 00:00:00 2001 From: thomas Date: Wed, 17 Jul 2019 20:28:37 +0200 Subject: [PATCH 17/23] Rename duplicate test name --- pandas/tests/window/test_rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 747d290359d0c..5ef4476dff519 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -213,7 +213,7 @@ def test_iterator_series_min_periods_nan(self, expected, window, minp): ([], 3, 4), ], ) - def test_iterator_series_min_periods_nan(self, expected, window, minp): + def test_iterator_dataframe_min_periods_nan(self, expected, window, minp): dataframe = DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}) expected = [DataFrame(values, index=index) for (values, index) in expected] From 412efa74b8e91da7b62d3c7fc48d45481aaf32dd Mon Sep 17 00:00:00 2001 From: Thomas Kluiters Date: Thu, 18 Jul 2019 10:18:57 +0000 Subject: [PATCH 18/23] Use count() instead of roll_sum() --- pandas/core/window.py | 37 ++++++++++--------------------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index 8272e788051b9..e86a4c1d7cbdf 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -213,38 +213,21 @@ def __iter__(self): _, indexi = self._get_index(index) for values in blocks: - arr = 
np.asarray(values) - start, end, N, win, _minp, is_variable = libwindow.get_window_indexer( - arr, window, minp, indexi, closed + np.asarray(values), window, minp, indexi, closed ) - if arr.ndim == 1: - arr = np.expand_dims(arr, axis=1) - - counts = libwindow.roll_sum( - np.concatenate( - [ - np.isfinite(arr).all(axis=1).astype(float), - np.array([0.0] * offset), - ] - ), - win, - minp, - index, - closed, - )[offset:] - for i in range(N): - if counts[i] >= _minp: - if is_variable: - s = start[i] - e = end[i] - else: - s = max(i - win + offset + 1, 0) - e = min(i + offset + 1, N) + if is_variable: + s = start[i] + e = end[i] + else: + s = max(i - win + offset + 1, 0) + e = min(i + offset + 1, N) - yield values.iloc[slice(s, e)] + result = values.iloc[slice(s, e)] + if result.count().min() >= _minp: + yield result def _get_index(self, index=None): """ From 285b5ba34ee7e67ba6335c0f3a54efeb7b83bbb4 Mon Sep 17 00:00:00 2001 From: Thomas Kluiters Date: Thu, 18 Jul 2019 13:04:05 +0000 Subject: [PATCH 19/23] Update create_blocks and _get_index --- pandas/core/window.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index 4e0a5fc4a1169..2576ca2c3d372 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -199,12 +199,12 @@ def __iter__(self): minp = _use_window(self.min_periods, window) offset = _offset(window, self.center) - blocks, obj, index = self._create_blocks() - _, indexi = self._get_index(index) + blocks, obj = self._create_blocks() + index = self._get_index() for values in blocks: start, end, N, win, _minp, is_variable = libwindow.get_window_indexer( - np.asarray(values), window, minp, indexi, closed + np.asarray(values), window, minp, index, closed ) for i in range(N): From 33e24cb9d5d63ff8e41442ff358ab3613193bf33 Mon Sep 17 00:00:00 2001 From: thomas Date: Sun, 21 Jul 2019 13:17:33 +0200 Subject: [PATCH 20/23] Refactor tests into test_iterator --- 
pandas/tests/window/test_expanding.py | 57 ---- pandas/tests/window/test_iterator.py | 269 ++++++++++++++++++ pandas/tests/window/test_rolling.py | 125 -------- pandas/tests/window/test_timeseries_window.py | 79 ----- 4 files changed, 269 insertions(+), 261 deletions(-) create mode 100644 pandas/tests/window/test_iterator.py diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index 2a32e5448a627..103b67a9d7a18 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -90,63 +90,6 @@ def test_missing_minp_zero(self): expected = pd.Series([np.nan]) tm.assert_series_equal(result, expected) - @pytest.mark.parametrize( - "dataframe,expected,window", - [ - ( - DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), - [({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2])], - 3, - ), - ( - DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), - [ - ({"A": [1, 2], "B": [4, 5]}, [0, 1]), - ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), - ], - 2, - ), - ( - DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), - [ - ({"A": [1], "B": [4]}, [0]), - ({"A": [1, 2], "B": [4, 5]}, [0, 1]), - ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), - ], - 1, - ), - (DataFrame({"A": [1], "B": [4]}), [], 1337), - (DataFrame(), [({}, [])], 1337), - ], - ) - def test_iterator_dataframe(self, dataframe, expected, window): - expected = [DataFrame(values, index=index) for (values, index) in expected] - - for (expected, actual) in zip( - expected, dataframe.expanding(min_periods=window) - ): - tm.assert_frame_equal(actual, expected) - - @pytest.mark.parametrize( - "series,expected,window", - [ - (Series([1, 2, 3]), [([1, 2, 3], [0, 1, 2])], 3), - (Series([1, 2, 3]), [([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 2), - ( - Series([1, 2, 3]), - [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], - 1, - ), - (Series([1, 2]), [([1, 2], [0, 1])], 1337), - (Series([]), [], 1337), - ], - ) - def test_iterator_series(self, series, expected, window): - expected = 
[Series(values, index=index) for (values, index) in expected] - - for (expected, actual) in zip(expected, series.expanding(min_periods=window)): - tm.assert_series_equal(actual, expected) - def test_expanding_axis(self, axis_frame): # see gh-23372. df = DataFrame(np.ones((10, 20))) diff --git a/pandas/tests/window/test_iterator.py b/pandas/tests/window/test_iterator.py new file mode 100644 index 0000000000000..e3d70be7015d8 --- /dev/null +++ b/pandas/tests/window/test_iterator.py @@ -0,0 +1,269 @@ +import numpy as np +import pytest + +from pandas import DataFrame, Series, date_range +from pandas.tests.window.common import Base +import pandas.util.testing as tm + + +class TestExpanding(Base): + @pytest.mark.parametrize( + "dataframe,expected,window", + [ + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2])], + 3, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), + ], + 2, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), + ], + 1, + ), + (DataFrame({"A": [1], "B": [4]}), [], 1337), + (DataFrame(), [({}, [])], 1337), + ], + ) + def test_iterator_expanding_dataframe(self, dataframe, expected, window): + expected = [DataFrame(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip( + expected, dataframe.expanding(min_periods=window) + ): + tm.assert_frame_equal(actual, expected) + + @pytest.mark.parametrize( + "series,expected,window", + [ + (Series([1, 2, 3]), [([1, 2, 3], [0, 1, 2])], 3), + (Series([1, 2, 3]), [([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 2), + ( + Series([1, 2, 3]), + [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], + 1, + ), + (Series([1, 2]), [([1, 2], [0, 1])], 1337), + (Series([]), [], 1337), + ], + ) + def 
test_iterator_expanding_series(self, series, expected, window): + expected = [Series(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip(expected, series.expanding(min_periods=window)): + tm.assert_series_equal(actual, expected) + + @pytest.mark.parametrize( + "dataframe,expected,window", + [ + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2])], + 3, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [2, 3], "B": [5, 6]}, [1, 2]), + ], + 2, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [2], "B": [5]}, [1]), + ({"A": [3], "B": [6]}, [2]), + ], + 1, + ), + (DataFrame({"A": [1], "B": [4]}), [], 1337), + (DataFrame(), [({}, [])], 1337), + ], + ) + def test_iterator_rolling_dataframe(self, dataframe, expected, window): + expected = [DataFrame(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip(expected, dataframe.rolling(window)): + tm.assert_frame_equal(actual, expected) + + @pytest.mark.parametrize( + "series,expected,window", + [ + (Series([1, 2, 3]), [([1, 2, 3], [0, 1, 2])], 3), + (Series([1, 2, 3]), [([1, 2], [0, 1]), ([2, 3], [1, 2])], 2), + (Series([1, 2, 3]), [([1], [0]), ([2], [1]), ([3], [2])], 1), + (Series([1, 2]), [([1, 2], [0, 1])], 1337), + (Series([]), [], 1337), + ], + ) + def test_iterator_rolling_series(self, series, expected, window): + expected = [Series(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip(expected, series.rolling(window)): + tm.assert_series_equal(actual, expected) + + @pytest.mark.parametrize( + "expected,window,minp", + [ + ( + [ + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), + ], + 3, + 2, + ), + ([], 3, 4), + ], + ) + def test_iterator_rolling_dataframe_min_periods(self, expected, window, minp): + dataframe = 
DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + + expected = [DataFrame(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip( + expected, dataframe.rolling(window, min_periods=minp) + ): + tm.assert_frame_equal(actual, expected) + + @pytest.mark.parametrize( + "expected,window,minp", + [([([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 3, 2), ([], 3, 4)], + ) + def test_iterator_rolling_series_min_periods(self, expected, window, minp): + series = Series([1, 2, 3]) + + expected = [Series(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip( + expected, series.rolling(window, min_periods=minp) + ): + tm.assert_series_equal(actual, expected) + + @pytest.mark.parametrize( + "expected,window,minp", + [ + ([([1.0, np.nan, 3.0], [0, 1, 2]), ([np.nan, 3.0, 4.0], [1, 2, 3])], 3, 2), + ([([1.0, np.nan, 3.0, 4.0], [0, 1, 2, 3])], 4, 3), + ([], 4, 4), + ], + ) + def test_iterator_rolling_series_min_periods_nan(self, expected, window, minp): + series = Series([1, np.nan, 3, 4]) + + expected = [Series(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip( + expected, series.rolling(window, min_periods=minp) + ): + tm.assert_series_equal(actual, expected) + + @pytest.mark.parametrize( + "expected,window,minp", + [ + ([({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2])], 3, 2), + ([], 2, 2), + ([], 3, 4), + ], + ) + def test_iterator_rolling_dataframe_min_periods_nan(self, expected, window, minp): + dataframe = DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}) + + expected = [DataFrame(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip( + expected, dataframe.rolling(window, min_periods=minp) + ): + tm.assert_frame_equal(actual, expected) + + @pytest.mark.parametrize( + "expected,window", + [ + ([([0], [0]), ([1], [1]), ([2], [2]), ([3], [3]), ([4], [4])], "1s"), + ( + [ + ([0], [0]), + ([0, 1], [0, 1]), + ([1, 2], [1, 2]), + ([2, 3], [2, 
3]), + ([3, 4], [3, 4]), + ], + "2S", + ), + ( + [ + ([0], [0]), + ([0, 1], [0, 1]), + ([0, 1, 2], [0, 1, 2]), + ([1, 2, 3], [1, 2, 3]), + ([2, 3, 4], [2, 3, 4]), + ], + "3S", + ), + ], + ) + def test_iterator_rolling_series_time(self, expected, window): + series = Series( + range(5), index=date_range(start="2016-01-01 09:30:00", periods=5, freq="s") + ) + + expected = [ + Series(values, index=series.index[index]) for (values, index) in expected + ] + + for (expected, actual) in zip(expected, series.rolling(window)): + tm.assert_series_equal(actual, expected) + + @pytest.mark.parametrize( + "expected,window,minp", + [ + ([], "1s", 2), + ( + [ + ([0, 1], [0, 1]), + ([1, 2], [1, 2]), + ([2, 3], [2, 3]), + ([3, 4], [3, 4]), + ], + "2S", + 2, + ), + ( + [ + ([0], [0]), + ([0, 1], [0, 1]), + ([0, 1, 2], [0, 1, 2]), + ([1, 2, 3], [1, 2, 3]), + ([2, 3, 4], [2, 3, 4]), + ], + "3S", + 1, + ), + ], + ) + def test_iterator_rolling_series_time_min_periods(self, expected, window, minp): + series = Series( + range(5), index=date_range(start="2016-01-01 09:30:00", periods=5, freq="s") + ) + + expected = [ + Series(values, index=series.index[index]) for (values, index) in expected + ] + + for (expected, actual) in zip( + expected, series.rolling(window, min_periods=minp) + ): + tm.assert_series_equal(actual, expected) \ No newline at end of file diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 5ef4476dff519..d4966706d1731 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -98,131 +98,6 @@ def test_constructor_timedelta_window_and_minperiods(self, window, raw): tm.assert_frame_equal(result_roll_sum, expected) tm.assert_frame_equal(result_roll_generic, expected) - @pytest.mark.parametrize( - "dataframe,expected,window", - [ - ( - DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), - [({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2])], - 3, - ), - ( - DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), - [ - ({"A": 
[1, 2], "B": [4, 5]}, [0, 1]), - ({"A": [2, 3], "B": [5, 6]}, [1, 2]), - ], - 2, - ), - ( - DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), - [ - ({"A": [1], "B": [4]}, [0]), - ({"A": [2], "B": [5]}, [1]), - ({"A": [3], "B": [6]}, [2]), - ], - 1, - ), - (DataFrame({"A": [1], "B": [4]}), [], 1337), - (DataFrame(), [({}, [])], 1337), - ], - ) - def test_iterator_dataframe(self, dataframe, expected, window): - expected = [DataFrame(values, index=index) for (values, index) in expected] - - for (expected, actual) in zip(expected, dataframe.rolling(window)): - tm.assert_frame_equal(actual, expected) - - @pytest.mark.parametrize( - "series,expected,window", - [ - (Series([1, 2, 3]), [([1, 2, 3], [0, 1, 2])], 3), - (Series([1, 2, 3]), [([1, 2], [0, 1]), ([2, 3], [1, 2])], 2), - (Series([1, 2, 3]), [([1], [0]), ([2], [1]), ([3], [2])], 1), - (Series([1, 2]), [([1, 2], [0, 1])], 1337), - (Series([]), [], 1337), - ], - ) - def test_iterator_series(self, series, expected, window): - expected = [Series(values, index=index) for (values, index) in expected] - - for (expected, actual) in zip(expected, series.rolling(window)): - tm.assert_series_equal(actual, expected) - - @pytest.mark.parametrize( - "expected,window,minp", - [ - ( - [ - ({"A": [1, 2], "B": [4, 5]}, [0, 1]), - ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), - ], - 3, - 2, - ), - ([], 3, 4), - ], - ) - def test_iterator_dataframe_min_periods(self, expected, window, minp): - dataframe = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - - expected = [DataFrame(values, index=index) for (values, index) in expected] - - for (expected, actual) in zip( - expected, dataframe.rolling(window, min_periods=minp) - ): - tm.assert_frame_equal(actual, expected) - - @pytest.mark.parametrize( - "expected,window,minp", - [([([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 3, 2), ([], 3, 4)], - ) - def test_iterator_series_min_periods(self, expected, window, minp): - series = Series([1, 2, 3]) - - expected = [Series(values, index=index) for 
(values, index) in expected] - - for (expected, actual) in zip( - expected, series.rolling(window, min_periods=minp) - ): - tm.assert_series_equal(actual, expected) - - @pytest.mark.parametrize( - "expected,window,minp", - [ - ([([1.0, np.nan, 3.0], [0, 1, 2]), ([np.nan, 3.0, 4.0], [1, 2, 3])], 3, 2), - ([([1.0, np.nan, 3.0, 4.0], [0, 1, 2, 3])], 4, 3), - ([], 4, 4), - ], - ) - def test_iterator_series_min_periods_nan(self, expected, window, minp): - series = Series([1, np.nan, 3, 4]) - - expected = [Series(values, index=index) for (values, index) in expected] - - for (expected, actual) in zip( - expected, series.rolling(window, min_periods=minp) - ): - tm.assert_series_equal(actual, expected) - - @pytest.mark.parametrize( - "expected,window,minp", - [ - ([({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2])], 3, 2), - ([], 2, 2), - ([], 3, 4), - ], - ) - def test_iterator_dataframe_min_periods_nan(self, expected, window, minp): - dataframe = DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}) - - expected = [DataFrame(values, index=index) for (values, index) in expected] - - for (expected, actual) in zip( - expected, dataframe.rolling(window, min_periods=minp) - ): - tm.assert_frame_equal(actual, expected) - @pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"]) def test_numpy_compat(self, method): # see gh-12811 diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py index 63e7015c09f70..e057eadae9da8 100644 --- a/pandas/tests/window/test_timeseries_window.py +++ b/pandas/tests/window/test_timeseries_window.py @@ -690,82 +690,3 @@ def test_rolling_cov_offset(self): expected2 = ss.rolling(3, min_periods=1).cov() tm.assert_series_equal(result, expected2) - - @pytest.mark.parametrize( - "expected,window", - [ - ([([0], [0]), ([1], [1]), ([2], [2]), ([3], [3]), ([4], [4])], "1s"), - ( - [ - ([0], [0]), - ([0, 1], [0, 1]), - ([1, 2], [1, 2]), - ([2, 3], [2, 3]), - ([3, 4], [3, 4]), - ], - 
"2S", - ), - ( - [ - ([0], [0]), - ([0, 1], [0, 1]), - ([0, 1, 2], [0, 1, 2]), - ([1, 2, 3], [1, 2, 3]), - ([2, 3, 4], [2, 3, 4]), - ], - "3S", - ), - ], - ) - def test_iterator_series_rolling(self, expected, window): - series = Series( - range(5), index=date_range(start="2016-01-01 09:30:00", periods=5, freq="s") - ) - - expected = [ - Series(values, index=series.index[index]) for (values, index) in expected - ] - - for (expected, actual) in zip(expected, series.rolling(window)): - tm.assert_series_equal(actual, expected) - - @pytest.mark.parametrize( - "expected,window,minp", - [ - ([], "1s", 2), - ( - [ - ([0, 1], [0, 1]), - ([1, 2], [1, 2]), - ([2, 3], [2, 3]), - ([3, 4], [3, 4]), - ], - "2S", - 2, - ), - ( - [ - ([0], [0]), - ([0, 1], [0, 1]), - ([0, 1, 2], [0, 1, 2]), - ([1, 2, 3], [1, 2, 3]), - ([2, 3, 4], [2, 3, 4]), - ], - "3S", - 1, - ), - ], - ) - def test_iterator_series_rolling_min_periods(self, expected, window, minp): - series = Series( - range(5), index=date_range(start="2016-01-01 09:30:00", periods=5, freq="s") - ) - - expected = [ - Series(values, index=series.index[index]) for (values, index) in expected - ] - - for (expected, actual) in zip( - expected, series.rolling(window, min_periods=minp) - ): - tm.assert_series_equal(actual, expected) From 755e0c1bdb6ae9beb84cae975791db53c2121288 Mon Sep 17 00:00:00 2001 From: thomas Date: Sun, 21 Jul 2019 13:20:52 +0200 Subject: [PATCH 21/23] Move whatsnew entry to 1.0.0 --- doc/source/whatsnew/v0.25.0.rst | 1 - doc/source/whatsnew/v1.0.0.rst | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 18a38bbf7a323..42e756635e739 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -1207,7 +1207,6 @@ Groupby/resample/rolling - Improved :class:`pandas.core.window.Rolling`, :class:`pandas.core.window.Window` and :class:`pandas.core.window.EWM` functions to exclude nuisance columns from results 
instead of raising errors and raise a ``DataError`` only if all columns are nuisance (:issue:`12537`) - Bug in :meth:`pandas.core.window.Rolling.max` and :meth:`pandas.core.window.Rolling.min` where incorrect results are returned with an empty variable window (:issue:`26005`) - Raise a helpful exception when an unsupported weighted window function is used as an argument of :meth:`pandas.core.window.Window.aggregate` (:issue:`26597`) -- Improved :class:`pandas.core.window.Rolling` and :class:`pandas.core.window.Window` it is now possible to iterate over a window object (:issue:`12537`) Reshaping ^^^^^^^^^ diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 9caf127553e05..921e3be41236e 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -163,7 +163,7 @@ Plotting Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- +- Improved :class:`pandas.core.window.Rolling` and :class:`pandas.core.window.Window`: it is now possible to iterate over a window object (:issue:`11704`) - Reshaping From d58e897ecbc4dbc11bd26c8dd86b113e58be143c Mon Sep 17 00:00:00 2001 From: thomas Date: Sun, 21 Jul 2019 13:45:32 +0200 Subject: [PATCH 22/23] Add iterators to computation.rst --- doc/source/user_guide/computation.rst | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/doc/source/user_guide/computation.rst b/doc/source/user_guide/computation.rst index 4f44fcaab63d4..b97fbc3c7ac2d 100644 --- a/doc/source/user_guide/computation.rst +++ b/doc/source/user_guide/computation.rst @@ -403,6 +403,28 @@ For some windowing functions, additional parameters must be specified: such that the weights are normalized with respect to each other. Weights of ``[1, 1, 1]`` and ``[2, 2, 2]`` yield the same result. +Iterating over windows +~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 1.0.0 + +It is also possible to iterate over a series or dataframe with a window function. Currently ``rolling`` and ``expanding`` are supported.
+ +Iterating over a ``Series`` will yield ``Series`` objects for each window whereas iterating over a ``DataFrame`` object +will yield ``DataFrame`` objects. + +.. ipython:: python + + s = pd.Series(np.random.randn(5)).rolling(2) + for x in s: + x + +.. ipython:: python + + df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}).rolling(2) + for x in df: + x + .. _stats.moments.ts: Time-aware rolling From e3b060c19f6f3aa89e4ffd79b55383d296e40a20 Mon Sep 17 00:00:00 2001 From: thomas Date: Sun, 21 Jul 2019 13:47:33 +0200 Subject: [PATCH 23/23] Add issue # --- pandas/tests/window/test_iterator.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/window/test_iterator.py b/pandas/tests/window/test_iterator.py index e3d70be7015d8..aaa2569011f2b 100644 --- a/pandas/tests/window/test_iterator.py +++ b/pandas/tests/window/test_iterator.py @@ -6,6 +6,7 @@ import pandas.util.testing as tm +# Tests for GH11704 class TestExpanding(Base): @pytest.mark.parametrize( "dataframe,expected,window", @@ -266,4 +267,4 @@ def test_iterator_rolling_series_time_min_periods(self, expected, window, minp): for (expected, actual) in zip( expected, series.rolling(window, min_periods=minp) ): - tm.assert_series_equal(actual, expected) \ No newline at end of file + tm.assert_series_equal(actual, expected)