From b1fb75af09691b7b3149e6d74cdc632a748e86e1 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Fri, 16 Oct 2020 23:03:40 -0700 Subject: [PATCH 01/12] ENH: Implement closed for fixed windows --- doc/source/user_guide/computation.rst | 7 ++----- pandas/core/window/indexers.py | 5 +++++ pandas/core/window/rolling.py | 10 +--------- 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/doc/source/user_guide/computation.rst b/doc/source/user_guide/computation.rst index b24020848b363..b9f0683697ba6 100644 --- a/doc/source/user_guide/computation.rst +++ b/doc/source/user_guide/computation.rst @@ -652,9 +652,9 @@ parameter: :header: "``closed``", "Description", "Default for" :widths: 20, 30, 30 - ``right``, close right endpoint, time-based windows + ``right``, close right endpoint, ``left``, close left endpoint, - ``both``, close both endpoints, fixed windows + ``both``, close both endpoints, ``neither``, open endpoints, For example, having the right endpoint open is useful in many problems that require that there is no contamination @@ -681,9 +681,6 @@ from present information back to past information. This allows the rolling windo df -Currently, this feature is only implemented for time-based windows. -For fixed windows, the closed parameter cannot be set and the rolling window will always have both endpoints closed. - .. _stats.iter_rolling_window: Iteration over window: diff --git a/pandas/core/window/indexers.py b/pandas/core/window/indexers.py index 71e77f97d8797..0b6b6a6a1a69f 100644 --- a/pandas/core/window/indexers.py +++ b/pandas/core/window/indexers.py @@ -86,6 +86,11 @@ def get_window_bounds( end = np.arange(1 + offset, num_values + 1 + offset, dtype="int64") start = end - self.window_size + if closed in ["left", "both"]: + start -= 1 + if closed in ["left", "neither"]: + end = -1 + end = np.clip(end, 0, num_values) start = np.clip(start, 0, num_values) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 1fcc47931e882..53ecea3e24819 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -850,10 +850,7 @@ class Window(BaseWindow): axis : int or str, default 0 closed : str, default None Make the interval closed on the 'right', 'left', 'both' or - 'neither' endpoints. - For offset-based windows, it defaults to 'right'. - For fixed windows, defaults to 'both'. Remaining cases not implemented - for fixed windows. + 'neither' endpoints. Defaults to 'right'. Returns ------- @@ -1976,11 +1973,6 @@ def validate(self): elif self.window < 0: raise ValueError("window must be non-negative") - if not self.is_datetimelike and self.closed is not None: - raise ValueError( - "closed only implemented for datetimelike and offset based windows" - ) - def _determine_window_length(self) -> Union[int, float]: """ Calculate freq for PeriodIndexes based on Index freq. Can not use From 0e1b553a2a456f4c298a6734a317bf39ce4dbc5a Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 17 Oct 2020 13:04:51 -0700 Subject: [PATCH 02/12] Add tests and fix end point calc --- pandas/core/window/indexers.py | 6 +++--- pandas/tests/window/test_rolling.py | 16 ++++++++++------ 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/pandas/core/window/indexers.py b/pandas/core/window/indexers.py index 0b6b6a6a1a69f..d3011764beaa8 100644 --- a/pandas/core/window/indexers.py +++ b/pandas/core/window/indexers.py @@ -85,11 +85,10 @@ def get_window_bounds( end = np.arange(1 + offset, num_values + 1 + offset, dtype="int64") start = end - self.window_size - if closed in ["left", "both"]: start -= 1 if closed in ["left", "neither"]: - end = -1 + end -= 1 end = np.clip(end, 0, num_values) start = np.clip(start, 0, num_values) @@ -109,9 +108,10 @@ def get_window_bounds( closed: Optional[str] = None, ) -> Tuple[np.ndarray, np.ndarray]: - return calculate_variable_window_bounds( + start, end = calculate_variable_window_bounds( num_values, self.window_size, min_periods, center, closed, self.index_array ) + return start, end class VariableOffsetWindowIndexer(BaseIndexer): diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index f1d54d91c6d22..f18b1e22d4351 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -122,14 +122,18 @@ def test_numpy_compat(method): getattr(r, method)(dtype=np.float64) -def test_closed(): - df = DataFrame({"A": [0, 1, 2, 3, 4]}) - # closed only allowed for datetimelike +@pytest.mark.parametrize("closed", ["left", "right", "both", "neither"]) +def test_closed_fixed(closed, arithmetic_win_operators): + func_name = arithmetic_win_operators + df_fixed = DataFrame({"A": [0, 1, 2, 3, 4]}) + df_time = DataFrame({"A": [0, 1, 2, 3, 4]}, index=date_range("2020", periods=5)) - msg = "closed only implemented for datetimelike and offset based windows" + result = getattr(df_fixed.rolling(2, closed=closed, min_periods=1), func_name)() + expected = getattr(df_time.rolling("2D", closed=closed), func_name)().reset_index( + drop=True + ) - with pytest.raises(ValueError, match=msg): - df.rolling(window=3, closed="neither") + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("closed", ["neither", "left"]) From 3de94782aeeb65f6ef5ce4e332ebe8216fa1e423 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 17 Oct 2020 13:05:37 -0700 Subject: [PATCH 03/12] Add gh reference --- pandas/tests/window/test_rolling.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index f18b1e22d4351..9307e14316448 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -124,6 +124,7 @@ def test_numpy_compat(method): @pytest.mark.parametrize("closed", ["left", "right", "both", "neither"]) def test_closed_fixed(closed, arithmetic_win_operators): + # GH 34315 func_name = arithmetic_win_operators df_fixed = DataFrame({"A": [0, 1, 2, 3, 4]}) df_time = DataFrame({"A": [0, 1, 2, 3, 4]}, index=date_range("2020", periods=5)) From c19fb2e93c1feb3fc12c43bf774816e276283ece Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 17 Oct 2020 13:36:33 -0700 Subject: [PATCH 04/12] Clean variable window indexer --- pandas/_libs/window/indexers.pyx | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/window/indexers.pyx b/pandas/_libs/window/indexers.pyx index 9af1159a805ec..6a49a5bb34855 100644 --- a/pandas/_libs/window/indexers.pyx +++ b/pandas/_libs/window/indexers.pyx @@ -43,16 +43,14 @@ def calculate_variable_window_bounds( (ndarray[int64], ndarray[int64]) """ cdef: - bint left_closed = False - bint right_closed = False - int index_growth_sign = 1 + bint left_closed = False, right_closed = False ndarray[int64_t, ndim=1] start, end - int64_t start_bound, end_bound + int64_t start_bound, end_bound, index_growth_sign = 1 Py_ssize_t i, j - # if windows is variable, default is 'right', otherwise default is 'both' + # default is 'right' if closed is None: - closed = 'right' if index is not None else 'both' + closed = 'right' if closed in ['right', 'both']: right_closed = True From 1c3a1861796bba6660bb3b674493e4582fd978f1 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 17 Oct 2020 13:41:28 -0700 Subject: [PATCH 05/12] Add test from original issue --- pandas/tests/window/test_rolling.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 9307e14316448..81ebd7560345b 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -137,6 +137,24 @@ def test_closed_fixed(closed, arithmetic_win_operators): tm.assert_frame_equal(result, expected) +def test_closed_fixed_binary_col(): + # GH 34315 + data = [0, 1, 1, 0, 0, 1, 0, 1] + df = DataFrame( + {"binary_col": data}, + index=pd.date_range(start="2020-01-01", freq="min", periods=len(data)), + ) + + rolling = df.rolling(window=len(df), closed="left", min_periods=1) + result = rolling.mean() + expected = DataFrame( + [np.nan, 0, 0.5, 2 / 3, 0.5, 0.4, 0.5, 0.428571], + columns=["binary_col"], + index=pd.date_range(start="2020-01-01", freq="min", periods=len(data)), + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("closed", ["neither", "left"]) def test_closed_empty(closed, arithmetic_win_operators): # GH 26005 From 0d855eba733eda57ce6c6b3dd11e26a4f8897676 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 17 Oct 2020 13:43:27 -0700 Subject: [PATCH 06/12] Add whatsnew --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index d8961f5fdb959..7ad3e37de62e6 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -221,6 +221,7 @@ Other enhancements - :meth:`Rolling.var()` and :meth:`Rolling.std()` use Kahan summation and Welfords Method to avoid numerical issues (:issue:`37051`) - :meth:`DataFrame.plot` now recognizes ``xlabel`` and ``ylabel`` arguments for plots of type ``scatter`` and ``hexbin`` (:issue:`37001`) - :class:`DataFrame` now supports ``divmod`` operation (:issue:`37165`) +- :class:`Rolling` now supports the ``closed`` argument for fixed windows (:issue`34315`) .. _whatsnew_120.api_breaking.python: From 5b7625d31376d75070f3c87e9cf8c322b39ed8ab Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 17 Oct 2020 13:45:16 -0700 Subject: [PATCH 07/12] revert a debugging change --- pandas/core/window/indexers.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/window/indexers.py b/pandas/core/window/indexers.py index d3011764beaa8..a8229257bb7bb 100644 --- a/pandas/core/window/indexers.py +++ b/pandas/core/window/indexers.py @@ -108,10 +108,9 @@ def get_window_bounds( closed: Optional[str] = None, ) -> Tuple[np.ndarray, np.ndarray]: - start, end = calculate_variable_window_bounds( + return calculate_variable_window_bounds( num_values, self.window_size, min_periods, center, closed, self.index_array ) - return start, end class VariableOffsetWindowIndexer(BaseIndexer): From fc74b5f117593ab80f2baabda225c903664e2583 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 17 Oct 2020 17:00:29 -0700 Subject: [PATCH 08/12] fix whatsnew entry --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 7ad3e37de62e6..0911654c5e37e 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -221,7 +221,7 @@ Other enhancements - :meth:`Rolling.var()` and :meth:`Rolling.std()` use Kahan summation and Welfords Method to avoid numerical issues (:issue:`37051`) - :meth:`DataFrame.plot` now recognizes ``xlabel`` and ``ylabel`` arguments for plots of type ``scatter`` and ``hexbin`` (:issue:`37001`) - :class:`DataFrame` now supports ``divmod`` operation (:issue:`37165`) -- :class:`Rolling` now supports the ``closed`` argument for fixed windows (:issue`34315`) +- :class:`Rolling` now supports the ``closed`` argument for fixed windows (:issue:`34315`) .. _whatsnew_120.api_breaking.python: From 5679de701ac2be3ae9054b3297a90d1ab47d9587 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 17 Oct 2020 17:06:16 -0700 Subject: [PATCH 09/12] Add versionchanged tag --- pandas/core/window/rolling.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 53ecea3e24819..9455e75d63c1d 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -851,6 +851,10 @@ class Window(BaseWindow): closed : str, default None Make the interval closed on the 'right', 'left', 'both' or 'neither' endpoints. Defaults to 'right'. + + .. versionchanged:: 1.2.0 + + The closed parameter with fixed windows is now supported. Returns ------- From c2c5bead110bd19ae1361265e344104e3d7830f4 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 17 Oct 2020 17:08:52 -0700 Subject: [PATCH 10/12] Fix whitespace --- pandas/core/window/rolling.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 9455e75d63c1d..19d448d23b90b 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -851,9 +851,7 @@ class Window(BaseWindow): closed : str, default None Make the interval closed on the 'right', 'left', 'both' or 'neither' endpoints. Defaults to 'right'. - .. versionchanged:: 1.2.0 - The closed parameter with fixed windows is now supported. Returns From cbf088be3063b1d8c278cc204377027d59f8ddd6 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 20 Oct 2020 16:59:45 -0700 Subject: [PATCH 11/12] Add newline --- pandas/core/window/rolling.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 19d448d23b90b..bf0ff66c36bd6 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -851,6 +851,7 @@ class Window(BaseWindow): closed : str, default None Make the interval closed on the 'right', 'left', 'both' or 'neither' endpoints. Defaults to 'right'. + .. versionchanged:: 1.2.0 The closed parameter with fixed windows is now supported. From 415a8104b09b1a967c2cea02b2531e3e8c99569d Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 20 Oct 2020 17:34:57 -0700 Subject: [PATCH 12/12] Add blank line --- pandas/core/window/rolling.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index bf0ff66c36bd6..9136f9398799b 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -853,7 +853,8 @@ class Window(BaseWindow): 'neither' endpoints. Defaults to 'right'. .. versionchanged:: 1.2.0 - The closed parameter with fixed windows is now supported. + + The closed parameter with fixed windows is now supported. Returns -------