From edd02a9e7fdbb345210ba2b0d30314f550a58c04 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Sat, 10 Oct 2020 11:39:35 -0400 Subject: [PATCH 1/6] reimplemented FixedWindowIndexer.get_window_bounds for clarity --- pandas/core/window/indexers.py | 30 ++++++++---------------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/pandas/core/window/indexers.py b/pandas/core/window/indexers.py index 023f598f606f3..58e731e3494d2 100644 --- a/pandas/core/window/indexers.py +++ b/pandas/core/window/indexers.py @@ -78,30 +78,16 @@ def get_window_bounds( closed: Optional[str] = None, ) -> Tuple[np.ndarray, np.ndarray]: - start_s = np.zeros(self.window_size, dtype="int64") - start_e = ( - np.arange(self.window_size, num_values, dtype="int64") - - self.window_size - + 1 - ) - start = np.concatenate([start_s, start_e])[:num_values] - - end_s = np.arange(self.window_size, dtype="int64") + 1 - end_e = start_e + self.window_size - end = np.concatenate([end_s, end_e])[:num_values] - - if center and self.window_size > 2: - offset = min((self.window_size - 1) // 2, num_values - 1) - start_s_buffer = np.roll(start, -offset)[: num_values - offset] - end_s_buffer = np.roll(end, -offset)[: num_values - offset] + if center: + offset = (self.window_size - 1) // 2 + else: + offset = 0 - start_e_buffer = np.arange( - start[-1] + 1, start[-1] + 1 + offset, dtype="int64" - ) - end_e_buffer = np.array([end[-1]] * offset, dtype="int64") + end = np.arange(1 + offset, num_values + 1 + offset).astype("int64") + start = end - self.window_size - start = np.concatenate([start_s_buffer, start_e_buffer]) - end = np.concatenate([end_s_buffer, end_e_buffer]) + end = np.clip(end, 0, num_values) + start = np.clip(start, 0, num_values) return start, end From 8d248df751fd633c2bf51e881fb0342dd5217389 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Sat, 10 Oct 2020 11:40:23 -0400 Subject: [PATCH 2/6] added tests for groupby rolling when using center and min_periods --- pandas/tests/window/test_grouper.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/pandas/tests/window/test_grouper.py b/pandas/tests/window/test_grouper.py index 63bf731e95096..492eae37bec4a 100644 --- a/pandas/tests/window/test_grouper.py +++ b/pandas/tests/window/test_grouper.py @@ -297,6 +297,32 @@ def test_groupby_rolling_center_center(self): ) tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("min_periods", [5, 4, 3]) + def test_groupby_rolling_center_min_periods(self, min_periods): + df = pd.DataFrame({"group": ["A"] * 10 + ["B"] * 10, "data": range(20)}) + + window_size = 5 + result = ( + df.groupby("group") + .rolling(window_size, center=True, min_periods=min_periods) + .mean() + ) + result = result.reset_index()[["group", "data"]] + + grp_A_mean = [1.0, 1.5, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 7.5, 8.0] + grp_B_mean = [x + 10.0 for x in grp_A_mean] + + num_nans = max(0, min_periods - 3) # For window_size of 5 + nans = [np.nan] * num_nans + grp_A_expected = nans + grp_A_mean[num_nans : 10 - num_nans] + nans + grp_B_expected = nans + grp_B_mean[num_nans : 10 - num_nans] + nans + + expected = pd.DataFrame( + {"group": ["A"] * 10 + ["B"] * 10, "data": grp_A_expected + grp_B_expected} + ) + + tm.assert_frame_equal(result, expected) + def test_groupby_subselect_rolling(self): # GH 35486 df = DataFrame( From e0130e50d90069b4bfb762500851a1d4e908533e Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Sat, 10 Oct 2020 12:51:09 -0400 Subject: [PATCH 3/6] added comment containing issue number --- pandas/tests/window/test_grouper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/window/test_grouper.py b/pandas/tests/window/test_grouper.py index 492eae37bec4a..6b80f65c16fa6 100644 --- a/pandas/tests/window/test_grouper.py +++ b/pandas/tests/window/test_grouper.py @@ -299,6 +299,7 @@ def test_groupby_rolling_center_center(self): @pytest.mark.parametrize("min_periods", [5, 4, 3]) def test_groupby_rolling_center_min_periods(self, min_periods): + # GH 36040 df = pd.DataFrame({"group": ["A"] * 10 + ["B"] * 10, "data": range(20)}) window_size = 5 From 1c5f120992baf3a62a37eb0624e3968e145aaa45 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Sat, 10 Oct 2020 12:52:13 -0400 Subject: [PATCH 4/6] moved dtype into arange constructor --- pandas/core/window/indexers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/indexers.py b/pandas/core/window/indexers.py index 58e731e3494d2..f2bc01438097c 100644 --- a/pandas/core/window/indexers.py +++ b/pandas/core/window/indexers.py @@ -83,7 +83,7 @@ def get_window_bounds( else: offset = 0 - end = np.arange(1 + offset, num_values + 1 + offset).astype("int64") + end = np.arange(1 + offset, num_values + 1 + offset, dtype="int64") start = end - self.window_size end = np.clip(end, 0, num_values) From b7e4ad3775a46c25981686e14bd74ecd03991dca Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Sat, 10 Oct 2020 12:59:46 -0400 Subject: [PATCH 5/6] added whatsnew entry --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 57e3c9dd66afb..2b35b611046b7 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -444,6 +444,7 @@ Groupby/resample/rolling - Bug in :meth:`Rolling.count` returned ``np.nan`` with :class:`pandas.api.indexers.FixedForwardWindowIndexer` as window, ``min_periods=0`` and only missing values in window (:issue:`35579`) - Bug where :class:`pandas.core.window.Rolling` produces incorrect window sizes when using a ``PeriodIndex`` (:issue:`34225`) - Bug in :meth:`RollingGroupby.count` where a ``ValueError`` was raised when specifying the ``closed`` parameter (:issue:`35869`) +- Reimplemented :meth:`FixedWindowIndexer.get_window_bounds` in a simpler and more intuitive way (:issue:`36040`). Reshaping ^^^^^^^^^ From 39f5069c8c33dfaff3b383ee4f0dc23382e7e3f4 Mon Sep 17 00:00:00 2001 From: Justin Essert Date: Sat, 10 Oct 2020 13:24:17 -0400 Subject: [PATCH 6/6] updated whatsnew entry to describe bug --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 2b35b611046b7..a2067cf78315c 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -444,7 +444,7 @@ Groupby/resample/rolling - Bug in :meth:`Rolling.count` returned ``np.nan`` with :class:`pandas.api.indexers.FixedForwardWindowIndexer` as window, ``min_periods=0`` and only missing values in window (:issue:`35579`) - Bug where :class:`pandas.core.window.Rolling` produces incorrect window sizes when using a ``PeriodIndex`` (:issue:`34225`) - Bug in :meth:`RollingGroupby.count` where a ``ValueError`` was raised when specifying the ``closed`` parameter (:issue:`35869`) -- Reimplemented :meth:`FixedWindowIndexer.get_window_bounds` in a simpler and more intuitive way (:issue:`36040`). +- Bug in :meth:`DataFrame.groupby.rolling` returning wrong values with partial centered window (:issue:`36040`). Reshaping ^^^^^^^^^