From e503c34ce8dc38df0f2c97a1b63526aab4b191e8 Mon Sep 17 00:00:00 2001 From: Abdullah Ihsan Secer Date: Sun, 3 Sep 2023 00:09:31 +0100 Subject: [PATCH 1/8] Fix BaseWindowGroupby.aggregate where as_index is ignored --- doc/source/whatsnew/v2.2.0.rst | 1 + pandas/core/window/rolling.py | 8 ++++++++ pandas/tests/window/test_groupby.py | 22 ++++++++++++++++------ 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 17abb3debe3e7..32bb0b89b7b45 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -168,6 +168,7 @@ Performance improvements Bug fixes ~~~~~~~~~ - Bug in :class:`AbstractHolidayCalendar` where timezone data was not propagated when computing holiday observances (:issue:`54580`) +- Bug in :meth:`BaseWindowGroupby.aggregate` where ``_as_index`` attribute is ignored (:issue:`31007`) Categorical ^^^^^^^^^^^ diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index becbba703f92c..b6f3ecd90e3e0 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -875,6 +875,14 @@ def _gotitem(self, key, ndim, subset=None): subset = self.obj.set_index(self._on) return super()._gotitem(key, ndim, subset=subset) + def aggregate(self, func, *args, **kwargs): + result = super().aggregate(func, *args, **kwargs) + if not self._as_index: + result = result.reset_index(level=list(range(len(self._grouper.names)))) + return result + + agg = aggregate + class Window(BaseWindow): """ diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index ab00e18fc4812..145f7972c03cd 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -846,10 +846,20 @@ def test_groupby_level(self): tm.assert_series_equal(result, expected) @pytest.mark.parametrize( - "by, expected_data", + "func, by, expected_data", [ - [["id"], {"num": [100.0, 150.0, 150.0, 200.0]}], [ + lambda x: x.agg({"num": "mean"}), + ["id"], + {"num": [100.0, 150.0, 150.0, 200.0]}, + ], + [ + lambda x: x.mean(), + ["id"], + {"num": [100.0, 150.0, 150.0, 200.0]}, + ], + [ + lambda x: x.mean(), ["id", "index"], { "date": [ @@ -863,8 +873,8 @@ def test_groupby_level(self): ], ], ) - def test_as_index_false(self, by, expected_data): - # GH 39433 + def test_as_index_false(self, func, by, expected_data): + # GH 39433, 31007 data = [ ["A", "2018-01-01", 100.0], ["A", "2018-01-02", 200.0], @@ -876,8 +886,8 @@ def test_as_index_false(self, by, expected_data): df = df.set_index(["date"]) gp_by = [getattr(df, attr) for attr in by] - result = ( - df.groupby(gp_by, as_index=False).rolling(window=2, min_periods=1).mean() + result = func( + df.groupby(gp_by, as_index=False).rolling(window=2, min_periods=1) ) expected = {"id": ["A", "A", "B", "B"]} From ac3c0a1bcd6a8db42de74dae6f6ea77c34d48291 Mon Sep 17 00:00:00 2001 From: Abdullah Ihsan Secer Date: Sun, 3 Sep 2023 18:11:07 +0100 Subject: [PATCH 2/8] Fix mypy base class incompatibility issue --- pandas/core/window/ewm.py | 5 +++++ pandas/core/window/expanding.py | 5 +++++ pandas/core/window/rolling.py | 5 +++++ 3 files changed, 15 insertions(+) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 775f3cd428677..ea0f9501c37fb 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -919,6 +919,11 @@ def _get_window_indexer(self) -> GroupbyIndexer: ) return window_indexer + def aggregate(self, func, *args, **kwargs): + return super().aggregate(func, *args, **kwargs) + + agg = aggregate + class OnlineExponentialMovingWindow(ExponentialMovingWindow): def __init__( diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index aac10596ffc69..5904362c601fa 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -962,3 +962,8 @@ def _get_window_indexer(self) -> GroupbyIndexer: window_indexer=ExpandingIndexer, ) return window_indexer + + def aggregate(self, func, *args, **kwargs): + return super().aggregate(func, *args, **kwargs) + + agg = aggregate diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index b6f3ecd90e3e0..f2f18ff50cf7d 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -2919,3 +2919,8 @@ def _validate_datetimelike_monotonic(self): f"Each group within {on} must be monotonic. " f"Sort the values in {on} first." ) + + def aggregate(self, func, *args, **kwargs): + return super().aggregate(func, *args, **kwargs) + + agg = aggregate From 4d2875bd085cad64a3a33e7cdb47dbe6e3b83b98 Mon Sep 17 00:00:00 2001 From: Abdullah Ihsan Secer Date: Sun, 3 Sep 2023 19:52:07 +0100 Subject: [PATCH 3/8] Add @doc --- pandas/core/window/ewm.py | 1 + pandas/core/window/expanding.py | 1 + pandas/core/window/rolling.py | 1 + 3 files changed, 3 insertions(+) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index ea0f9501c37fb..5a02682cc537f 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -919,6 +919,7 @@ def _get_window_indexer(self) -> GroupbyIndexer: ) return window_indexer + @doc(_shared_docs["aggregate"]) def aggregate(self, func, *args, **kwargs): return super().aggregate(func, *args, **kwargs) diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index 5904362c601fa..5a9435704afd2 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -963,6 +963,7 @@ def _get_window_indexer(self) -> GroupbyIndexer: ) return window_indexer + @doc(_shared_docs["aggregate"]) def aggregate(self, func, *args, **kwargs): return super().aggregate(func, *args, **kwargs) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index f2f18ff50cf7d..295d753eed18d 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -2920,6 +2920,7 @@ def _validate_datetimelike_monotonic(self): f"Sort the values in {on} first." ) + @doc(_shared_docs["aggregate"]) def aggregate(self, func, *args, **kwargs): return super().aggregate(func, *args, **kwargs) From cbfb7b9ef58da56920fbd7619c90d0b3d0721dbe Mon Sep 17 00:00:00 2001 From: Abdullah Ihsan Secer Date: Wed, 6 Sep 2023 00:58:36 +0100 Subject: [PATCH 4/8] Update whatsnew --- doc/source/whatsnew/v2.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 32bb0b89b7b45..e3047381facf6 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -168,7 +168,7 @@ Performance improvements Bug fixes ~~~~~~~~~ - Bug in :class:`AbstractHolidayCalendar` where timezone data was not propagated when computing holiday observances (:issue:`54580`) -- Bug in :meth:`BaseWindowGroupby.aggregate` where ``_as_index`` attribute is ignored (:issue:`31007`) +- Bug in :meth:`DataFrameGroupBy.rolling.agg` where ``as_index`` is ignored with list-like and dictionary-like ``func`` parameters (:issue:`31007`) Categorical ^^^^^^^^^^^ From 27ce5dc3b111b7970edcb064d413a5872c28819d Mon Sep 17 00:00:00 2001 From: Abdullah Ihsan Secer Date: Wed, 6 Sep 2023 01:00:26 +0100 Subject: [PATCH 5/8] Add a separate test and test different func params --- pandas/tests/window/test_groupby.py | 58 +++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 16 deletions(-) diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index 145f7972c03cd..e2cfdbb35a501 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -846,20 +846,10 @@ def test_groupby_level(self): tm.assert_series_equal(result, expected) @pytest.mark.parametrize( - "func, by, expected_data", + "by, expected_data", [ + [["id"], {"num": [100.0, 150.0, 150.0, 200.0]}], [ - lambda x: x.agg({"num": "mean"}), - ["id"], - {"num": [100.0, 150.0, 150.0, 200.0]}, - ], - [ - lambda x: x.mean(), - ["id"], - {"num": [100.0, 150.0, 150.0, 200.0]}, - ], - [ - lambda x: x.mean(), ["id", "index"], { "date": [ @@ -873,8 +863,8 @@ def test_groupby_level(self): ], ], ) - def test_as_index_false(self, func, by, expected_data): - # GH 39433, 31007 + def test_as_index_false(self, by, expected_data): + # GH 39433 data = [ ["A", "2018-01-01", 100.0], ["A", "2018-01-02", 200.0], @@ -886,8 +876,8 @@ def test_as_index_false(self, func, by, expected_data): df = df.set_index(["date"]) gp_by = [getattr(df, attr) for attr in by] - result = func( - df.groupby(gp_by, as_index=False).rolling(window=2, min_periods=1) + result = ( + df.groupby(gp_by, as_index=False).rolling(window=2, min_periods=1).mean() ) expected = {"id": ["A", "A", "B", "B"]} @@ -898,6 +888,42 @@ def test_as_index_false(self, func, by, expected_data): ) tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( + "f", ["mean", lambda x: x.mean(), {"agg_col": "mean"}, ["mean"]] + ) + def test_aggregate_as_index_false(self, f): + # GH 31007 + index = date_range(end="2020-01-01", periods=10) + groupby_col = ["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"] + df = DataFrame( + {"groupby_col": groupby_col, "agg_col": [1, 1, 0, 1, 0, 0, 0, 0, 1, 0]}, + index=index, + ) + + result = df.groupby("groupby_col", as_index=False).rolling(4).agg(f) + if isinstance(f, list): + result.columns = result.columns.get_level_values(0) + + expected = DataFrame( + { + "groupby_col": groupby_col, + "agg_col": [ + np.nan, + np.nan, + np.nan, + 0.75, + 0.5, + np.nan, + np.nan, + np.nan, + 0.25, + 0.25, + ], + }, + index=index, + ) + tm.assert_frame_equal(result, expected) + def test_nan_and_zero_endpoints(self, any_int_numpy_dtype): # https://github.com/twosigma/pandas/issues/53 typ = np.dtype(any_int_numpy_dtype).type From 22bc4f7c7cf1ad8e2823255154586c8c0fc68180 Mon Sep 17 00:00:00 2001 From: Abdullah Ihsan Secer Date: Wed, 6 Sep 2023 01:05:30 +0100 Subject: [PATCH 6/8] Add type ignore and remove agg functions --- pandas/core/window/ewm.py | 12 +++++------- pandas/core/window/expanding.py | 10 +++------- pandas/core/window/rolling.py | 10 +++------- 3 files changed, 11 insertions(+), 21 deletions(-) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 5a02682cc537f..c72fec8a29a78 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -887,7 +887,11 @@ def _cov(X, Y): ) -class ExponentialMovingWindowGroupby(BaseWindowGroupby, ExponentialMovingWindow): +# error: Definition of "agg" in base class "BaseWindowGroupby" is +# incompatible with definition in base class "ExponentialMovingWindow" +class ExponentialMovingWindowGroupby( + BaseWindowGroupby, ExponentialMovingWindow +): # type: ignore[misc] """ Provide an exponential moving window groupby implementation. """ @@ -919,12 +923,6 @@ def _get_window_indexer(self) -> GroupbyIndexer: ) return window_indexer - @doc(_shared_docs["aggregate"]) - def aggregate(self, func, *args, **kwargs): - return super().aggregate(func, *args, **kwargs) - - agg = aggregate - class OnlineExponentialMovingWindow(ExponentialMovingWindow): def __init__( diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index 5a9435704afd2..1a220990d15e3 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -942,7 +942,9 @@ def corr( ) -class ExpandingGroupby(BaseWindowGroupby, Expanding): +# error: Definition of "agg" in base class "BaseWindowGroupby" is +# incompatible with definition in base class "Expanding" +class ExpandingGroupby(BaseWindowGroupby, Expanding): # type: ignore[misc] """ Provide a expanding groupby implementation. """ @@ -962,9 +964,3 @@ def _get_window_indexer(self) -> GroupbyIndexer: window_indexer=ExpandingIndexer, ) return window_indexer - - @doc(_shared_docs["aggregate"]) - def aggregate(self, func, *args, **kwargs): - return super().aggregate(func, *args, **kwargs) - - agg = aggregate diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 295d753eed18d..c0d437e0c47a2 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -2860,7 +2860,9 @@ def corr( Rolling.__doc__ = Window.__doc__ -class RollingGroupby(BaseWindowGroupby, Rolling): +# error: Definition of "agg" in base class "BaseWindowGroupby" is +# incompatible with definition in base class "Rolling" +class RollingGroupby(BaseWindowGroupby, Rolling): # type: ignore[misc] """ Provide a rolling groupby implementation. """ @@ -2919,9 +2921,3 @@ def _validate_datetimelike_monotonic(self): f"Each group within {on} must be monotonic. " f"Sort the values in {on} first." ) - - @doc(_shared_docs["aggregate"]) - def aggregate(self, func, *args, **kwargs): - return super().aggregate(func, *args, **kwargs) - - agg = aggregate From 0a3bc078691392b87ea350fc42d91933cdae26d6 Mon Sep 17 00:00:00 2001 From: Abdullah Ihsan Secer Date: Wed, 6 Sep 2023 01:07:32 +0100 Subject: [PATCH 7/8] Only reset index for list-like and dict-like --- pandas/core/window/rolling.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index c0d437e0c47a2..254a729424ce4 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -34,7 +34,9 @@ from pandas.core.dtypes.common import ( ensure_float64, is_bool, + is_dict_like, is_integer, + is_list_like, is_numeric_dtype, needs_i8_conversion, ) @@ -877,7 +879,7 @@ def _gotitem(self, key, ndim, subset=None): def aggregate(self, func, *args, **kwargs): result = super().aggregate(func, *args, **kwargs) - if not self._as_index: + if not self._as_index and (is_list_like(func) or is_dict_like(func)): result = result.reset_index(level=list(range(len(self._grouper.names)))) return result From 4a1d5bc84c34c224879ef4a96f812b947e0f3443 Mon Sep 17 00:00:00 2001 From: Abdullah Ihsan Secer Date: Wed, 6 Sep 2023 01:18:39 +0100 Subject: [PATCH 8/8] Move type ignore to top --- pandas/core/window/ewm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index c72fec8a29a78..3d15955a12564 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -889,9 +889,9 @@ def _cov(X, Y): # error: Definition of "agg" in base class "BaseWindowGroupby" is # incompatible with definition in base class "ExponentialMovingWindow" -class ExponentialMovingWindowGroupby( +class ExponentialMovingWindowGroupby( # type: ignore[misc] BaseWindowGroupby, ExponentialMovingWindow -): # type: ignore[misc] +): """ Provide an exponential moving window groupby implementation. """