From 861f762e9cba7d411f95c279ab25c7343fc9961d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 10 Feb 2021 11:03:53 -0800 Subject: [PATCH 1/9] Remove _groupby object from class --- pandas/core/groupby/groupby.py | 30 ++++++++++++-- pandas/core/window/ewm.py | 2 +- pandas/core/window/expanding.py | 2 +- pandas/core/window/rolling.py | 72 +++++++++++++++++++++------------ 4 files changed, 75 insertions(+), 31 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 5758762c13984..3c6f8e0d5cda2 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1883,7 +1883,15 @@ def rolling(self, *args, **kwargs): """ from pandas.core.window import RollingGroupby - return RollingGroupby(self, *args, **kwargs) + return RollingGroupby( + self._selected_obj, + *args, + _grouping_indices=self.grouper.indices, + _grouping_keys=self.grouper.names, + _grouping_codes=self.grouper.codes, + _grouping_levels=self.grouper.levels, + **kwargs, + ) @final @Substitution(name="groupby") @@ -1895,7 +1903,15 @@ def expanding(self, *args, **kwargs): """ from pandas.core.window import ExpandingGroupby - return ExpandingGroupby(self, *args, **kwargs) + return ExpandingGroupby( + self._selected_obj, + *args, + _grouping_indices=self.grouper.indices, + _grouping_keys=self.grouper.names, + _grouping_codes=self.grouper.codes, + _grouping_levels=self.grouper.levels, + **kwargs, + ) @final @Substitution(name="groupby") @@ -1906,7 +1922,15 @@ def ewm(self, *args, **kwargs): """ from pandas.core.window import ExponentialMovingWindowGroupby - return ExponentialMovingWindowGroupby(self, *args, **kwargs) + return ExponentialMovingWindowGroupby( + self._selected_obj, + *args, + _grouping_indices=self.grouper.indices, + _grouping_keys=self.grouper.names, + _grouping_codes=self.grouper.codes, + _grouping_levels=self.grouper.levels, + **kwargs, + ) @final def _fill(self, direction, limit=None): diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 633427369902d..13ecc0acffd28 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -548,7 +548,7 @@ class ExponentialMovingWindowGroupby(BaseWindowGroupby, ExponentialMovingWindow) @property def _constructor(self): - return ExponentialMovingWindow + return ExponentialMovingWindowGroupby def _get_window_indexer(self) -> GroupbyIndexer: """ diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index f91441de41448..eb3d8c482d80d 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -617,7 +617,7 @@ class ExpandingGroupby(BaseWindowGroupby, Expanding): @property def _constructor(self): - return Expanding + return ExpandingGroupby def _get_window_indexer(self) -> GroupbyIndexer: """ diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index bec6cfb375716..6dfa1f4dcced5 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -4,6 +4,7 @@ """ from __future__ import annotations +import copy from datetime import timedelta from functools import partial import inspect @@ -53,7 +54,7 @@ from pandas.core.base import DataError, SelectionMixin import pandas.core.common as common from pandas.core.construction import extract_array -from pandas.core.groupby.base import GotItemMixin, ShallowMixin +from pandas.core.groupby.base import ShallowMixin from pandas.core.indexes.api import Index, MultiIndex from pandas.core.reshape.concat import concat from pandas.core.util.numba_ import NUMBA_FUNC_CACHE, maybe_use_numba @@ -518,19 +519,40 @@ def aggregate(self, func, *args, **kwargs): agg = aggregate -class BaseWindowGroupby(GotItemMixin, BaseWindow): +class BaseWindowGroupby(BaseWindow): """ Provide the groupby windowing facilities. """ - def __init__(self, obj, *args, **kwargs): - kwargs.pop("parent", None) - groupby = kwargs.pop("groupby", None) - if groupby is None: - groupby, obj = obj, obj._selected_obj - self._groupby = groupby - self._groupby.mutated = True - self._groupby.grouper.mutated = True + _attributes: List[str] = [ + "window", + "min_periods", + "center", + "win_type", + "axis", + "on", + "closed", + "method", + "_grouping_indices", + "_grouping_keys", + "_grouping_codes", + "_grouping_levels", + ] + + def __init__( + self, + obj, + *args, + _grouping_indices=None, + _grouping_keys=None, + _grouping_codes=None, + _grouping_levels=None, + **kwargs, + ): + self._grouping_indices = _grouping_indices or {} + self._grouping_keys = _grouping_keys or [] + self._grouping_codes = _grouping_codes or [] + self._grouping_levels = _grouping_levels or [] super().__init__(obj, *args, **kwargs) def _apply( @@ -550,9 +572,7 @@ def _apply( # 1st set of levels = group by labels # 2nd set of levels = original index # Ignore 2nd set of levels if a group by label include an index level - result_index_names = [ - grouping.name for grouping in self._groupby.grouper._groupings - ] + result_index_names = copy.copy(self._grouping_keys) grouped_object_index = None column_keys = [ @@ -569,10 +589,10 @@ def _apply( # Our result will have still kept the column in the result result = result.drop(columns=column_keys, errors="ignore") - codes = self._groupby.grouper.codes - levels = self._groupby.grouper.levels + codes = self._grouping_codes + levels = copy.copy(self._grouping_levels) - group_indices = self._groupby.grouper.indices.values() + group_indices = self._grouping_indices.values() if group_indices: indexer = np.concatenate(list(group_indices)) else: @@ -606,7 +626,7 @@ def _apply_pairwise( Apply the given pairwise function given 2 pandas objects (DataFrame/Series) """ # Manually drop the grouping column first - target = target.drop(columns=self._groupby.grouper.names, errors="ignore") + target = target.drop(columns=self._grouping_keys, errors="ignore") result = super()._apply_pairwise(target, other, pairwise, func) # 1) Determine the levels + codes of the groupby levels if other is not None: @@ -617,12 +637,12 @@ def _apply_pairwise( result = concat( [ result.take(gb_indices).reindex(result.index) - for gb_indices in self._groupby.indices.values() + for gb_indices in self._grouping_indices.values() ] ) gb_pairs = ( - common.maybe_make_list(pair) for pair in self._groupby.indices.keys() + common.maybe_make_list(pair) for pair in self._grouping_indices.keys() ) groupby_codes = [] groupby_levels = [] @@ -636,10 +656,10 @@ def _apply_pairwise( else: # When we evaluate the pairwise=True result, repeat the groupby # labels by the number of columns in the original object - groupby_codes = self._groupby.grouper.codes - groupby_levels = self._groupby.grouper.levels + groupby_codes = self._grouping_codes + groupby_levels = self._grouping_levels - group_indices = self._groupby.grouper.indices.values() + group_indices = self._grouping_indices.values() if group_indices: indexer = np.concatenate(list(group_indices)) else: @@ -666,7 +686,7 @@ def _apply_pairwise( # 3) Create the resulting index by combining 1) + 2) result_codes = groupby_codes + result_codes result_levels = groupby_levels + result_levels - result_names = self._groupby.grouper.names + result_names + result_names = self._grouping_keys + result_names result_index = MultiIndex( result_levels, result_codes, names=result_names, verify_integrity=False @@ -683,7 +703,7 @@ def _create_data(self, obj: FrameOrSeries) -> FrameOrSeries: # GH 36197 if not obj.empty: groupby_order = np.concatenate( - list(self._groupby.grouper.indices.values()) + list(self._grouping_indices.values()) ).astype(np.int64) obj = obj.take(groupby_order) return super()._create_data(obj) @@ -2144,7 +2164,7 @@ class RollingGroupby(BaseWindowGroupby, Rolling): @property def _constructor(self): - return Rolling + return RollingGroupby def _get_window_indexer(self) -> GroupbyIndexer: """ @@ -2174,7 +2194,7 @@ def _get_window_indexer(self) -> GroupbyIndexer: window_indexer = GroupbyIndexer( index_array=index_array, window_size=window, - groupby_indicies=self._groupby.indices, + groupby_indicies=self._grouping_indices, window_indexer=rolling_indexer, indexer_kwargs=indexer_kwargs, ) From 0543909d4aa254e21f3fdccd34023c50a07ba13d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 10 Feb 2021 23:36:27 -0800 Subject: [PATCH 2/9] Add test that groupby apply is not affected --- pandas/core/window/ewm.py | 2 +- pandas/core/window/expanding.py | 2 +- pandas/tests/window/test_groupby.py | 34 +++++++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 13ecc0acffd28..5c0fea7152b04 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -559,7 +559,7 @@ def _get_window_indexer(self) -> GroupbyIndexer: GroupbyIndexer """ window_indexer = GroupbyIndexer( - groupby_indicies=self._groupby.indices, + groupby_indicies=self._grouping_indices, window_indexer=ExponentialMovingWindowIndexer, ) return window_indexer diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index eb3d8c482d80d..8888a19e38d9d 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -628,7 +628,7 @@ def _get_window_indexer(self) -> GroupbyIndexer: GroupbyIndexer """ window_indexer = GroupbyIndexer( - groupby_indicies=self._groupby.indices, + groupby_indicies=self._grouping_indices, window_indexer=ExpandingIndexer, ) return window_indexer diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index d3c2b5467e5bb..df31d24cc5326 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -82,6 +82,9 @@ def test_rolling(self, f): r = g.rolling(window=4) result = getattr(r, f)() + # GH 39732 + g.mutated = True + g.grouper.mutated = True expected = g.apply(lambda x: getattr(x.rolling(4), f)()) tm.assert_frame_equal(result, expected) @@ -91,6 +94,9 @@ def test_rolling_ddof(self, f): r = g.rolling(window=4) result = getattr(r, f)(ddof=1) + # GH 39732 + g.mutated = True + g.grouper.mutated = True expected = g.apply(lambda x: getattr(x.rolling(4), f)(ddof=1)) tm.assert_frame_equal(result, expected) @@ -101,6 +107,9 @@ def test_rolling_quantile(self, interpolation): g = self.frame.groupby("A") r = g.rolling(window=4) result = r.quantile(0.4, interpolation=interpolation) + # GH 39732 + g.mutated = True + g.grouper.mutated = True expected = g.apply( lambda x: x.rolling(4).quantile(0.4, interpolation=interpolation) ) @@ -136,6 +145,9 @@ def test_rolling_apply(self, raw): # reduction result = r.apply(lambda x: x.sum(), raw=raw) + # GH 39732 + g.mutated = True + g.grouper.mutated = True expected = g.apply(lambda x: x.rolling(4).apply(lambda y: y.sum(), raw=raw)) tm.assert_frame_equal(result, expected) @@ -643,6 +655,16 @@ def test_groupby_rolling_resulting_multiindex(self): ) tm.assert_index_equal(result.index, expected_index) + def test_groupby_rolling_object_doesnt_affect_groupby_apply(self): + # GH 39732 + g = self.frame.groupby("A") + expected = g.apply(lambda x: x.rolling(4).sum()).index + _ = g.rolling(window=4) + result = g.apply(lambda x: x.rolling(4).sum()).index + tm.assert_index_equal(result, expected) + assert not g.mutated + assert not g.grouper.mutated + class TestExpanding: def setup_method(self): @@ -656,6 +678,9 @@ def test_expanding(self, f): r = g.expanding() result = getattr(r, f)() + # GH 39732 + g.mutated = True + g.grouper.mutated = True expected = g.apply(lambda x: getattr(x.expanding(), f)()) tm.assert_frame_equal(result, expected) @@ -665,6 +690,9 @@ def test_expanding_ddof(self, f): r = g.expanding() result = getattr(r, f)(ddof=0) + # GH 39732 + g.mutated = True + g.grouper.mutated = True expected = g.apply(lambda x: getattr(x.expanding(), f)(ddof=0)) tm.assert_frame_equal(result, expected) @@ -675,6 +703,9 @@ def test_expanding_quantile(self, interpolation): g = self.frame.groupby("A") r = g.expanding() result = r.quantile(0.4, interpolation=interpolation) + # GH 39732 + g.mutated = True + g.grouper.mutated = True expected = g.apply( lambda x: x.expanding().quantile(0.4, interpolation=interpolation) ) @@ -714,6 +745,9 @@ def test_expanding_apply(self, raw): # reduction result = r.apply(lambda x: x.sum(), raw=raw) + # GH 39732 + g.mutated = True + g.grouper.mutated = True expected = g.apply(lambda x: x.expanding().apply(lambda y: y.sum(), raw=raw)) tm.assert_frame_equal(result, expected) From 7e572bec8f3378020ed69a4d7cd44ad1415a1ada Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 11 Feb 2021 10:43:09 -0800 Subject: [PATCH 3/9] Remove unnecessary groupby subclasses --- pandas/core/groupby/base.py | 11 ++--------- pandas/core/window/rolling.py | 11 +++++++---- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py index 594c5899209df..99426c55da29b 100644 --- a/pandas/core/groupby/base.py +++ b/pandas/core/groupby/base.py @@ -5,7 +5,6 @@ """ import collections from typing import List -import warnings from pandas._typing import final @@ -28,10 +27,7 @@ def _shallow_copy(self, obj, **kwargs): obj = obj.obj for attr in self._attributes: if attr not in kwargs: - # TODO: Remove once win_type deprecation is enforced - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "win_type", FutureWarning) - kwargs[attr] = getattr(self, attr) + kwargs[attr] = getattr(self, attr) return self._constructor(obj, **kwargs) @@ -63,10 +59,7 @@ def _gotitem(self, key, ndim, subset=None): # we need to make a shallow copy of ourselves # with the same groupby - # TODO: Remove once win_type deprecation is enforced - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", "win_type", FutureWarning) - kwargs = {attr: getattr(self, attr) for attr in self._attributes} + kwargs = {attr: getattr(self, attr) for attr in self._attributes} # Try to select from a DataFrame, falling back to a Series try: diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 6dfa1f4dcced5..e832db47fe1d3 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -54,7 +54,6 @@ from pandas.core.base import DataError, SelectionMixin import pandas.core.common as common from pandas.core.construction import extract_array -from pandas.core.groupby.base import ShallowMixin from pandas.core.indexes.api import Index, MultiIndex from pandas.core.reshape.concat import concat from pandas.core.util.numba_ import NUMBA_FUNC_CACHE, maybe_use_numba @@ -89,7 +88,7 @@ from pandas.core.internals import Block # noqa:F401 -class BaseWindow(ShallowMixin, SelectionMixin): +class BaseWindow(SelectionMixin): """Provides utilities for performing windowing operations.""" _attributes: List[str] = [ @@ -237,8 +236,12 @@ def _gotitem(self, key, ndim, subset=None): # create a new object to prevent aliasing if subset is None: subset = self.obj - self = self._shallow_copy(subset) - self._reset_cache() + # TODO: Remove once win_type deprecation is enforced + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", "win_type", FutureWarning) + self = type(self)( + subset, **{attr: getattr(self, attr) for attr in self._attributes} + ) if subset.ndim == 2: if is_scalar(key) and key in subset or is_list_like(key): self._selection = key From ef3f79144541d7ab942dea7135b65de82c1c2b09 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 11 Feb 2021 10:58:42 -0800 Subject: [PATCH 4/9] Supress private variables in repr --- pandas/core/window/rolling.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index e832db47fe1d3..4475dc6e59b1e 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -116,8 +116,6 @@ def __init__( method: str = "single", **kwargs, ): - - self.__dict__.update(kwargs) self.obj = obj self.on = on self.closed = closed @@ -267,7 +265,7 @@ def __repr__(self) -> str: attrs_list = ( f"{attr_name}={getattr(self, attr_name)}" for attr_name in self._attributes - if getattr(self, attr_name, None) is not None + if getattr(self, attr_name, None) is not None and attr_name[0] != "_" ) attrs = ",".join(attrs_list) return f"{type(self).__name__} [{attrs}]" From 3d76221a14c8ad9cec2d4719a32d21ce22582b1e Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 11 Feb 2021 11:38:20 -0800 Subject: [PATCH 5/9] Don't have window classes accept kwargs --- pandas/core/window/ewm.py | 29 ++++++++++++----- pandas/core/window/expanding.py | 6 ++-- pandas/core/window/rolling.py | 47 +++++++++++++++------------ pandas/tests/window/test_expanding.py | 2 +- 4 files changed, 51 insertions(+), 33 deletions(-) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 5c0fea7152b04..55f68254bb4f6 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -203,7 +203,15 @@ class ExponentialMovingWindow(BaseWindow): 4 3.233686 """ - _attributes = ["com", "min_periods", "adjust", "ignore_na", "axis"] + _attributes = [ + "com", + "min_periods", + "adjust", + "ignore_na", + "axis", + "halflife", + "times", + ] def __init__( self, @@ -217,17 +225,18 @@ def __init__( ignore_na: bool = False, axis: int = 0, times: Optional[Union[str, np.ndarray, FrameOrSeries]] = None, - **kwargs, ): - self.obj = obj - self.min_periods = max(int(min_periods), 1) + super().__init__( + obj=obj, + min_periods=max(int(min_periods), 1), + on=None, + center=False, + closed=None, + method="single", + axis=axis, + ) self.adjust = adjust self.ignore_na = ignore_na - self.axis = axis - self.on = None - self.center = False - self.closed = None - self.method = "single" if times is not None: if isinstance(times, str): times = self._selected_obj[times] @@ -546,6 +555,8 @@ class ExponentialMovingWindowGroupby(BaseWindowGroupby, ExponentialMovingWindow) Provide an exponential moving window groupby implementation. """ + _attributes = ExponentialMovingWindow._attributes + BaseWindowGroupby._attributes + @property def _constructor(self): return ExponentialMovingWindowGroupby diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index 8888a19e38d9d..c825ca833431d 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -80,9 +80,7 @@ class Expanding(RollingAndExpandingMixin): _attributes = ["min_periods", "center", "axis", "method"] - def __init__( - self, obj, min_periods=1, center=None, axis=0, method="single", **kwargs - ): + def __init__(self, obj, min_periods=1, center=None, axis=0, method="single"): super().__init__( obj=obj, min_periods=min_periods, center=center, axis=axis, method=method ) @@ -615,6 +613,8 @@ class ExpandingGroupby(BaseWindowGroupby, Expanding): Provide a expanding groupby implementation. """ + _attributes = Expanding._attributes + BaseWindowGroupby._attributes + @property def _constructor(self): return ExpandingGroupby diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 4475dc6e59b1e..4e523eb4bb5f1 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -91,16 +91,7 @@ class BaseWindow(SelectionMixin): """Provides utilities for performing windowing operations.""" - _attributes: List[str] = [ - "window", - "min_periods", - "center", - "win_type", - "axis", - "on", - "closed", - "method", - ] + _attributes: List[str] = [] exclusions: Set[str] = set() def __init__( @@ -114,7 +105,6 @@ def __init__( on: Optional[Union[str, Index]] = None, closed: Optional[str] = None, method: str = "single", - **kwargs, ): self.obj = obj self.on = on @@ -525,15 +515,7 @@ class BaseWindowGroupby(BaseWindow): Provide the groupby windowing facilities. """ - _attributes: List[str] = [ - "window", - "min_periods", - "center", - "win_type", - "axis", - "on", - "closed", - "method", + _attributes = [ "_grouping_indices", "_grouping_keys", "_grouping_codes", @@ -895,6 +877,17 @@ class Window(BaseWindow): 2013-01-01 09:00:06 4.0 """ + _attributes = [ + "window", + "min_periods", + "center", + "win_type", + "axis", + "on", + "closed", + "method", + ] + def validate(self): super().validate() @@ -1385,6 +1378,18 @@ def corr_func(x, y): class Rolling(RollingAndExpandingMixin): + + _attributes = [ + "window", + "min_periods", + "center", + "win_type", + "axis", + "on", + "closed", + "method", + ] + def validate(self): super().validate() @@ -2163,6 +2168,8 @@ class RollingGroupby(BaseWindowGroupby, Rolling): Provide a rolling groupby implementation. """ + _attributes = Rolling._attributes + BaseWindowGroupby._attributes + @property def _constructor(self): return RollingGroupby diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index 01804faad5a5e..554b8a404f714 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -49,7 +49,7 @@ def test_constructor_invalid(frame_or_series, w): @pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"]) def test_numpy_compat(method): # see gh-12811 - e = Expanding(Series([2, 4, 6]), window=2) + e = Expanding(Series([2, 4, 6])) msg = "numpy operations are not valid with window objects" From f914c665d4a38f87bb3bc1c8d1ddafd338bc933a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 11 Feb 2021 17:07:34 -0800 Subject: [PATCH 6/9] Add whatsnew --- doc/source/whatsnew/v1.3.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 09e1853429d9f..2242acb642acb 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -416,6 +416,7 @@ Groupby/resample/rolling - Bug in :meth:`Series.resample` would raise when the index was a :class:`PeriodIndex` consisting of ``NaT`` (:issue:`39227`) - Bug in :meth:`core.window.rolling.RollingGroupby.corr` and :meth:`core.window.expanding.ExpandingGroupby.corr` where the groupby column would return 0 instead of ``np.nan`` when providing ``other`` that was longer than each group (:issue:`39591`) - Bug in :meth:`core.window.expanding.ExpandingGroupby.corr` and :meth:`core.window.expanding.ExpandingGroupby.cov` where 1 would be returned instead of ``np.nan`` when providing ``other`` that was longer than each group (:issue:`39591`) +- Bug in :meth:`DataFrameGroupBy.apply` where a :class:`MultiIndex` would be created instead of an :class:`Index` if a :class:`:meth:`core.window.rolling.RollingGroupby` object was created (:issue:`39732`) Reshaping ^^^^^^^^^ From f330a78f0d62cf4571d4792551be15054c924309 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 11 Feb 2021 17:10:59 -0800 Subject: [PATCH 7/9] Remove unused constructor --- pandas/core/window/ewm.py | 4 ---- pandas/core/window/expanding.py | 4 ---- pandas/core/window/rolling.py | 4 ---- 3 files changed, 12 deletions(-) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 55f68254bb4f6..4c757e5e91671 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -557,10 +557,6 @@ class ExponentialMovingWindowGroupby(BaseWindowGroupby, ExponentialMovingWindow) _attributes = ExponentialMovingWindow._attributes + BaseWindowGroupby._attributes - @property - def _constructor(self): - return ExponentialMovingWindowGroupby - def _get_window_indexer(self) -> GroupbyIndexer: """ Return an indexer class that will compute the window start and end bounds diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index c825ca833431d..b13484c80c82b 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -615,10 +615,6 @@ class ExpandingGroupby(BaseWindowGroupby, Expanding): _attributes = Expanding._attributes + BaseWindowGroupby._attributes - @property - def _constructor(self): - return ExpandingGroupby - def _get_window_indexer(self) -> GroupbyIndexer: """ Return an indexer class that will compute the window start and end bounds diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 8a0e813e2351e..9f5b20d6ae54b 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -2170,10 +2170,6 @@ class RollingGroupby(BaseWindowGroupby, Rolling): _attributes = Rolling._attributes + BaseWindowGroupby._attributes - @property - def _constructor(self): - return RollingGroupby - def _get_window_indexer(self) -> GroupbyIndexer: """ Return an indexer class that will compute the window start and end bounds From 885745a773a36e338f21b7f1a6b389fd9ece0cd8 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 22 Feb 2021 20:23:30 -0800 Subject: [PATCH 8/9] Just user the grouper --- pandas/core/groupby/groupby.py | 15 ++-------- pandas/core/window/ewm.py | 2 +- pandas/core/window/expanding.py | 2 +- pandas/core/window/rolling.py | 49 ++++++++++++++------------------- 4 files changed, 25 insertions(+), 43 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 76802e6c4b53b..8e9cf066d39e2 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1921,10 +1921,7 @@ def rolling(self, *args, **kwargs): return RollingGroupby( self._selected_obj, *args, - _grouping_indices=self.grouper.indices, - _grouping_keys=self.grouper.names, - _grouping_codes=self.grouper.codes, - _grouping_levels=self.grouper.levels, + _grouper=self.grouper, **kwargs, ) @@ -1941,10 +1938,7 @@ def expanding(self, *args, **kwargs): return ExpandingGroupby( self._selected_obj, *args, - _grouping_indices=self.grouper.indices, - _grouping_keys=self.grouper.names, - _grouping_codes=self.grouper.codes, - _grouping_levels=self.grouper.levels, + _grouper=self.grouper, **kwargs, ) @@ -1960,10 +1954,7 @@ def ewm(self, *args, **kwargs): return ExponentialMovingWindowGroupby( self._selected_obj, *args, - _grouping_indices=self.grouper.indices, - _grouping_keys=self.grouper.names, - _grouping_codes=self.grouper.codes, - _grouping_levels=self.grouper.levels, + _grouper=self.grouper, **kwargs, ) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 034c1d96a0aa6..208b5ab0023eb 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -576,7 +576,7 @@ def _get_window_indexer(self) -> GroupbyIndexer: GroupbyIndexer """ window_indexer = GroupbyIndexer( - groupby_indicies=self._grouping_indices, + groupby_indicies=self._grouper.indices, window_indexer=ExponentialMovingWindowIndexer, ) return window_indexer diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index 3a87ad19fd286..c201216a91ab1 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -638,7 +638,7 @@ def _get_window_indexer(self) -> GroupbyIndexer: GroupbyIndexer """ window_indexer = GroupbyIndexer( - groupby_indicies=self._grouping_indices, + groupby_indicies=self._grouper.indices, window_indexer=ExpandingIndexer, ) return window_indexer diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index af564dc787b31..844f04ab7c196 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -538,27 +538,18 @@ class BaseWindowGroupby(BaseWindow): Provide the groupby windowing facilities. """ - _attributes = [ - "_grouping_indices", - "_grouping_keys", - "_grouping_codes", - "_grouping_levels", - ] + _attributes = ["_grouper"] def __init__( self, obj, *args, - _grouping_indices=None, - _grouping_keys=None, - _grouping_codes=None, - _grouping_levels=None, + _grouper=None, **kwargs, ): - self._grouping_indices = _grouping_indices or {} - self._grouping_keys = _grouping_keys or [] - self._grouping_codes = _grouping_codes or [] - self._grouping_levels = _grouping_levels or [] + if _grouper is None: + raise ValueError("Must pass a Grouper object.") + self._grouper = _grouper super().__init__(obj, *args, **kwargs) def _apply( @@ -578,7 +569,7 @@ def _apply( # 1st set of levels = group by labels # 2nd set of levels = original index # Ignore 2nd set of levels if a group by label include an index level - result_index_names = copy.copy(self._grouping_keys) + result_index_names = copy.copy(self._grouper.names) grouped_object_index = None column_keys = [ @@ -595,10 +586,10 @@ def _apply( # Our result will have still kept the column in the result result = result.drop(columns=column_keys, errors="ignore") - codes = self._grouping_codes - levels = copy.copy(self._grouping_levels) + codes = self._grouper.codes + levels = copy.copy(self._grouper.levels) - group_indices = self._grouping_indices.values() + group_indices = self._grouper.indices.values() if group_indices: indexer = np.concatenate(list(group_indices)) else: @@ -632,7 +623,7 @@ def _apply_pairwise( Apply the given pairwise function given 2 pandas objects (DataFrame/Series) """ # Manually drop the grouping column first - target = target.drop(columns=self._grouping_keys, errors="ignore") + target = target.drop(columns=self._grouper.names, errors="ignore") result = super()._apply_pairwise(target, other, pairwise, func) # 1) Determine the levels + codes of the groupby levels if other is not None: @@ -643,12 +634,12 @@ def _apply_pairwise( result = concat( [ result.take(gb_indices).reindex(result.index) - for gb_indices in self._grouping_indices.values() + for gb_indices in self._grouper.indices.values() ] ) gb_pairs = ( - common.maybe_make_list(pair) for pair in self._grouping_indices.keys() + common.maybe_make_list(pair) for pair in self._grouper.indices.keys() ) groupby_codes = [] groupby_levels = [] @@ -662,10 +653,10 @@ def _apply_pairwise( else: # When we evaluate the pairwise=True result, repeat the groupby # labels by the number of columns in the original object - groupby_codes = self._grouping_codes - groupby_levels = self._grouping_levels + groupby_codes = self._grouper.codes + groupby_levels = self._grouper.levels - group_indices = self._grouping_indices.values() + group_indices = self._grouper.indices.values() if group_indices: indexer = np.concatenate(list(group_indices)) else: @@ -692,7 +683,7 @@ def _apply_pairwise( # 3) Create the resulting index by combining 1) + 2) result_codes = groupby_codes + result_codes result_levels = groupby_levels + result_levels - result_names = self._grouping_keys + result_names + result_names = self._grouper.names + result_names result_index = MultiIndex( result_levels, result_codes, names=result_names, verify_integrity=False @@ -708,9 +699,9 @@ def _create_data(self, obj: FrameOrSeries) -> FrameOrSeries: # to the groups # GH 36197 if not obj.empty: - groupby_order = np.concatenate( - list(self._grouping_indices.values()) - ).astype(np.int64) + groupby_order = np.concatenate(list(self._grouper.indices.values())).astype( + np.int64 + ) obj = obj.take(groupby_order) return super()._create_data(obj) @@ -2221,7 +2212,7 @@ def _get_window_indexer(self) -> GroupbyIndexer: window_indexer = GroupbyIndexer( index_array=index_array, window_size=window, - groupby_indicies=self._grouping_indices, + groupby_indicies=self._grouper.indices, window_indexer=rolling_indexer, indexer_kwargs=indexer_kwargs, ) From 54927a70ec148f9e7ff4dd1c688a837892e575fd Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 22 Feb 2021 20:54:26 -0800 Subject: [PATCH 9/9] Make expected index instead --- pandas/tests/window/test_groupby.py | 50 +++++++++++++++-------------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index df31d24cc5326..c3c5bbe460134 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -82,10 +82,10 @@ def test_rolling(self, f): r = g.rolling(window=4) result = getattr(r, f)() - # GH 39732 - g.mutated = True - g.grouper.mutated = True expected = g.apply(lambda x: getattr(x.rolling(4), f)()) + # GH 39732 + expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) + expected.index = expected_index tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("f", ["std", "var"]) @@ -94,10 +94,10 @@ def test_rolling_ddof(self, f): r = g.rolling(window=4) result = getattr(r, f)(ddof=1) - # GH 39732 - g.mutated = True - g.grouper.mutated = True expected = g.apply(lambda x: getattr(x.rolling(4), f)(ddof=1)) + # GH 39732 + expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) + expected.index = expected_index tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -106,13 +106,14 @@ def test_rolling_ddof(self, f): def test_rolling_quantile(self, interpolation): g = self.frame.groupby("A") r = g.rolling(window=4) + result = r.quantile(0.4, interpolation=interpolation) - # GH 39732 - g.mutated = True - g.grouper.mutated = True expected = g.apply( lambda x: x.rolling(4).quantile(0.4, interpolation=interpolation) ) + # GH 39732 + expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) + expected.index = expected_index tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("f", ["corr", "cov"]) @@ -145,10 +146,10 @@ def test_rolling_apply(self, raw): # reduction result = r.apply(lambda x: x.sum(), raw=raw) - # GH 39732 - g.mutated = True - g.grouper.mutated = True expected = g.apply(lambda x: x.rolling(4).apply(lambda y: y.sum(), raw=raw)) + # GH 39732 + expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) + expected.index = expected_index tm.assert_frame_equal(result, expected) def test_rolling_apply_mutability(self): @@ -678,10 +679,10 @@ def test_expanding(self, f): r = g.expanding() result = getattr(r, f)() - # GH 39732 - g.mutated = True - g.grouper.mutated = True expected = g.apply(lambda x: getattr(x.expanding(), f)()) + # GH 39732 + expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) + expected.index = expected_index tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("f", ["std", "var"]) @@ -690,10 +691,10 @@ def test_expanding_ddof(self, f): r = g.expanding() result = getattr(r, f)(ddof=0) - # GH 39732 - g.mutated = True - g.grouper.mutated = True expected = g.apply(lambda x: getattr(x.expanding(), f)(ddof=0)) + # GH 39732 + expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) + expected.index = expected_index tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -702,13 +703,14 @@ def test_expanding_ddof(self, f): def test_expanding_quantile(self, interpolation): g = self.frame.groupby("A") r = g.expanding() + result = r.quantile(0.4, interpolation=interpolation) - # GH 39732 - g.mutated = True - g.grouper.mutated = True expected = g.apply( lambda x: x.expanding().quantile(0.4, interpolation=interpolation) ) + # GH 39732 + expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) + expected.index = expected_index tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("f", ["corr", "cov"]) @@ -745,10 +747,10 @@ def test_expanding_apply(self, raw): # reduction result = r.apply(lambda x: x.sum(), raw=raw) - # GH 39732 - g.mutated = True - g.grouper.mutated = True expected = g.apply(lambda x: x.expanding().apply(lambda y: y.sum(), raw=raw)) + # GH 39732 + expected_index = MultiIndex.from_arrays([self.frame["A"], range(40)]) + expected.index = expected_index tm.assert_frame_equal(result, expected)