diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 29a161676b2db..4310d849d66e8 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -3064,7 +3064,7 @@ def _reindex_output( # reindexing only applies to a Categorical grouper elif not any( - isinstance(ping.grouper, (Categorical, CategoricalIndex)) + isinstance(ping.grouping_vector, (Categorical, CategoricalIndex)) for ping in groupings ): return output diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index e2855cbc90425..877204f5bb2a2 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -455,7 +455,7 @@ def __init__( ): self.level = level self._orig_grouper = grouper - self.grouper = _convert_grouper(index, grouper) + self.grouping_vector = _convert_grouper(index, grouper) self.all_grouper = None self.index = index self.sort = sort @@ -472,19 +472,19 @@ def __init__( ilevel = self._ilevel if ilevel is not None: ( - self.grouper, # Index + self.grouping_vector, # Index self._codes, self._group_index, - ) = index._get_grouper_for_level(self.grouper, ilevel) + ) = index._get_grouper_for_level(self.grouping_vector, ilevel) # a passed Grouper like, directly get the grouper in the same way # as single grouper groupby, use the group_info to get codes - elif isinstance(self.grouper, Grouper): + elif isinstance(self.grouping_vector, Grouper): # get the new grouper; we already have disambiguated # what key/level refer to exactly, don't need to # check again as we have by this point converted these # to an actual value (rather than a pd.Grouper) - _, newgrouper, newobj = self.grouper._get_grouper( + _, newgrouper, newobj = self.grouping_vector._get_grouper( # error: Value of type variable "FrameOrSeries" of "_get_grouper" # of "Grouper" cannot be "Optional[FrameOrSeries]" self.obj, # type: ignore[type-var] @@ -495,44 +495,46 @@ def __init__( ng = newgrouper._get_grouper() if isinstance(newgrouper, ops.BinGrouper): # in this case we have `ng is newgrouper` - self.grouper = ng + self.grouping_vector = ng else: # ops.BaseGrouper # use Index instead of ndarray so we can recover the name - self.grouper = Index(ng, name=newgrouper.result_index.name) + self.grouping_vector = Index(ng, name=newgrouper.result_index.name) - elif is_categorical_dtype(self.grouper): + elif is_categorical_dtype(self.grouping_vector): # a passed Categorical self._passed_categorical = True - self.grouper, self.all_grouper = recode_for_groupby( - self.grouper, self.sort, observed + self.grouping_vector, self.all_grouper = recode_for_groupby( + self.grouping_vector, self.sort, observed ) - elif not isinstance(self.grouper, (Series, Index, ExtensionArray, np.ndarray)): + elif not isinstance( + self.grouping_vector, (Series, Index, ExtensionArray, np.ndarray) + ): # no level passed - if getattr(self.grouper, "ndim", 1) != 1: - t = self.name or str(type(self.grouper)) + if getattr(self.grouping_vector, "ndim", 1) != 1: + t = self.name or str(type(self.grouping_vector)) raise ValueError(f"Grouper for '{t}' not 1-dimensional") - self.grouper = self.index.map(self.grouper) + self.grouping_vector = self.index.map(self.grouping_vector) if not ( - hasattr(self.grouper, "__len__") - and len(self.grouper) == len(self.index) + hasattr(self.grouping_vector, "__len__") + and len(self.grouping_vector) == len(self.index) ): - grper = pprint_thing(self.grouper) + grper = pprint_thing(self.grouping_vector) errmsg = ( "Grouper result violates len(labels) == " f"len(data)\nresult: {grper}" ) - self.grouper = None # Try for sanity + self.grouping_vector = None # Try for sanity raise AssertionError(errmsg) - if isinstance(self.grouper, np.ndarray): + if isinstance(self.grouping_vector, np.ndarray): # if we have a date/time-like grouper, make sure that we have # Timestamps like - self.grouper = sanitize_to_nanoseconds(self.grouper) + self.grouping_vector = sanitize_to_nanoseconds(self.grouping_vector) def __repr__(self) -> str: return f"Grouping({self.name})" @@ -549,11 +551,11 @@ def name(self) -> Hashable: if isinstance(self._orig_grouper, (Index, Series)): return self._orig_grouper.name - elif isinstance(self.grouper, ops.BaseGrouper): - return self.grouper.result_index.name + elif isinstance(self.grouping_vector, ops.BaseGrouper): + return self.grouping_vector.result_index.name - elif isinstance(self.grouper, Index): - return self.grouper.name + elif isinstance(self.grouping_vector, Index): + return self.grouping_vector.name # otherwise we have ndarray or ExtensionArray -> no name return None @@ -580,10 +582,10 @@ def ngroups(self) -> int: @cache_readonly def indices(self): # we have a list of groupers - if isinstance(self.grouper, ops.BaseGrouper): - return self.grouper.indices + if isinstance(self.grouping_vector, ops.BaseGrouper): + return self.grouping_vector.indices - values = Categorical(self.grouper) + values = Categorical(self.grouping_vector) return values._reverse_indexer() @property @@ -624,7 +626,7 @@ def _codes_and_uniques(self) -> tuple[np.ndarray, ArrayLike]: if self._passed_categorical: # we make a CategoricalIndex out of the cat grouper # preserving the categories / ordered attributes - cat = self.grouper + cat = self.grouping_vector categories = cat.categories if self.observed: @@ -640,10 +642,10 @@ def _codes_and_uniques(self) -> tuple[np.ndarray, ArrayLike]: ) return cat.codes, uniques - elif isinstance(self.grouper, ops.BaseGrouper): + elif isinstance(self.grouping_vector, ops.BaseGrouper): # we have a list of groupers - codes = self.grouper.codes_info - uniques = self.grouper.result_arraylike + codes = self.grouping_vector.codes_info + uniques = self.grouping_vector.result_arraylike else: # GH35667, replace dropna=False with na_sentinel=None if not self.dropna: @@ -651,7 +653,7 @@ def _codes_and_uniques(self) -> tuple[np.ndarray, ArrayLike]: else: na_sentinel = -1 codes, uniques = algorithms.factorize( - self.grouper, sort=self.sort, na_sentinel=na_sentinel + self.grouping_vector, sort=self.sort, na_sentinel=na_sentinel ) return codes, uniques diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 746c6e0056064..b995a74c20a40 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -734,7 +734,7 @@ def _get_grouper(self): We have a specific method of grouping, so cannot convert to a Index for our grouper. """ - return self.groupings[0].grouper + return self.groupings[0].grouping_vector @final def _get_group_keys(self): @@ -858,7 +858,7 @@ def groups(self) -> dict[Hashable, np.ndarray]: if len(self.groupings) == 1: return self.groupings[0].groups else: - to_groupby = zip(*(ping.grouper for ping in self.groupings)) + to_groupby = zip(*(ping.grouping_vector for ping in self.groupings)) index = Index(to_groupby) return self.axis.groupby(index) diff --git a/pandas/tests/extension/base/groupby.py b/pandas/tests/extension/base/groupby.py index 30b115b9dba6f..9b2792f3b0aea 100644 --- a/pandas/tests/extension/base/groupby.py +++ b/pandas/tests/extension/base/groupby.py @@ -15,8 +15,8 @@ def test_grouping_grouper(self, data_for_grouping): gr1 = df.groupby("A").grouper.groupings[0] gr2 = df.groupby("B").grouper.groupings[0] - tm.assert_numpy_array_equal(gr1.grouper, df.A.values) - tm.assert_extension_array_equal(gr2.grouper, data_for_grouping) + tm.assert_numpy_array_equal(gr1.grouping_vector, df.A.values) + tm.assert_extension_array_equal(gr2.grouping_vector, data_for_grouping) @pytest.mark.parametrize("as_index", [True, False]) def test_groupby_extension_agg(self, as_index, data_for_grouping): diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py index 33d82a1d64fb7..8a52734b27bc7 100644 --- a/pandas/tests/extension/test_boolean.py +++ b/pandas/tests/extension/test_boolean.py @@ -262,8 +262,8 @@ def test_grouping_grouper(self, data_for_grouping): gr1 = df.groupby("A").grouper.groupings[0] gr2 = df.groupby("B").grouper.groupings[0] - tm.assert_numpy_array_equal(gr1.grouper, df.A.values) - tm.assert_extension_array_equal(gr2.grouper, data_for_grouping) + tm.assert_numpy_array_equal(gr1.grouping_vector, df.A.values) + tm.assert_extension_array_equal(gr2.grouping_vector, data_for_grouping) @pytest.mark.parametrize("as_index", [True, False]) def test_groupby_extension_agg(self, as_index, data_for_grouping): diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index b601ba92886d9..0a33a2bbe1d0a 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -161,7 +161,7 @@ def test_agg_grouping_is_list_tuple(ts): df = tm.makeTimeDataFrame() grouped = df.groupby(lambda x: x.year) - grouper = grouped.grouper.groupings[0].grouper + grouper = grouped.grouper.groupings[0].grouping_vector grouped.grouper.groupings[0] = Grouping(ts.index, list(grouper)) result = grouped.agg(np.mean)