From 27d552fb4fd29304e0fa1ab80e597552bbc11823 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 11 May 2021 13:21:51 -0700 Subject: [PATCH] REF: remove name arg from Grouping --- pandas/core/groupby/grouper.py | 90 ++++++++++++++++++++++------------ pandas/core/groupby/ops.py | 2 +- 2 files changed, 59 insertions(+), 33 deletions(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index f1762a2535ff7..02555660a6990 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -439,20 +439,22 @@ class Grouping: * groups : dict of {group -> label_list} """ + _codes: np.ndarray | None = None + _group_index: Index | None = None + def __init__( self, index: Index, grouper=None, obj: FrameOrSeries | None = None, - name: Hashable = None, level=None, sort: bool = True, observed: bool = False, in_axis: bool = False, dropna: bool = True, ): - self.name = name self.level = level + self._orig_grouper = grouper self.grouper = _convert_grouper(index, grouper) self.all_grouper = None self.index = index @@ -462,30 +464,20 @@ def __init__( self.in_axis = in_axis self.dropna = dropna - # right place for this? - if isinstance(grouper, (Series, Index)) and name is None: - self.name = grouper.name - if isinstance(grouper, MultiIndex): self.grouper = grouper._values # we have a single grouper which may be a myriad of things, # some of which are dependent on the passing in level - if level is not None: - if not isinstance(level, int): - if level not in index.names: - raise AssertionError(f"Level {level} not in index") - level = index.names.index(level) - - if self.name is None: - self.name = index.names[level] + ilevel = self._ilevel + if ilevel is not None: ( self.grouper, self._codes, self._group_index, - ) = index._get_grouper_for_level(self.grouper, level) + ) = index._get_grouper_for_level(self.grouper, ilevel) # a passed Grouper like, directly get the grouper in the same way # as single grouper groupby, use the group_info to get codes @@ -494,16 +486,22 @@ def __init__( # what key/level refer to exactly, don't need to # check again as we have by this point converted these # to an actual value (rather than a pd.Grouper) - _, grouper, _ = self.grouper._get_grouper( + _, newgrouper, newobj = self.grouper._get_grouper( # error: Value of type variable "FrameOrSeries" of "_get_grouper" # of "Grouper" cannot be "Optional[FrameOrSeries]" self.obj, # type: ignore[type-var] validate=False, ) - if self.name is None: - self.name = grouper.result_index.name - self.obj = self.grouper.obj - self.grouper = grouper._get_grouper() + self.obj = newobj + + ng = newgrouper._get_grouper() + if isinstance(newgrouper, ops.BinGrouper): + # in this case we have `ng is newgrouper` + self.grouper = ng + else: + # ops.BaseGrouper + # use Index instead of ndarray so we can recover the name + self.grouper = Index(ng, name=newgrouper.result_index.name) else: if self.grouper is None and self.name is not None and self.obj is not None: @@ -538,10 +536,6 @@ def __init__( name=self.name, ) - # we are done - if isinstance(self.grouper, Grouping): - self.grouper = self.grouper.grouper - # no level passed elif not isinstance( self.grouper, (Series, Index, ExtensionArray, np.ndarray) @@ -577,8 +571,37 @@ def __repr__(self) -> str: def __iter__(self): return iter(self.indices) - _codes: np.ndarray | None = None - _group_index: Index | None = None + @cache_readonly + def name(self) -> Hashable: + ilevel = self._ilevel + if ilevel is not None: + return self.index.names[ilevel] + + if isinstance(self._orig_grouper, (Index, Series)): + return self._orig_grouper.name + + elif isinstance(self.grouper, ops.BaseGrouper): + return self.grouper.result_index.name + + elif isinstance(self.grouper, Index): + return self.grouper.name + + return None + + @cache_readonly + def _ilevel(self) -> int | None: + """ + If necessary, converted index level name to index level position. + """ + level = self.level + if level is None: + return None + if not isinstance(level, int): + index = self.index + if level not in index.names: + raise AssertionError(f"Level {level} not in index") + return index.names.index(level) + return level @property def ngroups(self) -> int: @@ -806,25 +829,29 @@ def is_in_obj(gpr) -> bool: for gpr, level in zip(keys, levels): if is_in_obj(gpr): # df.groupby(df['name']) - in_axis, name = True, gpr.name - exclusions.add(name) + in_axis = True + exclusions.add(gpr.name) elif is_in_axis(gpr): # df.groupby('name') if gpr in obj: if validate: obj._check_label_or_level_ambiguity(gpr, axis=axis) in_axis, name, gpr = True, gpr, obj[gpr] + if gpr.ndim != 1: + # non-unique columns; raise here to get the name in the + # exception message + raise ValueError(f"Grouper for '{name}' not 1-dimensional") exclusions.add(name) elif obj._is_level_reference(gpr, axis=axis): - in_axis, name, level, gpr = False, None, gpr, None + in_axis, level, gpr = False, gpr, None else: raise KeyError(gpr) elif isinstance(gpr, Grouper) and gpr.key is not None: # Add key to exclusions exclusions.add(gpr.key) - in_axis, name = False, None + in_axis = False else: - in_axis, name = False, None + in_axis = False if is_categorical_dtype(gpr) and len(gpr) != obj.shape[axis]: raise ValueError( @@ -839,7 +866,6 @@ def is_in_obj(gpr) -> bool: group_axis, gpr, obj=obj, - name=name, level=level, sort=sort, observed=observed, diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 46b47bc29d8a6..6342b58c1b641 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -1200,7 +1200,7 @@ def names(self) -> list[Hashable]: @property def groupings(self) -> list[grouper.Grouping]: lev = self.binlabels - ping = grouper.Grouping(lev, lev, in_axis=False, level=None, name=lev.name) + ping = grouper.Grouping(lev, lev, in_axis=False, level=None) return [ping] def _aggregate_series_fast(self, obj: Series, func: F) -> np.ndarray: