From 5f2f6a5aeb59787a639a7279c87f6c8de76ee2f7 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 17 May 2021 18:18:27 -0700 Subject: [PATCH 1/2] REF: Grouping.index -> Grouping._index --- pandas/core/groupby/grouper.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 4aac2630feb2c..6460350b10122 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -456,7 +456,7 @@ def __init__( self._orig_grouper = grouper self.grouper = _convert_grouper(index, grouper) self.all_grouper = None - self.index = index + self._index = index self.sort = sort self.obj = obj self.observed = observed @@ -470,11 +470,16 @@ def __init__( ilevel = self._ilevel if ilevel is not None: + mapper = self.grouper ( self.grouper, # Index self._codes, self._group_index, - ) = index._get_grouper_for_level(self.grouper, ilevel) + ) = index._get_grouper_for_level(mapper, ilevel) + + # In extant tests, the new self.grouper matches + # `index.get_level_values(ilevel)` whenever + # mapper is None and isinstance(index, MultiIndex) # a passed Grouper like, directly get the grouper in the same way # as single grouper groupby, use the group_info to get codes @@ -516,10 +521,9 @@ def __init__( if getattr(self.grouper, "ndim", 1) != 1: t = self.name or str(type(self.grouper)) raise ValueError(f"Grouper for '{t}' not 1-dimensional") - self.grouper = self.index.map(self.grouper) + self.grouper = index.map(self.grouper) if not ( - hasattr(self.grouper, "__len__") - and len(self.grouper) == len(self.index) + hasattr(self.grouper, "__len__") and len(self.grouper) == len(index) ): grper = pprint_thing(self.grouper) errmsg = ( @@ -544,7 +548,7 @@ def __iter__(self): def name(self) -> Hashable: ilevel = self._ilevel if ilevel is not None: - return self.index.names[ilevel] + return self._index.names[ilevel] if isinstance(self._orig_grouper, (Index, Series)): return self._orig_grouper.name @@ -566,7 +570,7 @@ def _ilevel(self) -> int | None: if level is None: return None if not isinstance(level, int): - index = self.index + index = self._index if level not in index.names: raise AssertionError(f"Level {level} not in index") return index.names.index(level) @@ -658,7 +662,7 @@ def _make_codes(self) -> None: @cache_readonly def groups(self) -> dict[Hashable, np.ndarray]: - return self.index.groupby(Categorical.from_codes(self.codes, self.group_index)) + return self._index.groupby(Categorical.from_codes(self.codes, self.group_index)) def get_grouper( From 115cc87708566134f7df9b978410a0ab41d41d17 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 17 May 2021 19:08:54 -0700 Subject: [PATCH 2/2] REF: privatize Grouping attrs --- pandas/core/groupby/grouper.py | 27 +++++++++++++++------------ pandas/core/groupby/ops.py | 4 ++-- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 6460350b10122..7db969b3d0da2 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -440,6 +440,9 @@ class Grouping: _codes: np.ndarray | None = None _group_index: Index | None = None + _passed_categorical: bool + _all_grouper: Categorical | None + _index: Index def __init__( self, @@ -455,13 +458,13 @@ def __init__( self.level = level self._orig_grouper = grouper self.grouper = _convert_grouper(index, grouper) - self.all_grouper = None + self._all_grouper = None self._index = index - self.sort = sort + self._sort = sort self.obj = obj - self.observed = observed + self._observed = observed self.in_axis = in_axis - self.dropna = dropna + self._dropna = dropna self._passed_categorical = False @@ -510,8 +513,8 @@ def __init__( if is_categorical_dtype(self.grouper): self._passed_categorical = True - self.grouper, self.all_grouper = recode_for_groupby( - self.grouper, self.sort, observed + self.grouper, self._all_grouper = recode_for_groupby( + self.grouper, sort, observed ) # no level passed @@ -605,10 +608,10 @@ def codes(self) -> np.ndarray: @cache_readonly def result_index(self) -> Index: - if self.all_grouper is not None: + if self._all_grouper is not None: group_idx = self.group_index assert isinstance(group_idx, CategoricalIndex) - return recode_from_groupby(self.all_grouper, self.sort, group_idx) + return recode_from_groupby(self._all_grouper, self._sort, group_idx) return self.group_index @cache_readonly @@ -619,10 +622,10 @@ def group_index(self) -> Index: cat = self.grouper categories = cat.categories - if self.observed: + if self._observed: codes = algorithms.unique1d(cat.codes) codes = codes[codes != -1] - if self.sort or cat.ordered: + if self._sort or cat.ordered: codes = np.sort(codes) else: codes = np.arange(len(categories)) @@ -649,12 +652,12 @@ def _make_codes(self) -> None: uniques = self.grouper.result_index else: # GH35667, replace dropna=False with na_sentinel=None - if not self.dropna: + if not self._dropna: na_sentinel = None else: na_sentinel = -1 codes, uniques = algorithms.factorize( - self.grouper, sort=self.sort, na_sentinel=na_sentinel + self.grouper, sort=self._sort, na_sentinel=na_sentinel ) uniques = Index(uniques, name=self.name) self._codes = codes diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index b37467fb8cf11..13aea32b76373 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -678,7 +678,7 @@ def __init__( self.axis = axis self._groupings: list[grouper.Grouping] = list(groupings) - self.sort = sort + self._sort = sort self.group_keys = group_keys self.mutated = mutated self.indexer = indexer @@ -891,7 +891,7 @@ def codes_info(self) -> np.ndarray: def _get_compressed_codes(self) -> tuple[np.ndarray, np.ndarray]: if len(self.groupings) > 1: group_index = get_group_index(self.codes, self.shape, sort=True, xnull=True) - return compress_group_index(group_index, sort=self.sort) + return compress_group_index(group_index, sort=self._sort) ping = self.groupings[0] return ping.codes, np.arange(len(ping.group_index))