
Commit a7c8f77

REF: groupby internal names (pandas-dev#55551)
1 parent 7786be4 commit a7c8f77

File tree: 8 files changed, +53 -53 lines changed

pandas/api/typing/__init__.py (+2 -2)

@@ -18,7 +18,7 @@
 )
 from pandas.core.window import (
     Expanding,
-    ExpandingGroupby,
+    ExpandingGroupBy,
     ExponentialMovingWindow,
     ExponentialMovingWindowGroupby,
     Rolling,
@@ -35,7 +35,7 @@
     "DataFrameGroupBy",
     "DatetimeIndexResamplerGroupby",
     "Expanding",
-    "ExpandingGroupby",
+    "ExpandingGroupBy",
     "ExponentialMovingWindow",
     "ExponentialMovingWindowGroupby",
     "JsonReader",

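Note (not part of the commit): a minimal sketch of how the ExpandingGroupby -> ExpandingGroupBy rename shows up through the public pandas.api.typing namespace, assuming a pandas build with this commit applied.

    # Illustration only; assumes this commit is applied, so the typing alias
    # is exported as ExpandingGroupBy (formerly ExpandingGroupby).
    import pandas as pd
    from pandas.api.typing import ExpandingGroupBy

    df = pd.DataFrame({"key": ["a", "a", "b"], "val": [1, 2, 3]})
    exp = df.groupby("key")["val"].expanding()
    assert isinstance(exp, ExpandingGroupBy)
    print(exp.sum())  # expanding sum within each group
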
pandas/core/groupby/generic.py (+14 -14)

@@ -281,7 +281,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
                 return self.obj._constructor(
                     [],
                     name=self.obj.name,
-                    index=self.grouper.result_index,
+                    index=self.grouper.agg_index,
                     dtype=obj.dtype,
                 )

@@ -306,8 +306,8 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
                     stacklevel=find_stack_level(),
                 )

-            # result is a dict whose keys are the elements of result_index
-            result = Series(result, index=self.grouper.result_index)
+            # result is a dict whose keys are the elements of agg_index
+            result = Series(result, index=self.grouper.agg_index)
             result = self._wrap_aggregated_output(result)
             return result

@@ -402,7 +402,7 @@ def _wrap_applied_output(
                 # GH#47787 see test_group_on_empty_multiindex
                 res_index = data.index
             else:
-                res_index = self.grouper.result_index
+                res_index = self.grouper.agg_index

             return self.obj._constructor(
                 [],
@@ -414,7 +414,7 @@

         if isinstance(values[0], dict):
             # GH #823 #24880
-            index = self.grouper.result_index
+            index = self.grouper.agg_index
             res_df = self.obj._constructor_expanddim(values, index=index)
             res_df = self._reindex_output(res_df)
             # if self.observed is False,
@@ -437,7 +437,7 @@
         else:
             # GH #6265 #24880
             result = self.obj._constructor(
-                data=values, index=self.grouper.result_index, name=self.obj.name
+                data=values, index=self.grouper.agg_index, name=self.obj.name
             )
             if not self.as_index:
                 result = self._insert_inaxis_grouper(result)
@@ -562,7 +562,7 @@ def _transform_general(
             from pandas.core.reshape.concat import concat

             concatenated = concat(results)
-            result = self._set_result_index_ordered(concatenated)
+            result = self._set_agg_index_ordered(concatenated)
         else:
             result = self.obj._constructor(dtype=np.float64)

@@ -706,7 +706,7 @@ def nunique(self, dropna: bool = True) -> Series | DataFrame:
             res = out
         else:
             res = out[1:]
-        ri = self.grouper.result_index
+        ri = self.grouper.agg_index

         # we might have duplications among the bins
         if len(res) != len(ri):
@@ -1561,9 +1561,9 @@ def _aggregate_frame(self, func, *args, **kwargs) -> DataFrame:
             fres = func(grp_df, *args, **kwargs)
             result[name] = fres

-        result_index = self.grouper.result_index
+        agg_index = self.grouper.agg_index
         other_ax = obj.axes[1 - self.axis]
-        out = self.obj._constructor(result, index=other_ax, columns=result_index)
+        out = self.obj._constructor(result, index=other_ax, columns=agg_index)
         if self.axis == 0:
             out = out.T

@@ -1581,7 +1581,7 @@ def _wrap_applied_output(
             # GH#47787 see test_group_on_empty_multiindex
             res_index = data.index
         else:
-            res_index = self.grouper.result_index
+            res_index = self.grouper.agg_index

         result = self.obj._constructor(index=res_index, columns=data.columns)
         result = result.astype(data.dtypes, copy=False)
@@ -1601,7 +1601,7 @@
             is_transform=is_transform,
         )

-        key_index = self.grouper.result_index if self.as_index else None
+        key_index = self.grouper.agg_index if self.as_index else None

         if isinstance(first_not_none, (np.ndarray, Index)):
             # GH#1738: values is list of arrays of unequal lengths
@@ -1767,7 +1767,7 @@ def _transform_general(self, func, engine, engine_kwargs, *args, **kwargs):
         other_axis = 1 if self.axis == 0 else 0  # switches between 0 & 1
         concatenated = concat(applied, axis=self.axis, verify_integrity=False)
         concatenated = concatenated.reindex(concat_index, axis=other_axis, copy=False)
-        return self._set_result_index_ordered(concatenated)
+        return self._set_agg_index_ordered(concatenated)

     __examples_dataframe_doc = dedent(
         """
@@ -2048,7 +2048,7 @@ def _apply_to_column_groupbys(self, func) -> DataFrame:

         if not len(results):
             # concat would raise
-            res_df = DataFrame([], columns=columns, index=self.grouper.result_index)
+            res_df = DataFrame([], columns=columns, index=self.grouper.agg_index)
         else:
             res_df = concat(results, keys=columns, axis=1)
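
Note (not part of the commit): the generic.py changes above are pure call-site renames. A hedged sketch of the relationship they rely on, using the internal .grouper attribute; agg_index is the post-commit name of result_index and is not stable public API.

    # Illustration only; assumes this commit is applied and relies on
    # internal attributes (.grouper, .agg_index), which may change or warn.
    import pandas as pd

    df = pd.DataFrame({"key": ["a", "a", "b"], "val": [1, 2, 3]})
    gb = df.groupby("key")

    agg = gb.sum()
    # With as_index=True (the default), aggregated output is labelled by the
    # grouper's agg_index: one entry per observed group.
    assert agg.index.equals(gb.grouper.agg_index)
    print(gb.grouper.agg_index)  # Index(['a', 'b'], dtype='object', name='key')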

pandas/core/groupby/groupby.py (+18 -18)

@@ -149,7 +149,7 @@ class providing the base-class of operations.

     from pandas.core.resample import Resampler
     from pandas.core.window import (
-        ExpandingGroupby,
+        ExpandingGroupBy,
         ExponentialMovingWindowGroupby,
         RollingGroupby,
     )
@@ -1415,7 +1415,7 @@ def curried(x):
         if self.grouper.has_dropped_na and is_transform:
             # result will have dropped rows due to nans, fill with null
             # and ensure index is ordered same as the input
-            result = self._set_result_index_ordered(result)
+            result = self._set_agg_index_ordered(result)
         return result

     # -----------------------------------------------------------------
@@ -1433,7 +1433,7 @@ def _concat_objects(
         if self.group_keys and not is_transform:
             if self.as_index:
                 # possible MI return case
-                group_keys = self.grouper.result_index
+                group_keys = self.grouper.agg_index
                 group_levels = self.grouper.levels
                 group_names = self.grouper.names

@@ -1490,10 +1490,10 @@
         return result

     @final
-    def _set_result_index_ordered(
+    def _set_agg_index_ordered(
         self, result: OutputFrameOrSeries
     ) -> OutputFrameOrSeries:
-        # set the result index on the passed values object and
+        # set the agg index on the passed values object and
         # return the new object, xref 8046

         obj_axis = self.obj._get_axis(self.axis)
@@ -1586,7 +1586,7 @@ def _wrap_aggregated_output(
             index = Index(range(self.grouper.ngroups))

         else:
-            index = self.grouper.result_index
+            index = self.grouper.agg_index

         if qs is not None:
             # We get here with len(qs) != 1 and not self.as_index
@@ -1674,7 +1674,7 @@ def _numba_agg_general(
         res_mgr = df._mgr.apply(
             aggregator, labels=ids, ngroups=ngroups, **aggregator_kwargs
         )
-        res_mgr.axes[1] = self.grouper.result_index
+        res_mgr.axes[1] = self.grouper.agg_index
         result = df._constructor_from_mgr(res_mgr, axes=res_mgr.axes)

         if data.ndim == 1:
@@ -1745,7 +1745,7 @@ def _aggregate_with_numba(self, func, *args, engine_kwargs=None, **kwargs):
             len(df.columns),
             *args,
         )
-        index = self.grouper.result_index
+        index = self.grouper.agg_index
         if data.ndim == 1:
             result_kwargs = {"name": data.name}
             result = result.ravel()
@@ -2038,7 +2038,7 @@ def _wrap_transform_fast_result(self, result: NDFrameT) -> NDFrameT:

         # for each col, reshape to size of original frame by take operation
         ids, _, _ = self.grouper.group_info
-        result = result.reindex(self.grouper.result_index, axis=self.axis, copy=False)
+        result = result.reindex(self.grouper.agg_index, axis=self.axis, copy=False)

         if self.obj.ndim == 1:
             # i.e. SeriesGroupBy
@@ -2814,7 +2814,7 @@ def _value_counts(
             and not grouping._observed
             for grouping in groupings
         ):
-            levels_list = [ping.result_index for ping in groupings]
+            levels_list = [ping.agg_index for ping in groupings]
             multi_index, _ = MultiIndex.from_product(
                 levels_list, names=[ping.name for ping in groupings]
             ).sortlevel()
@@ -3514,7 +3514,7 @@ def ohlc(self) -> DataFrame:

         agg_names = ["open", "high", "low", "close"]
         result = self.obj._constructor_expanddim(
-            res_values, index=self.grouper.result_index, columns=agg_names
+            res_values, index=self.grouper.agg_index, columns=agg_names
         )
         return self._reindex_output(result)

@@ -3835,18 +3835,18 @@ def rolling(self, *args, **kwargs) -> RollingGroupby:
     @final
     @Substitution(name="groupby")
     @Appender(_common_see_also)
-    def expanding(self, *args, **kwargs) -> ExpandingGroupby:
+    def expanding(self, *args, **kwargs) -> ExpandingGroupBy:
         """
         Return an expanding grouper, providing expanding
         functionality per group.

         Returns
         -------
-        pandas.api.typing.ExpandingGroupby
+        pandas.api.typing.ExpandingGroupBy
         """
-        from pandas.core.window import ExpandingGroupby
+        from pandas.core.window import ExpandingGroupBy

-        return ExpandingGroupby(
+        return ExpandingGroupBy(
             self._selected_obj,
             *args,
             _grouper=self.grouper,
@@ -5594,7 +5594,7 @@ def _reindex_output(
             output = output.drop(labels=list(g_names), axis=1)

         # Set a temp index and reindex (possibly expanding)
-        output = output.set_index(self.grouper.result_index).reindex(
+        output = output.set_index(self.grouper.agg_index).reindex(
             index, copy=False, fill_value=fill_value
         )

@@ -5782,8 +5782,8 @@ def _idxmax_idxmin(
         if len(self.grouper.groupings) == 1:
             result_len = len(self.grouper.groupings[0].grouping_vector.unique())
         else:
-            # result_index only contains observed groups in this case
-            result_len = len(self.grouper.result_index)
+            # agg_index only contains observed groups in this case
+            result_len = len(self.grouper.agg_index)
         assert result_len <= expected_len
         has_unobserved = result_len < expected_len
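
Note (not part of the commit): _set_agg_index_ordered (formerly _set_result_index_ordered) is what restores the caller's row order on the general transform path. A small illustration of that guarantee, written against ordinary public API:

    # The lambda forces the general (non-cython) transform path; the output is
    # re-ordered to match the input's index rather than group order.
    import pandas as pd

    df = pd.DataFrame({"key": ["b", "a", "b", "a"], "val": [1, 2, 3, 4]})
    out = df.groupby("key")["val"].transform(lambda s: s.cumsum())

    assert out.index.equals(df.index)  # original row order preserved
    print(out.tolist())                # [1, 2, 4, 6]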

pandas/core/groupby/grouper.py (+7 -7)

@@ -591,7 +591,7 @@ def __init__(
                 # error: Cannot determine type of "grouping_vector" [has-type]
                 ng = newgrouper.groupings[0].grouping_vector  # type: ignore[has-type]
                 # use Index instead of ndarray so we can recover the name
-                grouping_vector = Index(ng, name=newgrouper.result_index.name)
+                grouping_vector = Index(ng, name=newgrouper.agg_index.name)

         elif not isinstance(
             grouping_vector, (Series, Index, ExtensionArray, np.ndarray)
@@ -651,7 +651,7 @@ def name(self) -> Hashable:
             return self._orig_grouper.name

         elif isinstance(self.grouping_vector, ops.BaseGrouper):
-            return self.grouping_vector.result_index.name
+            return self.grouping_vector.agg_index.name

         elif isinstance(self.grouping_vector, Index):
             return self.grouping_vector.name
@@ -694,21 +694,21 @@ def codes(self) -> npt.NDArray[np.signedinteger]:
     @cache_readonly
     def group_arraylike(self) -> ArrayLike:
         """
-        Analogous to result_index, but holding an ArrayLike to ensure
+        Analogous to agg_index, but holding an ArrayLike to ensure
         we can retain ExtensionDtypes.
         """
         if self._all_grouper is not None:
             # retain dtype for categories, including unobserved ones
-            return self.result_index._values
+            return self.agg_index._values

         elif self._passed_categorical:
             return self.group_index._values

         return self._codes_and_uniques[1]

     @cache_readonly
-    def result_index(self) -> Index:
-        # result_index retains dtype for categories, including unobserved ones,
+    def agg_index(self) -> Index:
+        # agg_index retains dtype for categories, including unobserved ones,
         # which group_index does not
         if self._all_grouper is not None:
             group_idx = self.group_index
@@ -788,7 +788,7 @@ def _codes_and_uniques(self) -> tuple[npt.NDArray[np.signedinteger], ArrayLike]:
         elif isinstance(self.grouping_vector, ops.BaseGrouper):
             # we have a list of groupers
             codes = self.grouping_vector.codes_info
-            uniques = self.grouping_vector.result_index._values
+            uniques = self.grouping_vector.agg_index._values
         elif self._uniques is not None:
             # GH#50486 Code grouping_vector using _uniques; allows
             # including uniques that are not present in grouping_vector.
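
Note (not part of the commit): the comment on agg_index above says it retains unobserved categories. A short illustration of the user-visible behaviour this backs, using only public API:

    # With a categorical key and observed=False, the result index keeps the
    # unobserved category 'c'; sum() fills it with 0.
    import pandas as pd

    key = pd.Categorical(["a", "a", "b"], categories=["a", "b", "c"])
    df = pd.DataFrame({"key": key, "val": [1, 2, 3]})

    res = df.groupby("key", observed=False)["val"].sum()
    print(res.index)     # CategoricalIndex(['a', 'b', 'c'], ..., name='key')
    print(res.tolist())  # [3, 3, 0]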

pandas/core/groupby/ops.py (+8 -8)

@@ -634,7 +634,7 @@ def group_keys_seq(self):
     @cache_readonly
     def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]:
         """dict {group name -> group indices}"""
-        if len(self.groupings) == 1 and isinstance(self.result_index, CategoricalIndex):
+        if len(self.groupings) == 1 and isinstance(self.agg_index, CategoricalIndex):
             # This shows unused categories in indices GH#38642
             return self.groupings[0].indices
         codes_list = [ping.codes for ping in self.groupings]
@@ -644,7 +644,7 @@ def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]:
     @final
     def result_ilocs(self) -> npt.NDArray[np.intp]:
         """
-        Get the original integer locations of result_index in the input.
+        Get the original integer locations of agg_index in the input.
         """
         # Original indices are where group_index would go via sorting.
         # But when dropna is true, we need to remove null values while accounting for
@@ -692,7 +692,7 @@ def size(self) -> Series:
             out = np.bincount(ids[ids != -1], minlength=ngroups)
         else:
             out = []
-        return Series(out, index=self.result_index, dtype="int64")
+        return Series(out, index=self.agg_index, dtype="int64")

     @cache_readonly
     def groups(self) -> dict[Hashable, np.ndarray]:
@@ -755,7 +755,7 @@ def _get_compressed_codes(
     @final
     @cache_readonly
     def ngroups(self) -> int:
-        return len(self.result_index)
+        return len(self.agg_index)

     @property
     def reconstructed_codes(self) -> list[npt.NDArray[np.intp]]:
@@ -764,12 +764,12 @@ def reconstructed_codes(self) -> list[npt.NDArray[np.intp]]:
         return decons_obs_group_ids(ids, obs_ids, self.shape, codes, xnull=True)

     @cache_readonly
-    def result_index(self) -> Index:
+    def agg_index(self) -> Index:
         if len(self.groupings) == 1:
-            return self.groupings[0].result_index.rename(self.names[0])
+            return self.groupings[0].agg_index.rename(self.names[0])

         codes = self.reconstructed_codes
-        levels = [ping.result_index for ping in self.groupings]
+        levels = [ping.agg_index for ping in self.groupings]
         return MultiIndex(
             levels=levels, codes=codes, verify_integrity=False, names=self.names
         )
@@ -1071,7 +1071,7 @@ def reconstructed_codes(self) -> list[np.ndarray]:
         return [np.r_[0, np.flatnonzero(self.bins[1:] != self.bins[:-1]) + 1]]

     @cache_readonly
-    def result_index(self) -> Index:
+    def agg_index(self) -> Index:
         if len(self.binlabels) != 0 and isna(self.binlabels[0]):
             return self.binlabels[1:]
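
Note (not part of the commit): a hedged sketch of the renamed BaseGrouper.agg_index defined above; .grouper and agg_index are internal, and the names assume this commit is applied.

    # With several keys, agg_index is the MultiIndex of observed key
    # combinations, and ngroups is defined as its length (see the diff above).
    import pandas as pd

    df = pd.DataFrame({"a": [1, 1, 2], "b": ["x", "y", "y"], "val": [10, 20, 30]})
    gb = df.groupby(["a", "b"])

    idx = gb.grouper.agg_index
    print(idx.tolist())                    # [(1, 'x'), (1, 'y'), (2, 'y')]
    assert gb.grouper.ngroups == len(idx)
    assert gb.sum().index.equals(idx)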

pandas/core/window/__init__.py (+2 -2)

@@ -4,7 +4,7 @@
 )
 from pandas.core.window.expanding import (
     Expanding,
-    ExpandingGroupby,
+    ExpandingGroupBy,
 )
 from pandas.core.window.rolling import (
     Rolling,
@@ -14,7 +14,7 @@

 __all__ = [
     "Expanding",
-    "ExpandingGroupby",
+    "ExpandingGroupBy",
     "ExponentialMovingWindow",
     "ExponentialMovingWindowGroupby",
     "Rolling",
