REF: combine groupby helpers (pandas-dev#51516)

jbrockmendel · web-flow · commit dc947a459b09 · 2023-02-21T10:53:36.000-08:00
* REF: simplify _apply_to_column_groupbys, rename apply->apply_groupwise

* REF: consolidate self.as_index check

* REF: remove unused _iterate_slices
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -154,9 +154,6 @@ def _get_data_to_aggregate(
             )
         return single
 
-    def _iterate_slices(self) -> Iterable[Series]:
-        yield self._selected_obj
-
     _agg_examples_doc = dedent(
         """
     Examples
@@ -408,7 +405,9 @@ def _aggregate_named(self, func, *args, **kwargs):
         result = {}
         initialized = False
 
-        for name, group in self:
+        for name, group in self.grouper.get_iterator(
+            self._selected_obj, axis=self.axis
+        ):
             object.__setattr__(group, "name", name)
 
             output = func(group, *args, **kwargs)
@@ -568,7 +567,11 @@ def true_and_notna(x) -> bool:
 
         try:
             indices = [
-                self._get_index(name) for name, group in self if true_and_notna(group)
+                self._get_index(name)
+                for name, group in self.grouper.get_iterator(
+                    self._selected_obj, axis=self.axis
+                )
+                if true_and_notna(group)
             ]
         except (ValueError, TypeError) as err:
             raise TypeError("the filter must return a boolean result") from err
@@ -1850,29 +1853,33 @@ def _indexed_output_to_ndframe(
     def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame:
         return self.obj._constructor(mgr)
 
-    def _iterate_column_groupbys(self, obj: DataFrame):
-        for i, colname in enumerate(obj.columns):
-            yield colname, SeriesGroupBy(
+    def _apply_to_column_groupbys(self, func) -> DataFrame:
+        from pandas.core.reshape.concat import concat
+
+        obj = self._obj_with_exclusions
+        columns = obj.columns
+        sgbs = [
+            SeriesGroupBy(
                 obj.iloc[:, i],
                 selection=colname,
                 grouper=self.grouper,
                 exclusions=self.exclusions,
                 observed=self.observed,
             )
-
-    def _apply_to_column_groupbys(self, func, obj: DataFrame) -> DataFrame:
-        from pandas.core.reshape.concat import concat
-
-        columns = obj.columns
-        results = [
-            func(col_groupby) for _, col_groupby in self._iterate_column_groupbys(obj)
+            for i, colname in enumerate(obj.columns)
         ]
+        results = [func(sgb) for sgb in sgbs]
 
         if not len(results):
             # concat would raise
-            return DataFrame([], columns=columns, index=self.grouper.result_index)
+            res_df = DataFrame([], columns=columns, index=self.grouper.result_index)
         else:
-            return concat(results, keys=columns, axis=1)
+            res_df = concat(results, keys=columns, axis=1)
+
+        if not self.as_index:
+            res_df.index = default_index(len(res_df))
+            res_df = self._insert_inaxis_grouper(res_df)
+        return res_df
 
     def nunique(self, dropna: bool = True) -> DataFrame:
         """
@@ -1925,16 +1932,7 @@ def nunique(self, dropna: bool = True) -> DataFrame:
                 lambda sgb: sgb.nunique(dropna), self._obj_with_exclusions, is_agg=True
             )
 
-        obj = self._obj_with_exclusions
-        results = self._apply_to_column_groupbys(
-            lambda sgb: sgb.nunique(dropna), obj=obj
-        )
-
-        if not self.as_index:
-            results.index = default_index(len(results))
-            results = self._insert_inaxis_grouper(results)
-
-        return results
+        return self._apply_to_column_groupbys(lambda sgb: sgb.nunique(dropna))
 
     def idxmax(
         self,
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -19,7 +19,6 @@ class providing the base-class of operations.
     TYPE_CHECKING,
     Callable,
     Hashable,
-    Iterable,
     Iterator,
     List,
     Literal,
@@ -990,12 +989,6 @@ def curried(x):
             result = self._set_result_index_ordered(result)
         return result
 
-    # -----------------------------------------------------------------
-    # Selection
-
-    def _iterate_slices(self) -> Iterable[Series]:
-        raise AbstractMethodError(self)
-
     # -----------------------------------------------------------------
     # Dispatch/Wrapping
 
@@ -1398,7 +1391,7 @@ def _python_apply_general(
         Series or DataFrame
             data after applying f
         """
-        values, mutated = self.grouper.apply(f, data, self.axis)
+        values, mutated = self.grouper.apply_groupwise(f, data, self.axis)
         if not_indexed_same is None:
             not_indexed_same = mutated
 
@@ -2462,7 +2455,6 @@ def ohlc(self) -> DataFrame:
             Open, high, low and close values within each group.
         """
         if self.obj.ndim == 1:
-            # self._iterate_slices() yields only self._selected_obj
             obj = self._selected_obj
 
             is_numeric = is_numeric_dtype(obj.dtype)
@@ -2479,12 +2471,7 @@ def ohlc(self) -> DataFrame:
             )
             return self._reindex_output(result)
 
-        result = self._apply_to_column_groupbys(
-            lambda x: x.ohlc(), self._obj_with_exclusions
-        )
-        if not self.as_index:
-            result = self._insert_inaxis_grouper(result)
-            result.index = default_index(len(result))
+        result = self._apply_to_column_groupbys(lambda sgb: sgb.ohlc())
         return result
 
     @doc(DataFrame.describe)
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
@@ -748,7 +748,7 @@ def group_keys_seq(self):
             return get_flattened_list(ids, ngroups, self.levels, self.codes)
 
     @final
-    def apply(
+    def apply_groupwise(
         self, f: Callable, data: DataFrame | Series, axis: AxisInt = 0
     ) -> tuple[list, bool]:
         mutated = False