REF: dont pass keys through wrap_applied_output (#43479)

jbrockmendel · web-flow · commit 805f0d9b941f · 2021-09-09T15:39:43.000-04:00
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -369,7 +369,6 @@ def _indexed_output_to_ndframe(
     def _wrap_applied_output(
         self,
         data: Series,
-        keys: Index,
         values: list[Any] | None,
         not_indexed_same: bool = False,
     ) -> DataFrame | Series:
@@ -380,8 +379,6 @@ def _wrap_applied_output(
         ----------
         data : Series
             Input data for groupby operation.
-        keys : Index
-            Keys of groups that Series was grouped by.
         values : Optional[List[Any]]
             Applied output for each group.
         not_indexed_same : bool, default False
@@ -391,6 +388,8 @@ def _wrap_applied_output(
         -------
         DataFrame or Series
         """
+        keys = self.grouper.group_keys_seq
+
         if len(keys) == 0:
             # GH #6265
             return self.obj._constructor(
@@ -412,7 +411,7 @@ def _wrap_applied_output(
             res_ser.name = self.obj.name
             return res_ser
         elif isinstance(values[0], (Series, DataFrame)):
-            return self._concat_objects(keys, values, not_indexed_same=not_indexed_same)
+            return self._concat_objects(values, not_indexed_same=not_indexed_same)
         else:
             # GH #6265 #24880
             result = self.obj._constructor(
@@ -1100,7 +1099,9 @@ def _aggregate_item_by_item(self, func, *args, **kwargs) -> DataFrame:
         res_df.columns = obj.columns
         return res_df
 
-    def _wrap_applied_output(self, data, keys, values, not_indexed_same=False):
+    def _wrap_applied_output(self, data, values, not_indexed_same=False):
+        keys = self.grouper.group_keys_seq
+
         if len(keys) == 0:
             result = self.obj._constructor(
                 index=self.grouper.result_index, columns=data.columns
@@ -1115,7 +1116,7 @@ def _wrap_applied_output(self, data, keys, values, not_indexed_same=False):
             # GH9684 - All values are None, return an empty frame.
             return self.obj._constructor()
         elif isinstance(first_not_none, DataFrame):
-            return self._concat_objects(keys, values, not_indexed_same=not_indexed_same)
+            return self._concat_objects(values, not_indexed_same=not_indexed_same)
 
         key_index = self.grouper.result_index if self.as_index else None
 
@@ -1143,12 +1144,11 @@ def _wrap_applied_output(self, data, keys, values, not_indexed_same=False):
         else:
             # values are Series
             return self._wrap_applied_output_series(
-                keys, values, not_indexed_same, first_not_none, key_index
+                values, not_indexed_same, first_not_none, key_index
             )
 
     def _wrap_applied_output_series(
         self,
-        keys,
         values: list[Series],
         not_indexed_same: bool,
         first_not_none,
@@ -1171,6 +1171,7 @@ def _wrap_applied_output_series(
 
             # assign the name to this series
             if singular_series:
+                keys = self.grouper.group_keys_seq
                 values[0].name = keys[0]
 
                 # GH2893
@@ -1179,9 +1180,7 @@ def _wrap_applied_output_series(
                 # if any of the sub-series are not indexed the same
                 # OR we don't have a multi-index and we have only a
                 # single values
-                return self._concat_objects(
-                    keys, values, not_indexed_same=not_indexed_same
-                )
+                return self._concat_objects(values, not_indexed_same=not_indexed_same)
 
             # still a series
             # path added as of GH 5545
@@ -1192,7 +1191,7 @@ def _wrap_applied_output_series(
 
         if not all_indexed_same:
             # GH 8467
-            return self._concat_objects(keys, values, not_indexed_same=True)
+            return self._concat_objects(values, not_indexed_same=True)
 
         # Combine values
         # vstack+constructor is faster than concat and handles MI-columns
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -998,7 +998,7 @@ def _iterate_slices(self) -> Iterable[Series]:
     # Dispatch/Wrapping
 
     @final
-    def _concat_objects(self, keys, values, not_indexed_same: bool = False):
+    def _concat_objects(self, values, not_indexed_same: bool = False):
         from pandas.core.reshape.concat import concat
 
         def reset_identity(values):
@@ -1035,7 +1035,7 @@ def reset_identity(values):
             if self.as_index:
 
                 # possible MI return case
-                group_keys = keys
+                group_keys = self.grouper.group_keys_seq
                 group_levels = self.grouper.levels
                 group_names = self.grouper.names
 
@@ -1171,7 +1171,7 @@ def _wrap_transformed_output(
         result.index = self.obj.index
         return result
 
-    def _wrap_applied_output(self, data, keys, values, not_indexed_same: bool = False):
+    def _wrap_applied_output(self, data, values, not_indexed_same: bool = False):
         raise AbstractMethodError(self)
 
     def _resolve_numeric_only(self, numeric_only: bool | lib.NoDefault) -> bool:
@@ -1215,7 +1215,7 @@ def _group_keys_index(self) -> Index:
         # The index to use for the result of Groupby Aggregations.
         # This _may_ be redundant with self.grouper.result_index, but that
         #  has not been conclusively proven yet.
-        keys = self.grouper._get_group_keys()
+        keys = self.grouper.group_keys_seq
         if self.grouper.nkeys > 1:
             index = MultiIndex.from_tuples(keys, names=self.grouper.names)
         else:
@@ -1256,7 +1256,7 @@ def _transform_with_numba(self, data, func, *args, engine_kwargs=None, **kwargs)
         data and indices into a Numba jitted function.
         """
         starts, ends, sorted_index, sorted_data = self._numba_prep(func, data)
-        group_keys = self.grouper._get_group_keys()
+        group_keys = self.grouper.group_keys_seq
 
         numba_transform_func = numba_.generate_numba_transform_func(
             kwargs, func, engine_kwargs
@@ -1393,13 +1393,13 @@ def _python_apply_general(
         Series or DataFrame
             data after applying f
         """
-        keys, values, mutated = self.grouper.apply(f, data, self.axis)
+        values, mutated = self.grouper.apply(f, data, self.axis)
 
         if not_indexed_same is None:
             not_indexed_same = mutated or self.mutated
 
         return self._wrap_applied_output(
-            data, keys, values, not_indexed_same=not_indexed_same
+            data, values, not_indexed_same=not_indexed_same
         )
 
     @final
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
@@ -690,7 +690,7 @@ def get_iterator(
         for each group
         """
         splitter = self._get_splitter(data, axis=axis)
-        keys = self._get_group_keys()
+        keys = self.group_keys_seq
         for key, group in zip(keys, splitter):
             yield key, group.__finalize__(data, method="groupby")
 
@@ -716,7 +716,8 @@ def _get_grouper(self):
         return self.groupings[0].grouping_vector
 
     @final
-    def _get_group_keys(self):
+    @cache_readonly
+    def group_keys_seq(self):
         if len(self.groupings) == 1:
             return self.levels[0]
         else:
@@ -726,10 +727,10 @@ def _get_group_keys(self):
             return get_flattened_list(ids, ngroups, self.levels, self.codes)
 
     @final
-    def apply(self, f: F, data: FrameOrSeries, axis: int = 0):
+    def apply(self, f: F, data: FrameOrSeries, axis: int = 0) -> tuple[list, bool]:
         mutated = self.mutated
         splitter = self._get_splitter(data, axis=axis)
-        group_keys = self._get_group_keys()
+        group_keys = self.group_keys_seq
         result_values = []
 
         # This calls DataSplitter.__iter__
@@ -745,7 +746,7 @@ def apply(self, f: F, data: FrameOrSeries, axis: int = 0):
                 mutated = True
             result_values.append(res)
 
-        return group_keys, result_values, mutated
+        return result_values, mutated
 
     @cache_readonly
     def indices(self):