diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 8bb8f00b4c406..7af32d70c00bc 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -431,16 +431,9 @@ def _wrap_applied_output( ) assert values is not None - def _get_index() -> Index: - if self.grouper.nkeys > 1: - index = MultiIndex.from_tuples(keys, names=self.grouper.names) - else: - index = Index._with_infer(keys, name=self.grouper.names[0]) - return index - if isinstance(values[0], dict): # GH #823 #24880 - index = _get_index() + index = self._group_keys_index res_df = self.obj._constructor_expanddim(values, index=index) res_df = self._reindex_output(res_df) # if self.observed is False, @@ -453,7 +446,7 @@ def _get_index() -> Index: else: # GH #6265 #24880 result = self.obj._constructor( - data=values, index=_get_index(), name=self.obj.name + data=values, index=self._group_keys_index, name=self.obj.name ) return self._reindex_output(result) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index a60ec29581337..d83b3ee1738bf 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1177,6 +1177,18 @@ def _resolve_numeric_only(self, numeric_only: bool | lib.NoDefault) -> bool: # expected "bool") return numeric_only # type: ignore[return-value] + @cache_readonly + def _group_keys_index(self) -> Index: + # The index to use for the result of Groupby Aggregations. + # This _may_ be redundant with self.grouper.result_index, but that + # has not been conclusively proven yet. + keys = self.grouper._get_group_keys() + if self.grouper.nkeys > 1: + index = MultiIndex.from_tuples(keys, names=self.grouper.names) + else: + index = Index._with_infer(keys, name=self.grouper.names[0]) + return index + # ----------------------------------------------------------------- # numba @@ -1244,7 +1256,7 @@ def _aggregate_with_numba(self, data, func, *args, engine_kwargs=None, **kwargs) data and indices into a Numba jitted function. """ starts, ends, sorted_index, sorted_data = self._numba_prep(func, data) - group_keys = self.grouper._get_group_keys() + index = self._group_keys_index numba_agg_func = numba_.generate_numba_agg_func(kwargs, func, engine_kwargs) result = numba_agg_func( @@ -1252,7 +1264,7 @@ def _aggregate_with_numba(self, data, func, *args, engine_kwargs=None, **kwargs) sorted_index, starts, ends, - len(group_keys), + len(index), len(data.columns), *args, ) @@ -1261,10 +1273,6 @@ def _aggregate_with_numba(self, data, func, *args, engine_kwargs=None, **kwargs) if cache_key not in NUMBA_FUNC_CACHE: NUMBA_FUNC_CACHE[cache_key] = numba_agg_func - if self.grouper.nkeys > 1: - index = MultiIndex.from_tuples(group_keys, names=self.grouper.names) - else: - index = Index(group_keys, name=self.grouper.names[0]) return result, index # -----------------------------------------------------------------