diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 9d6d2d698dfe5..bfcb62015f118 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -336,14 +336,12 @@ def _aggregate_multiple_funcs(self, arg):
                 # let higher level handle
                 return results

-        # Argument 1 to "_wrap_aggregated_output" of "SeriesGroupBy" has
-        # incompatible type "Dict[OutputKey, Union[DataFrame,
-        # Series]]";
-        # expected "Mapping[OutputKey, Union[Series, ndarray]]"
-        output = self._wrap_aggregated_output(
-            results, index=None  # type: ignore[arg-type]
-        )
-        return self.obj._constructor_expanddim(output, columns=columns)
+        indexed_output = {key.position: val for key, val in results.items()}
+        output = self.obj._constructor_expanddim(indexed_output, index=None)
+        output.columns = Index(key.label for key in results)
+
+        output = self._reindex_output(output)
+        return output

     def _cython_agg_general(
         self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1
@@ -371,78 +369,36 @@ def _cython_agg_general(
         if not output:
             raise DataError("No numeric types to aggregate")

-        # error: Argument 1 to "_wrap_aggregated_output" of "BaseGroupBy" has
-        # incompatible type "Dict[OutputKey, Union[ndarray, DatetimeArray]]";
-        # expected "Mapping[OutputKey, ndarray]"
-        return self._wrap_aggregated_output(
-            output, index=self.grouper.result_index  # type: ignore[arg-type]
-        )
-
-    # TODO: index should not be Optional - see GH 35490
-    def _wrap_series_output(
-        self,
-        output: Mapping[base.OutputKey, Series | ArrayLike],
-        index: Index | None,
-    ) -> FrameOrSeriesUnion:
-        """
-        Wraps the output of a SeriesGroupBy operation into the expected result.
-
-        Parameters
-        ----------
-        output : Mapping[base.OutputKey, Union[Series, np.ndarray, ExtensionArray]]
-            Data to wrap.
-        index : pd.Index or None
-            Index to apply to the output.
-
-        Returns
-        -------
-        Series or DataFrame
-
-        Notes
-        -----
-        In the vast majority of cases output and columns will only contain one
-        element. The exception is operations that expand dimensions, like ohlc.
-        """
-        indexed_output = {key.position: val for key, val in output.items()}
-        columns = Index(key.label for key in output)
+        return self._wrap_aggregated_output(output)

-        result: FrameOrSeriesUnion
-        if len(output) > 1:
-            result = self.obj._constructor_expanddim(indexed_output, index=index)
-            result.columns = columns
-        elif not columns.empty:
-            result = self.obj._constructor(
-                indexed_output[0], index=index, name=columns[0]
-            )
-        else:
-            result = self.obj._constructor_expanddim()
-
-        return result
-
-    # TODO: Remove index argument, use self.grouper.result_index, see GH 35490
     def _wrap_aggregated_output(
         self,
-        output: Mapping[base.OutputKey, Series | np.ndarray],
-        index: Index | None,
-    ) -> FrameOrSeriesUnion:
+        output: Mapping[base.OutputKey, Series | ArrayLike],
+    ) -> Series:
         """
         Wraps the output of a SeriesGroupBy aggregation into the expected result.

         Parameters
         ----------
-        output : Mapping[base.OutputKey, Union[Series, np.ndarray]]
+        output : Mapping[base.OutputKey, Union[Series, ArrayLike]]
             Data to wrap.

         Returns
         -------
-        Series or DataFrame
+        Series

         Notes
         -----
         In the vast majority of cases output will only contain one element.
         The exception is operations that expand dimensions, like ohlc.
         """
-        result = self._wrap_series_output(output=output, index=index)
+        assert len(output) == 1
+
+        name = self.obj.name
+        index = self.grouper.result_index
+        values = next(iter(output.values()))
+
+        result = self.obj._constructor(values, index=index, name=name)
         return self._reindex_output(result)

     def _wrap_transformed_output(
@@ -466,7 +422,10 @@ def _wrap_transformed_output(
         for consistency with DataFrame methods and _wrap_aggregated_output.
         """
         assert len(output) == 1
-        result = self._wrap_series_output(output=output, index=self.obj.index)
+
+        name = self.obj.name
+        values = next(iter(output.values()))
+        result = self.obj._constructor(values, index=self.obj.index, name=name)

         # No transformations increase the ndim of the result
         assert isinstance(result, Series)
@@ -1115,14 +1074,6 @@ def _iterate_slices(self) -> Iterable[Series]:
     def _cython_agg_general(
         self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1
     ) -> DataFrame:
-        agg_mgr = self._cython_agg_manager(
-            how, alt=alt, numeric_only=numeric_only, min_count=min_count
-        )
-        return self._wrap_agged_manager(agg_mgr)
-
-    def _cython_agg_manager(
-        self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1
-    ) -> Manager2D:
         # Note: we never get here with how="ohlc"; that goes through SeriesGroupBy

         data: Manager2D = self._get_data_to_aggregate()
@@ -1186,11 +1137,9 @@ def py_fallback(values: ArrayLike) -> ArrayLike:
             sgb = get_groupby(obj, self.grouper, observed=True)
             result = sgb.aggregate(lambda x: alt(x, axis=self.axis))

-            assert isinstance(result, (Series, DataFrame))  # for mypy
             # In the case of object dtype block, it may have been split
             # in the operation. We un-split here.
             result = result._consolidate()
-            assert isinstance(result, (Series, DataFrame))  # for mypy
             # unwrap DataFrame/Series to get array
             mgr = result._mgr
             arrays = mgr.arrays
@@ -1226,7 +1175,7 @@ def array_func(values: ArrayLike) -> ArrayLike:
         if not len(new_mgr):
             raise DataError("No numeric types to aggregate")

-        return new_mgr
+        return self._wrap_agged_manager(new_mgr)

     def _aggregate_frame(self, func, *args, **kwargs) -> DataFrame:
         if self.grouper.nkeys != 1:
@@ -1733,7 +1682,6 @@ def _insert_inaxis_grouper_inplace(self, result: DataFrame) -> None:
     def _wrap_aggregated_output(
         self,
         output: Mapping[base.OutputKey, Series | np.ndarray],
-        index: Index | None,
     ) -> DataFrame:
         """
         Wraps the output of DataFrameGroupBy aggregations into the expected result.
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index f579b04db898e..ce7f0de616e18 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1056,9 +1056,7 @@ def _set_result_index_ordered(

         return result

-    def _wrap_aggregated_output(
-        self, output: Mapping[base.OutputKey, np.ndarray], index: Index | None
-    ):
+    def _wrap_aggregated_output(self, output: Mapping[base.OutputKey, np.ndarray]):
         raise AbstractMethodError(self)

     def _wrap_transformed_output(self, output: Mapping[base.OutputKey, ArrayLike]):
@@ -1259,7 +1257,7 @@ def _python_agg_general(self, func, *args, **kwargs):
         if not output:
             return self._python_apply_general(f, self._selected_obj)

-        return self._wrap_aggregated_output(output, index=self.grouper.result_index)
+        return self._wrap_aggregated_output(output)

     @final
     def _agg_general(
@@ -2786,7 +2784,7 @@ def _get_cythonized_result(
             raise TypeError(error_msg)

         if aggregate:
-            return self._wrap_aggregated_output(output, index=self.grouper.result_index)
+            return self._wrap_aggregated_output(output)
         else:
             return self._wrap_transformed_output(output)
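
A hedged sketch of the simplified SeriesGroupBy._wrap_aggregated_output contract after this change: exactly one aggregated array comes in, and it is wrapped with the grouper's result index plus the original Series name (the index= parameter is gone because the index is always self.grouper.result_index now). The output dict, the hand-built result_index, and the sample data are illustrative stand-ins, not pandas internals.

import numpy as np
import pandas as pd

ser = pd.Series([1.0, 2.0, 3.0, 4.0], name="val")
keys = pd.Series(["a", "a", "b", "b"])

# Hypothetical stand-in for the single-entry OutputKey -> array mapping.
output = {"mean": np.array([1.5, 3.5])}
assert len(output) == 1

result_index = pd.Index(["a", "b"])  # stand-in for self.grouper.result_index
values = next(iter(output.values()))
wrapped = pd.Series(values, index=result_index, name=ser.name)

print(wrapped)
print(ser.groupby(keys).mean())  # the public API should wrap the result the same way
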