From 81897cd926e9b6653bfb0470af11d7bfcf775ba7 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 23 Apr 2021 12:49:42 -0700 Subject: [PATCH 1/6] REF: remove index kwarg from _wrap_aggregated_output --- pandas/core/groupby/generic.py | 45 ++++++++++++++-------------------- pandas/core/groupby/groupby.py | 8 +++--- 2 files changed, 21 insertions(+), 32 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 9d6d2d698dfe5..3d179331f3a1c 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -336,14 +336,13 @@ def _aggregate_multiple_funcs(self, arg): # let higher level handle return results - # Argument 1 to "_wrap_aggregated_output" of "SeriesGroupBy" has - # incompatible type "Dict[OutputKey, Union[DataFrame, - # Series]]"; - # expected "Mapping[OutputKey, Union[Series, ndarray]]" - output = self._wrap_aggregated_output( - results, index=None # type: ignore[arg-type] - ) - return self.obj._constructor_expanddim(output, columns=columns) + # Otherwise, the user-provided functions were not all reducing, see GH#35490 + indexed_output = {key.position: val for key, val in results.items()} + output = self.obj._constructor_expanddim(indexed_output, index=None) + output.columns = Index(key.label for key in results) + + output = self._reindex_output(output) + return output def _cython_agg_general( self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1 @@ -371,18 +370,11 @@ def _cython_agg_general( if not output: raise DataError("No numeric types to aggregate") - # error: Argument 1 to "_wrap_aggregated_output" of "BaseGroupBy" has - # incompatible type "Dict[OutputKey, Union[ndarray, DatetimeArray]]"; - # expected "Mapping[OutputKey, ndarray]" - return self._wrap_aggregated_output( - output, index=self.grouper.result_index # type: ignore[arg-type] - ) + return self._wrap_aggregated_output(output) - # TODO: index should not be Optional - see GH 35490 def _wrap_series_output( self, output: Mapping[base.OutputKey, Series | ArrayLike], - index: Index | None, ) -> FrameOrSeriesUnion: """ Wraps the output of a SeriesGroupBy operation into the expected result. @@ -391,8 +383,6 @@ def _wrap_series_output( ---------- output : Mapping[base.OutputKey, Union[Series, np.ndarray, ExtensionArray]] Data to wrap. - index : pd.Index or None - Index to apply to the output. Returns ------- @@ -403,6 +393,8 @@ def _wrap_series_output( In the vast majority of cases output and columns will only contain one element. The exception is operations that expand dimensions, like ohlc. """ + index = self.grouper.result_index + indexed_output = {key.position: val for key, val in output.items()} columns = Index(key.label for key in output) @@ -419,18 +411,16 @@ def _wrap_series_output( return result - # TODO: Remove index argument, use self.grouper.result_index, see GH 35490 def _wrap_aggregated_output( self, - output: Mapping[base.OutputKey, Series | np.ndarray], - index: Index | None, + output: Mapping[base.OutputKey, Series | ArrayLike], ) -> FrameOrSeriesUnion: """ Wraps the output of a SeriesGroupBy aggregation into the expected result. Parameters ---------- - output : Mapping[base.OutputKey, Union[Series, np.ndarray]] + output : Mapping[base.OutputKey, Union[Series, ArrayLike]] Data to wrap. Returns @@ -442,7 +432,8 @@ def _wrap_aggregated_output( In the vast majority of cases output will only contain one element. The exception is operations that expand dimensions, like ohlc. """ - result = self._wrap_series_output(output=output, index=index) + assert len(output) == 1 + result = self._wrap_series_output(output=output) return self._reindex_output(result) def _wrap_transformed_output( @@ -466,7 +457,10 @@ def _wrap_transformed_output( for consistency with DataFrame methods and _wrap_aggregated_output. """ assert len(output) == 1 - result = self._wrap_series_output(output=output, index=self.obj.index) + + name = self.obj.name + values = list(output.values())[0] + result = self.obj._constructor(values, index=self.obj.index, name=name) # No transformations increase the ndim of the result assert isinstance(result, Series) @@ -1186,11 +1180,9 @@ def py_fallback(values: ArrayLike) -> ArrayLike: sgb = get_groupby(obj, self.grouper, observed=True) result = sgb.aggregate(lambda x: alt(x, axis=self.axis)) - assert isinstance(result, (Series, DataFrame)) # for mypy # In the case of object dtype block, it may have been split # in the operation. We un-split here. result = result._consolidate() - assert isinstance(result, (Series, DataFrame)) # for mypy # unwrap DataFrame/Series to get array mgr = result._mgr arrays = mgr.arrays @@ -1733,7 +1725,6 @@ def _insert_inaxis_grouper_inplace(self, result: DataFrame) -> None: def _wrap_aggregated_output( self, output: Mapping[base.OutputKey, Series | np.ndarray], - index: Index | None, ) -> DataFrame: """ Wraps the output of DataFrameGroupBy aggregations into the expected result. diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index f579b04db898e..ce7f0de616e18 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1056,9 +1056,7 @@ def _set_result_index_ordered( return result - def _wrap_aggregated_output( - self, output: Mapping[base.OutputKey, np.ndarray], index: Index | None - ): + def _wrap_aggregated_output(self, output: Mapping[base.OutputKey, np.ndarray]): raise AbstractMethodError(self) def _wrap_transformed_output(self, output: Mapping[base.OutputKey, ArrayLike]): @@ -1259,7 +1257,7 @@ def _python_agg_general(self, func, *args, **kwargs): if not output: return self._python_apply_general(f, self._selected_obj) - return self._wrap_aggregated_output(output, index=self.grouper.result_index) + return self._wrap_aggregated_output(output) @final def _agg_general( @@ -2786,7 +2784,7 @@ def _get_cythonized_result( raise TypeError(error_msg) if aggregate: - return self._wrap_aggregated_output(output, index=self.grouper.result_index) + return self._wrap_aggregated_output(output) else: return self._wrap_transformed_output(output) From 6328016559ba3e8a65dd51ea70d22be4fa1d1417 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 23 Apr 2021 14:15:01 -0700 Subject: [PATCH 2/6] REF: remove _wrap_series_output --- pandas/core/groupby/generic.py | 51 ++++++---------------------------- 1 file changed, 9 insertions(+), 42 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 3d179331f3a1c..9f9a8f9c53769 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -372,49 +372,10 @@ def _cython_agg_general( return self._wrap_aggregated_output(output) - def _wrap_series_output( - self, - output: Mapping[base.OutputKey, Series | ArrayLike], - ) -> FrameOrSeriesUnion: - """ - Wraps the output of a SeriesGroupBy operation into the expected result. - - Parameters - ---------- - output : Mapping[base.OutputKey, Union[Series, np.ndarray, ExtensionArray]] - Data to wrap. - - Returns - ------- - Series or DataFrame - - Notes - ----- - In the vast majority of cases output and columns will only contain one - element. The exception is operations that expand dimensions, like ohlc. - """ - index = self.grouper.result_index - - indexed_output = {key.position: val for key, val in output.items()} - columns = Index(key.label for key in output) - - result: FrameOrSeriesUnion - if len(output) > 1: - result = self.obj._constructor_expanddim(indexed_output, index=index) - result.columns = columns - elif not columns.empty: - result = self.obj._constructor( - indexed_output[0], index=index, name=columns[0] - ) - else: - result = self.obj._constructor_expanddim() - - return result - def _wrap_aggregated_output( self, output: Mapping[base.OutputKey, Series | ArrayLike], - ) -> FrameOrSeriesUnion: + ) -> Series: """ Wraps the output of a SeriesGroupBy aggregation into the expected result. @@ -425,15 +386,21 @@ def _wrap_aggregated_output( Returns ------- - Series or DataFrame + Series Notes ----- In the vast majority of cases output will only contain one element. The exception is operations that expand dimensions, like ohlc. """ + assert len(output) == 1 - result = self._wrap_series_output(output=output) + + name = self.obj.name + index = self.grouper.result_index + values = list(output.values())[0] + + result = self.obj._constructor(values, index=index, name=name) return self._reindex_output(result) def _wrap_transformed_output( From f3b1baacd57e311cd29097faa0901fcfcb081546 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 23 Apr 2021 15:18:00 -0700 Subject: [PATCH 3/6] REF: remove unnecessary layer --- pandas/core/groupby/generic.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 9f9a8f9c53769..81004a3f3f3ba 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1076,14 +1076,6 @@ def _iterate_slices(self) -> Iterable[Series]: def _cython_agg_general( self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1 ) -> DataFrame: - agg_mgr = self._cython_agg_manager( - how, alt=alt, numeric_only=numeric_only, min_count=min_count - ) - return self._wrap_agged_manager(agg_mgr) - - def _cython_agg_manager( - self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1 - ) -> Manager2D: # Note: we never get here with how="ohlc"; that goes through SeriesGroupBy data: Manager2D = self._get_data_to_aggregate() @@ -1185,7 +1177,7 @@ def array_func(values: ArrayLike) -> ArrayLike: if not len(new_mgr): raise DataError("No numeric types to aggregate") - return new_mgr + return self._wrap_agged_manager(new_mgr) def _aggregate_frame(self, func, *args, **kwargs) -> DataFrame: if self.grouper.nkeys != 1: From 90b662a6d95e96f190d4804b85f3b37da517ff9d Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 23 Apr 2021 15:20:42 -0700 Subject: [PATCH 4/6] revert incorrect comment --- pandas/core/groupby/generic.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 81004a3f3f3ba..58a99fb8958a8 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -336,7 +336,6 @@ def _aggregate_multiple_funcs(self, arg): # let higher level handle return results - # Otherwise, the user-provided functions were not all reducing, see GH#35490 indexed_output = {key.position: val for key, val in results.items()} output = self.obj._constructor_expanddim(indexed_output, index=None) output.columns = Index(key.label for key in results) @@ -393,7 +392,6 @@ def _wrap_aggregated_output( In the vast majority of cases output will only contain one element. The exception is operations that expand dimensions, like ohlc. """ - assert len(output) == 1 name = self.obj.name From 941763b32e2af173002fccf4222bed7a5e64446b Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 24 Apr 2021 16:40:55 -0700 Subject: [PATCH 5/6] PERF: next(iter(...)) --- pandas/core/groupby/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 58a99fb8958a8..db7a18fba55cf 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -396,7 +396,7 @@ def _wrap_aggregated_output( name = self.obj.name index = self.grouper.result_index - values = list(output.values())[0] + values = next(iter(output.values())) result = self.obj._constructor(values, index=index, name=name) return self._reindex_output(result) From 1ab60ba54b793401ba0a0cc7da11db389bac894c Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 24 Apr 2021 17:55:27 -0700 Subject: [PATCH 6/6] next(iter(...)) --- pandas/core/groupby/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index db7a18fba55cf..bfcb62015f118 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -424,7 +424,7 @@ def _wrap_transformed_output( assert len(output) == 1 name = self.obj.name - values = list(output.values())[0] + values = next(iter(output.values())) result = self.obj._constructor(values, index=self.obj.index, name=name) # No transformations increase the ndim of the result