From 39de3a44a097714ba05ec4b685d7c94a604e703e Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 7 Sep 2021 09:12:46 -0700 Subject: [PATCH 1/2] REF: share _wrap_aggregated_output --- pandas/core/groupby/generic.py | 75 +++++++--------------------------- pandas/core/groupby/groupby.py | 47 ++++++++++++++++++++- 2 files changed, 61 insertions(+), 61 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 35cb247e96bc3..8bb8f00b4c406 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -354,35 +354,17 @@ def array_func(values: ArrayLike) -> ArrayLike: ) return self._reindex_output(ser) - def _wrap_aggregated_output( - self, - output: Mapping[base.OutputKey, Series | ArrayLike], + def _indexed_output_to_ndframe( + self, output: Mapping[base.OutputKey, ArrayLike] ) -> Series: """ - Wraps the output of a SeriesGroupBy aggregation into the expected result. - - Parameters - ---------- - output : Mapping[base.OutputKey, Union[Series, ArrayLike]] - Data to wrap. - - Returns - ------- - Series - - Notes - ----- - In the vast majority of cases output will only contain one element. - The exception is operations that expand dimensions, like ohlc. + Wrap the dict result of a GroupBy aggregation into a Series. """ assert len(output) == 1 - - name = self.obj.name - index = self.grouper.result_index values = next(iter(output.values())) - - result = self.obj._constructor(values, index=index, name=name) - return self._reindex_output(result) + result = self.obj._constructor(values) + result.name = self.obj.name + return result def _wrap_transformed_output( self, output: Mapping[base.OutputKey, Series | ArrayLike] @@ -1614,46 +1596,19 @@ def _insert_inaxis_grouper_inplace(self, result: DataFrame) -> None: if in_axis and name not in columns: result.insert(0, name, lev) - def _wrap_aggregated_output( - self, - output: Mapping[base.OutputKey, Series | ArrayLike], + def _indexed_output_to_ndframe( + self, output: Mapping[base.OutputKey, ArrayLike] ) -> DataFrame: """ - Wraps the output of DataFrameGroupBy aggregations into the expected result. - - Parameters - ---------- - output : Mapping[base.OutputKey, Union[Series, np.ndarray]] - Data to wrap. - - Returns - ------- - DataFrame + Wrap the dict result of a GroupBy aggregation into a DataFrame. """ - if isinstance(output, DataFrame): - result = output - else: - indexed_output = {key.position: val for key, val in output.items()} - columns = Index([key.label for key in output]) - columns._set_names(self._obj_with_exclusions._get_axis(1 - self.axis).names) - - result = self.obj._constructor(indexed_output) - result.columns = columns - - if not self.as_index: - self._insert_inaxis_grouper_inplace(result) - result = result._consolidate() - else: - result.index = self.grouper.result_index - - if self.axis == 1: - result = result.T - if result.index.equals(self.obj.index): - # Retain e.g. DatetimeIndex/TimedeltaIndex freq - result.index = self.obj.index.copy() - # TODO: Do this more systematically + indexed_output = {key.position: val for key, val in output.items()} + columns = Index([key.label for key in output]) + columns._set_names(self._obj_with_exclusions._get_axis(1 - self.axis).names) - return self._reindex_output(result) + result = self.obj._constructor(indexed_output) + result.columns = columns + return result def _wrap_transformed_output( self, output: Mapping[base.OutputKey, Series | ArrayLike] diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 0e358e611f418..a60ec29581337 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1095,9 +1095,54 @@ def _set_result_index_ordered( return result - def _wrap_aggregated_output(self, output: Mapping[base.OutputKey, ArrayLike]): + def _indexed_output_to_ndframe( + self, result: Mapping[base.OutputKey, ArrayLike] + ) -> Series | DataFrame: raise AbstractMethodError(self) + def _wrap_aggregated_output( + self, output: Series | DataFrame | Mapping[base.OutputKey, ArrayLike] + ): + """ + Wraps the output of GroupBy aggregations into the expected result. + + Parameters + ---------- + output : Series, DataFrame, or Mapping[base.OutputKey, ArrayLike] + Data to wrap. + + Returns + ------- + Series or DataFrame + """ + + if isinstance(output, (Series, DataFrame)): + # We get here (for DataFrameGroupBy) if we used Manager.grouped_reduce, + # in which case our columns are already set correctly. + # ATM we do not get here for SeriesGroupBy; when we do, we will + # need to require that result.name already match self.obj.name + result = output + else: + result = self._indexed_output_to_ndframe(output) + + if not self.as_index: + # `not self.as_index` is only relevant for DataFrameGroupBy, + # enforced in __init__ + self._insert_inaxis_grouper_inplace(result) + result = result._consolidate() + else: + result.index = self.grouper.result_index + + if self.axis == 1: + # Only relevant for DataFrameGroupBy, no-op for SeriesGroupBy + result = result.T + if result.index.equals(self.obj.index): + # Retain e.g. DatetimeIndex/TimedeltaIndex freq + result.index = self.obj.index.copy() + # TODO: Do this more systematically + + return self._reindex_output(result) + def _wrap_transformed_output(self, output: Mapping[base.OutputKey, ArrayLike]): raise AbstractMethodError(self) From 23cdf2f24f2a7d8b55803dcaf3a1ee15405a7cb6 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 7 Sep 2021 11:19:21 -0700 Subject: [PATCH 2/2] REF: share _wrap_transformed_output --- pandas/core/groupby/generic.py | 60 ---------------------------------- pandas/core/groupby/groupby.py | 29 ++++++++++++++-- 2 files changed, 27 insertions(+), 62 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 8bb8f00b4c406..9f45a6665ca5c 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -366,36 +366,6 @@ def _indexed_output_to_ndframe( result.name = self.obj.name return result - def _wrap_transformed_output( - self, output: Mapping[base.OutputKey, Series | ArrayLike] - ) -> Series: - """ - Wraps the output of a SeriesGroupBy aggregation into the expected result. - - Parameters - ---------- - output : dict[base.OutputKey, Union[Series, np.ndarray, ExtensionArray]] - Dict with a sole key of 0 and a value of the result values. - - Returns - ------- - Series - - Notes - ----- - output should always contain one element. It is specified as a dict - for consistency with DataFrame methods and _wrap_aggregated_output. - """ - assert len(output) == 1 - - name = self.obj.name - values = next(iter(output.values())) - result = self.obj._constructor(values, index=self.obj.index, name=name) - - # No transformations increase the ndim of the result - assert isinstance(result, Series) - return result - def _wrap_applied_output( self, data: Series, @@ -1610,36 +1580,6 @@ def _indexed_output_to_ndframe( result.columns = columns return result - def _wrap_transformed_output( - self, output: Mapping[base.OutputKey, Series | ArrayLike] - ) -> DataFrame: - """ - Wraps the output of DataFrameGroupBy transformations into the expected result. - - Parameters - ---------- - output : Mapping[base.OutputKey, Union[Series, np.ndarray, ExtensionArray]] - Data to wrap. - - Returns - ------- - DataFrame - """ - indexed_output = {key.position: val for key, val in output.items()} - result = self.obj._constructor(indexed_output) - - if self.axis == 1: - result = result.T - result.columns = self.obj.columns - else: - columns = Index(key.label for key in output) - columns._set_names(self.obj._get_axis(1 - self.axis).names) - result.columns = columns - - result.index = self.obj.index - - return result - def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame: if not self.as_index: # GH 41998 - empty mgr always gets index of length 0 diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index a60ec29581337..0547b9209ae65 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1100,6 +1100,7 @@ def _indexed_output_to_ndframe( ) -> Series | DataFrame: raise AbstractMethodError(self) + @final def _wrap_aggregated_output( self, output: Series | DataFrame | Mapping[base.OutputKey, ArrayLike] ): @@ -1143,8 +1144,32 @@ def _wrap_aggregated_output( return self._reindex_output(result) - def _wrap_transformed_output(self, output: Mapping[base.OutputKey, ArrayLike]): - raise AbstractMethodError(self) + @final + def _wrap_transformed_output( + self, output: Mapping[base.OutputKey, ArrayLike] + ) -> Series | DataFrame: + """ + Wraps the output of GroupBy transformations into the expected result. + + Parameters + ---------- + output : Mapping[base.OutputKey, ArrayLike] + Data to wrap. + + Returns + ------- + Series or DataFrame + Series for SeriesGroupBy, DataFrame for DataFrameGroupBy + """ + result = self._indexed_output_to_ndframe(output) + + if self.axis == 1: + # Only relevant for DataFrameGroupBy + result = result.T + result.columns = self.obj.columns + + result.index = self.obj.index + return result def _wrap_applied_output(self, data, keys, values, not_indexed_same: bool = False): raise AbstractMethodError(self)