diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index e6f2e300c5567..cdb5dddf03a64 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -163,15 +163,7 @@ def prop(self):
 class SeriesGroupBy(GroupBy[Series]):

     def _wrap_agged_manager(self, mgr: Manager) -> Series:
-        if mgr.ndim == 1:
-            mgr = cast(SingleManager, mgr)
-            single = mgr
-        else:
-            mgr = cast(Manager2D, mgr)
-            single = mgr.iget(0)
-        ser = self.obj._constructor(single, name=self.obj.name)
-        # NB: caller is responsible for setting ser.index
-        return ser
+        return self.obj._constructor(mgr, name=self.obj.name)

     def _get_data_to_aggregate(
         self, *, numeric_only: bool = False, name: str | None = None
@@ -1902,25 +1894,7 @@ def _indexed_output_to_ndframe(
         return result

     def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame:
-        if not self.as_index:
-            # GH 41998 - empty mgr always gets index of length 0
-            rows = mgr.shape[1] if mgr.shape[0] > 0 else 0
-            index = Index(range(rows))
-            mgr.set_axis(1, index)
-            result = self.obj._constructor(mgr)
-
-            result = self._insert_inaxis_grouper(result)
-            result = result._consolidate()
-        else:
-            index = self.grouper.result_index
-            mgr.set_axis(1, index)
-            result = self.obj._constructor(mgr)
-
-        if self.axis == 1:
-            result = result.T
-
-        # Note: we really only care about inferring numeric dtypes here
-        return self._reindex_output(result).infer_objects(copy=False)
+        return self.obj._constructor(mgr)

     def _iterate_column_groupbys(self, obj: DataFrame | Series):
         for i, colname in enumerate(obj.columns):
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 26e1105ea879d..5f5bb1c8833da 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1501,7 +1501,6 @@ def _cython_agg_general(
         # that goes through SeriesGroupBy

         data = self._get_data_to_aggregate(numeric_only=numeric_only, name=how)
-        is_ser = data.ndim == 1

         def array_func(values: ArrayLike) -> ArrayLike:
             try:
@@ -1523,16 +1522,12 @@ def array_func(values: ArrayLike) -> ArrayLike:
             return result

         new_mgr = data.grouped_reduce(array_func)
-
         res = self._wrap_agged_manager(new_mgr)
-        if is_ser:
-            if self.as_index:
-                res.index = self.grouper.result_index
-            else:
-                res = self._insert_inaxis_grouper(res)
-            return self._reindex_output(res)
-        else:
-            return res
+        out = self._wrap_aggregated_output(res)
+        if data.ndim == 2:
+            # TODO: don't special-case DataFrame vs Series
+            out = out.infer_objects(copy=False)
+        return out

     def _cython_transform(
         self, how: str, numeric_only: bool = False, axis: AxisInt = 0, **kwargs
@@ -1793,19 +1788,14 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike:
             return counted

         new_mgr = data.grouped_reduce(hfunc)
+        new_obj = self._wrap_agged_manager(new_mgr)

         # If we are grouping on categoricals we want unobserved categories to
         # return zero, rather than the default of NaN which the reindexing in
-        # _wrap_agged_manager() returns. GH 35028
+        # _wrap_aggregated_output() returns. GH 35028
         # e.g. test_dataframe_groupby_on_2_categoricals_when_observed_is_false
         with com.temp_setattr(self, "observed", True):
-            result = self._wrap_agged_manager(new_mgr)
-
-        if result.ndim == 1:
-            if self.as_index:
-                result.index = self.grouper.result_index
-            else:
-                result = self._insert_inaxis_grouper(result)
+            result = self._wrap_aggregated_output(new_obj)

         return self._reindex_output(result, fill_value=0)

@@ -2790,9 +2780,7 @@ def blk_func(values: ArrayLike) -> ArrayLike:
         mgr = obj._mgr
         res_mgr = mgr.apply(blk_func)

-        new_obj = obj._constructor(res_mgr)
-        if isinstance(new_obj, Series):
-            new_obj.name = obj.name
+        new_obj = self._wrap_agged_manager(res_mgr)

         if self.axis == 1:
             # Only relevant for DataFrameGroupBy
@@ -3197,15 +3185,10 @@ def blk_func(values: ArrayLike) -> ArrayLike:
                 out = out.reshape(ncols, ngroups * nqs)
             return post_processor(out, inference, result_mask, orig_vals)

-        obj = self._obj_with_exclusions
-        is_ser = obj.ndim == 1
         data = self._get_data_to_aggregate(numeric_only=numeric_only, name="quantile")
         res_mgr = data.grouped_reduce(blk_func)

-        if is_ser:
-            res = self._wrap_agged_manager(res_mgr)
-        else:
-            res = obj._constructor(res_mgr)
+        res = self._wrap_agged_manager(res_mgr)

         if orig_scalar:
             # Avoid expensive MultiIndex construction
@@ -3652,19 +3635,12 @@ def blk_func(values: ArrayLike) -> ArrayLike:

             return result.T

-        obj = self._obj_with_exclusions
-
         # Operate block-wise instead of column-by-column
-        is_ser = obj.ndim == 1
         mgr = self._get_data_to_aggregate(numeric_only=numeric_only, name=how)

         res_mgr = mgr.grouped_reduce(blk_func)

-        if is_ser:
-            out = self._wrap_agged_manager(res_mgr)
-        else:
-            out = obj._constructor(res_mgr)
-
+        out = self._wrap_agged_manager(res_mgr)
         return self._wrap_aggregated_output(out)

     @final
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index 82316806d3d47..76da973e110bf 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -947,9 +947,10 @@ def grouped_reduce(self: T, func: Callable) -> T:
             result_indices.append(i)

         if len(result_arrays) == 0:
-            index = Index([None])  # placeholder
+            nrows = 0
         else:
-            index = Index(range(result_arrays[0].shape[0]))
+            nrows = result_arrays[0].shape[0]
+        index = Index(range(nrows))

         columns = self.items

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index ff80cccaa20d3..8a4fa4c10bf5f 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1538,9 +1538,10 @@ def grouped_reduce(self: T, func: Callable) -> T:
                 result_blocks = extend_blocks(applied, result_blocks)

         if len(result_blocks) == 0:
-            index = Index([None])  # placeholder
+            nrows = 0
         else:
-            index = Index(range(result_blocks[0].values.shape[-1]))
+            nrows = result_blocks[0].values.shape[-1]
+        index = Index(range(nrows))

         return type(self).from_blocks(result_blocks, [self.axes[0], index])
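
Illustrative note (not part of the patch): the GH 35028 comment in the count hunk above is about counting with unobserved categorical groupers. A minimal sketch of that behavior, using hypothetical column names on a recent pandas build:

    import pandas as pd

    df = pd.DataFrame(
        {
            "a": pd.Categorical(["x", "x"], categories=["x", "y"]),
            "b": pd.Categorical(["p", "p"], categories=["p", "q"]),
            "vals": [1, 2],
        }
    )

    # With observed=False the result index is the Cartesian product of the
    # categories; count() fills unobserved combinations such as ("y", "q")
    # with 0 rather than the NaN that plain reindexing would produce.
    out = df.groupby(["a", "b"], observed=False).count()
    print(out)

As the in-code comment explains, the temporary observed=True setting keeps the wrapping step from reindexing to the full category product (which would insert NaN); the final _reindex_output(result, fill_value=0) call then produces the zero-filled product.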