diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index f194c774cf329..dc2e80c4ae371 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1022,9 +1022,32 @@ def _cython_agg_blocks(
         agg_blocks: List[Block] = []
         new_items: List[np.ndarray] = []
         deleted_items: List[np.ndarray] = []
-        # Some object-dtype blocks might be split into List[Block[T], Block[U]]
-        split_items: List[np.ndarray] = []
-        split_frames: List[DataFrame] = []
+
+        def _recast_result(result, values):
+            # see if we can cast the block back to the original dtype
+            assert not isinstance(result, DataFrame)
+            assert result is not no_result
+
+            result = maybe_downcast_numeric(result, values.dtype)
+
+            if not isinstance(values, np.ndarray) and isinstance(result, np.ndarray):
+                # e.g. block.values was an IntegerArray
+                # (1, N) case can occur if block.values was Categorical
+                #  and result is ndarray[object]
+                assert result.ndim == 1 or result.shape[0] == 1
+                try:
+                    # Cast back if feasible
+                    result = type(values)._from_sequence(
+                        result.ravel(), dtype=values.dtype
+                    )
+                except ValueError:
+                    # reshape to be valid for non-Extension Block
+                    result = result.reshape(1, -1)
+
+            elif isinstance(result, np.ndarray) and result.ndim == 1:
+                result = result.reshape(1, -1)
+
+            return result
 
         no_result = object()
         for block in data.blocks:
@@ -1048,6 +1071,7 @@ def _cython_agg_blocks(
                     continue
 
                 # call our grouper again with only this block
+                # TODO: will this mess up if we have duplicate columns?
                 obj = self.obj[data.items[locs]]
                 if obj.shape[1] == 1:
                     # Avoid call to self.values that can occur in DataFrame
@@ -1063,58 +1087,32 @@ def _cython_agg_blocks(
                     deleted_items.append(locs)
                     continue
                 else:
+                    if isinstance(result, Series):
+                        result = result.to_frame()
+
                     result = cast(DataFrame, result)
                     # unwrap DataFrame to get array
-                    if len(result._data.blocks) != 1:
-                        # We've split an object block! Everything we've assumed
-                        # about a single block input returning a single block output
-                        # is a lie. To keep the code-path for the typical non-split case
-                        # clean, we choose to clean up this mess later on.
-                        split_items.append(locs)
-                        split_frames.append(result)
-                        continue
-
-                    assert len(result._data.blocks) == 1
-                    result = result._data.blocks[0].values
-                    if isinstance(result, np.ndarray) and result.ndim == 1:
-                        result = result.reshape(1, -1)
-
-            assert not isinstance(result, DataFrame)
-            if result is not no_result:
-                # see if we can cast the block back to the original dtype
-                result = maybe_downcast_numeric(result, block.dtype)
-
-                if block.is_extension and isinstance(result, np.ndarray):
-                    # e.g. block.values was an IntegerArray
-                    # (1, N) case can occur if block.values was Categorical
-                    # and result is ndarray[object]
-                    assert result.ndim == 1 or result.shape[0] == 1
-                    try:
-                        # Cast back if feasible
-                        result = type(block.values)._from_sequence(
-                            result.ravel(), dtype=block.values.dtype
-                        )
-                    except ValueError:
-                        # reshape to be valid for non-Extension Block
-                        result = result.reshape(1, -1)
+                    for i, col in enumerate(result.columns):
+                        nb = result.iloc[:, [i]]._data.blocks[0]
+                        loc = data.items.get_loc(col)
+                        # FIXME: requires unique?  GH#31735
+                        res = _recast_result(nb.values, data.iget(loc).blocks[0].values)
+                        nb2 = make_block(res, placement=[loc], ndim=2)
+                        agg_blocks.append(nb2)
+            else:
+                assert not isinstance(result, DataFrame)
+                assert result is not no_result
+                result = _recast_result(result, block.values)
 
                 agg_block: Block = block.make_block(result)
+                agg_blocks.append(agg_block)
 
             new_items.append(locs)
-            agg_blocks.append(agg_block)
 
-        if not (agg_blocks or split_frames):
+        if not agg_blocks:
             raise DataError("No numeric types to aggregate")
 
-        if split_items:
-            # Clean up the mess left over from split blocks.
-            for locs, result in zip(split_items, split_frames):
-                assert len(locs) == result.shape[1]
-                for i, loc in enumerate(locs):
-                    new_items.append(np.array([loc], dtype=locs.dtype))
-                    agg_blocks.append(result.iloc[:, [i]]._data.blocks[0])
-
         # reset the locs in the blocks to correspond to our
         # current ordering
         indexer = np.concatenate(new_items)
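
Note (reviewer addition, not part of the patch): a minimal sketch of the split-block situation the per-column loop above has to handle, i.e. a single object-dtype input block whose result comes back as several blocks of different dtypes. infer_objects is only a stand-in here for an aggregation whose per-column results infer to different dtypes; _data/blocks is the same internal BlockManager API the diff itself already uses.

    import pandas as pd

    # One object-dtype block holding both columns.
    df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]}, dtype=object)
    print(len(df._data.blocks))  # 1

    # After dtype inference the same columns land in two blocks (int64 + object):
    # one input block has produced several output blocks, which is why the result
    # DataFrame above is walked column-by-column and re-placed with make_block.
    split = df.infer_objects()
    print(len(split._data.blocks))  # 2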