diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index b806d9856d20f..1f0cdbd07560f 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1111,6 +1111,7 @@ def blk_func(block: "Block") -> List["Block"]: assert len(locs) == result.shape[1] for i, loc in enumerate(locs): agg_block = result.iloc[:, [i]]._mgr.blocks[0] + agg_block.mgr_locs = [loc] new_blocks.append(agg_block) else: result = result._mgr.blocks[0].values @@ -1124,7 +1125,6 @@ def blk_func(block: "Block") -> List["Block"]: return new_blocks skipped: List[int] = [] - new_items: List[np.ndarray] = [] for i, block in enumerate(data.blocks): try: nbs = blk_func(block) @@ -1136,33 +1136,13 @@ def blk_func(block: "Block") -> List["Block"]: deleted_items.append(block.mgr_locs.as_array) else: agg_blocks.extend(nbs) - new_items.append(block.mgr_locs.as_array) if not agg_blocks: raise DataError("No numeric types to aggregate") # reset the locs in the blocks to correspond to our # current ordering - indexer = np.concatenate(new_items) - agg_items = data.items.take(np.sort(indexer)) - - if deleted_items: - - # we need to adjust the indexer to account for the - # items we have removed - # really should be done in internals :< - - deleted = np.concatenate(deleted_items) - ai = np.arange(len(data)) - mask = np.zeros(len(data)) - mask[deleted] = 1 - indexer = (ai - mask.cumsum())[indexer] - - offset = 0 - for blk in agg_blocks: - loc = len(blk.mgr_locs) - blk.mgr_locs = indexer[offset : (offset + loc)] - offset += loc + agg_items = data.reset_dropped_locs(agg_blocks, skipped) return agg_blocks, agg_items diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 5a215c4cd5fa3..f05d4cf1c4be6 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1504,6 +1504,38 @@ def unstack(self, unstacker, fill_value) -> "BlockManager": bm = BlockManager(new_blocks, [new_columns, new_index]) return bm + def reset_dropped_locs(self, blocks: List[Block], skipped: List[int]) -> Index: + """ + Decrement the mgr_locs of the given blocks with `skipped` removed. + + Notes + ----- + Alters each block's mgr_locs inplace. + """ + ncols = len(self) + + new_locs = [blk.mgr_locs.as_array for blk in blocks] + indexer = np.concatenate(new_locs) + + new_items = self.items.take(np.sort(indexer)) + + if skipped: + # we need to adjust the indexer to account for the + # items we have removed + deleted_items = [self.blocks[i].mgr_locs.as_array for i in skipped] + deleted = np.concatenate(deleted_items) + ai = np.arange(ncols) + mask = np.zeros(ncols) + mask[deleted] = 1 + indexer = (ai - mask.cumsum())[indexer] + + offset = 0 + for blk in blocks: + loc = len(blk.mgr_locs) + blk.mgr_locs = indexer[offset : (offset + loc)] + offset += loc + return new_items + class SingleBlockManager(BlockManager): """ manage a single block with """