From 120934596a77213db1d164506dbabacdf9b58827 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 12 Aug 2020 18:30:36 -0700 Subject: [PATCH 1/3] REF: implement reset_dropped_locs --- pandas/core/groupby/generic.py | 24 ++--------------------- pandas/core/internals/managers.py | 32 +++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 22 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index b7280a9f7db3c..d29f516f7404e 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1107,6 +1107,7 @@ def blk_func(block: "Block") -> List["Block"]: assert len(locs) == result.shape[1] for i, loc in enumerate(locs): agg_block = result.iloc[:, [i]]._mgr.blocks[0] + agg_block.mgr_locs = [loc] new_blocks.append(agg_block) else: result = result._mgr.blocks[0].values @@ -1120,7 +1121,6 @@ def blk_func(block: "Block") -> List["Block"]: return new_blocks skipped: List[int] = [] - new_items: List[np.ndarray] = [] for i, block in enumerate(data.blocks): try: nbs = blk_func(block) @@ -1132,33 +1132,13 @@ def blk_func(block: "Block") -> List["Block"]: deleted_items.append(block.mgr_locs.as_array) else: agg_blocks.extend(nbs) - new_items.append(block.mgr_locs.as_array) if not agg_blocks: raise DataError("No numeric types to aggregate") # reset the locs in the blocks to correspond to our # current ordering - indexer = np.concatenate(new_items) - agg_items = data.items.take(np.sort(indexer)) - - if deleted_items: - - # we need to adjust the indexer to account for the - # items we have removed - # really should be done in internals :< - - deleted = np.concatenate(deleted_items) - ai = np.arange(len(data)) - mask = np.zeros(len(data)) - mask[deleted] = 1 - indexer = (ai - mask.cumsum())[indexer] - - offset = 0 - for blk in agg_blocks: - loc = len(blk.mgr_locs) - blk.mgr_locs = indexer[offset : (offset + loc)] - offset += loc + agg_items = data.reset_dropped_locs(agg_blocks, skipped) return agg_blocks, agg_items diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 371b721f08b27..38606992c0b05 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1494,6 +1494,38 @@ def unstack(self, unstacker, fill_value) -> "BlockManager": bm = BlockManager(new_blocks, [new_columns, new_index]) return bm + def reset_dropped_locs(self, blocks: List[Block], skipped: List[int]) -> Index: + """ + Decrement the mgr_locs of the given blocks with `skipped` removed. + + Notes + ----- + Alters each block's mgr_locs inplace. + """ + ncols = len(self) + + new_locs = [blk.mgr_locs.as_array for blk in blocks] + indexer = np.concatenate(new_locs) + + new_items = self.items.take(np.sort(indexer)) + + if skipped: + # we need to adjust the indexer to account for the + # items we have removed + deleted_items = [self.blocks[i].mgr_locs.as_array for i in skipped] + deleted = np.concatenate(deleted_items) + ai = np.arange(ncols) + mask = np.zeros(ncols) + mask[deleted] = 1 + indexer = (ai - mask.cumsum())[indexer] + + offset = 0 + for blk in blocks: + loc = len(blk.mgr_locs) + blk.mgr_locs = indexer[offset : (offset + loc)] + offset += loc + return new_items + class SingleBlockManager(BlockManager): """ manage a single block with """ From 43e929785769779a3bc5b94c14d93e170d83bb37 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 14 Aug 2020 17:08:23 -0700 Subject: [PATCH 2/3] dummy to force Travis From f28c43d5c48540ca6e993bc39a9727146c56c52d Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 15 Aug 2020 12:21:50 -0700 Subject: [PATCH 3/3] dummy to force CI