Skip to content

Commit 8f0d038

Browse files
authored
REF: implement reset_dropped_locs (#35696)
1 parent 61e3fd0 commit 8f0d038

File tree

2 files changed

+34
-22
lines changed

2 files changed

+34
-22
lines changed

pandas/core/groupby/generic.py

+2-22
Original file line numberDiff line numberDiff line change
@@ -1111,6 +1111,7 @@ def blk_func(block: "Block") -> List["Block"]:
11111111
assert len(locs) == result.shape[1]
11121112
for i, loc in enumerate(locs):
11131113
agg_block = result.iloc[:, [i]]._mgr.blocks[0]
1114+
agg_block.mgr_locs = [loc]
11141115
new_blocks.append(agg_block)
11151116
else:
11161117
result = result._mgr.blocks[0].values
@@ -1124,7 +1125,6 @@ def blk_func(block: "Block") -> List["Block"]:
11241125
return new_blocks
11251126

11261127
skipped: List[int] = []
1127-
new_items: List[np.ndarray] = []
11281128
for i, block in enumerate(data.blocks):
11291129
try:
11301130
nbs = blk_func(block)
@@ -1136,33 +1136,13 @@ def blk_func(block: "Block") -> List["Block"]:
11361136
deleted_items.append(block.mgr_locs.as_array)
11371137
else:
11381138
agg_blocks.extend(nbs)
1139-
new_items.append(block.mgr_locs.as_array)
11401139

11411140
if not agg_blocks:
11421141
raise DataError("No numeric types to aggregate")
11431142

11441143
# reset the locs in the blocks to correspond to our
11451144
# current ordering
1146-
indexer = np.concatenate(new_items)
1147-
agg_items = data.items.take(np.sort(indexer))
1148-
1149-
if deleted_items:
1150-
1151-
# we need to adjust the indexer to account for the
1152-
# items we have removed
1153-
# really should be done in internals :<
1154-
1155-
deleted = np.concatenate(deleted_items)
1156-
ai = np.arange(len(data))
1157-
mask = np.zeros(len(data))
1158-
mask[deleted] = 1
1159-
indexer = (ai - mask.cumsum())[indexer]
1160-
1161-
offset = 0
1162-
for blk in agg_blocks:
1163-
loc = len(blk.mgr_locs)
1164-
blk.mgr_locs = indexer[offset : (offset + loc)]
1165-
offset += loc
1145+
agg_items = data.reset_dropped_locs(agg_blocks, skipped)
11661146

11671147
return agg_blocks, agg_items
11681148

pandas/core/internals/managers.py

+32
Original file line numberDiff line numberDiff line change
@@ -1504,6 +1504,38 @@ def unstack(self, unstacker, fill_value) -> "BlockManager":
15041504
bm = BlockManager(new_blocks, [new_columns, new_index])
15051505
return bm
15061506

1507+
def reset_dropped_locs(self, blocks: List[Block], skipped: List[int]) -> Index:
    """
    Renumber the mgr_locs of the given blocks as if `skipped` were removed.

    Parameters
    ----------
    blocks : List[Block]
        The blocks being kept. Their current mgr_locs are positions into
        ``self.items``.
    skipped : List[int]
        Positions in ``self.blocks`` of the blocks that were dropped.

    Returns
    -------
    Index
        ``self.items`` taken at the kept positions, in sorted positional
        order.

    Notes
    -----
    Alters each block's mgr_locs inplace.
    """
    ncols = len(self)

    new_locs = [blk.mgr_locs.as_array for blk in blocks]
    indexer = np.concatenate(new_locs)

    new_items = self.items.take(np.sort(indexer))

    if skipped:
        # Shift every kept position down by the number of dropped positions
        # that precede it, e.g. with 5 columns and position 1 dropped the
        # kept positions [0, 2, 3, 4] become [0, 1, 2, 3].
        deleted_items = [self.blocks[i].mgr_locs.as_array for i in skipped]
        deleted = np.concatenate(deleted_items)
        # A boolean mask keeps the cumsum (and therefore the new locs)
        # integer-valued; a float mask would produce float positions.
        dropped = np.zeros(ncols, dtype=bool)
        dropped[deleted] = True
        indexer = (np.arange(ncols) - dropped.cumsum())[indexer]

    # Hand each block its slice of the renumbered positions, in the same
    # order the positions were concatenated above.
    offset = 0
    for blk in blocks:
        loc = len(blk.mgr_locs)
        blk.mgr_locs = indexer[offset : (offset + loc)]
        offset += loc
    return new_items
1538+
15071539

15081540
class SingleBlockManager(BlockManager):
15091541
""" manage a single block with """

0 commit comments

Comments
 (0)