Skip to content

Commit 074ef63

Browse files
authored
PERF: unstack 2 the unstackening (pandas-dev#43352)
1 parent ecaa78d commit 074ef63

File tree

2 files changed

+22
-4
lines changed

2 files changed

+22
-4
lines changed

pandas/core/internals/blocks.py

+14-2
Original file line numberDiff line numberDiff line change
@@ -1285,6 +1285,10 @@ def _unstack(self, unstacker, fill_value, new_placement, allow_fill: bool):
12851285
mask = mask.any(0)
12861286
# TODO: in all tests we have mask.all(); can we rely on that?
12871287

1288+
# Note: these next two lines ensure that
1289+
# mask.sum() == sum(len(nb.mgr_locs) for nb in blocks)
1290+
# which the calling function needs in order to pass verify_integrity=False
1291+
# to the BlockManager constructor
12881292
new_values = new_values.T[mask]
12891293
new_placement = new_placement[mask]
12901294

@@ -1656,13 +1660,21 @@ def _unstack(self, unstacker, fill_value, new_placement, allow_fill: bool):
16561660
mask = mask.any(0)
16571661
# TODO: in all tests we have mask.all(); can we rely on that?
16581662

1663+
# Note: these next two lines ensure that
1664+
# mask.sum() == sum(len(nb.mgr_locs) for nb in blocks)
1665+
# which the calling function needs in order to pass verify_integrity=False
1666+
# to the BlockManager constructor
1667+
new_values = new_values.T[mask]
1668+
new_placement = new_placement[mask]
1669+
16591670
blocks = [
16601671
# TODO: could cast to object depending on fill_value?
1661-
self.make_block_same_class(
1672+
type(self)(
16621673
self.values.take(indices, allow_fill=allow_fill, fill_value=fill_value),
16631674
BlockPlacement(place),
1675+
ndim=2,
16641676
)
1665-
for indices, place in zip(new_values.T, new_placement)
1677+
for indices, place in zip(new_values, new_placement)
16661678
]
16671679
return blocks, mask
16681680

pandas/core/internals/managers.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -1384,9 +1384,15 @@ def unstack(self, unstacker, fill_value) -> BlockManager:
13841384
new_blocks.extend(blocks)
13851385
columns_mask.extend(mask)
13861386

1387+
# Block._unstack should ensure this holds,
1388+
assert mask.sum() == sum(len(nb._mgr_locs) for nb in blocks)
1389+
# In turn this ensures that in the BlockManager call below
1390+
# we have len(new_columns) == sum(x.shape[0] for x in new_blocks)
1391+
# which suffices to allow us to pass verify_integrity=False
1392+
13871393
new_columns = new_columns[columns_mask]
13881394

1389-
bm = BlockManager(new_blocks, [new_columns, new_index])
1395+
bm = BlockManager(new_blocks, [new_columns, new_index], verify_integrity=False)
13901396
return bm
13911397

13921398
def to_dict(self, copy: bool = True):
@@ -1689,7 +1695,7 @@ def get_slice(self, slobj: slice, axis: int = 0) -> SingleBlockManager:
16891695
blk = self._block
16901696
array = blk._slice(slobj)
16911697
bp = BlockPlacement(slice(0, len(array)))
1692-
block = blk.make_block_same_class(array, placement=bp)
1698+
block = type(blk)(array, placement=bp, ndim=1)
16931699
new_index = self.index._getitem_slice(slobj)
16941700
return type(self)(block, new_index)
16951701

0 commit comments

Comments
 (0)