Skip to content

Commit 9f48b88

Browse files
authored
PERF: unstack (#44758)
1 parent e6091f5 commit 9f48b88

File tree

2 files changed

+20
-6
lines changed

2 files changed

+20
-6
lines changed

pandas/core/internals/blocks.py

+11-4
Original file line numberDiff line numberDiff line change
@@ -1252,7 +1252,7 @@ def _unstack(
12521252
unstacker,
12531253
fill_value,
12541254
new_placement: npt.NDArray[np.intp],
1255-
allow_fill: bool,
1255+
needs_masking: npt.NDArray[np.bool_],
12561256
):
12571257
"""
12581258
Return a list of unstacked blocks of self
@@ -1264,6 +1264,7 @@ def _unstack(
12641264
Only used in ExtensionBlock._unstack
12651265
new_placement : np.ndarray[np.intp]
12661266
allow_fill : bool
1267+
needs_masking : np.ndarray[bool]
12671268
12681269
Returns
12691270
-------
@@ -1673,7 +1674,7 @@ def _unstack(
16731674
unstacker,
16741675
fill_value,
16751676
new_placement: npt.NDArray[np.intp],
1676-
allow_fill: bool,
1677+
needs_masking: npt.NDArray[np.bool_],
16771678
):
16781679
# ExtensionArray-safe unstack.
16791680
# We override ObjectBlock._unstack, which unstacks directly on the
@@ -1692,14 +1693,20 @@ def _unstack(
16921693
new_values = new_values.T[mask]
16931694
new_placement = new_placement[mask]
16941695

1696+
# needs_masking[i], calculated once in BlockManager.unstack, tells
1697+
# us if there are any -1s in the relevant indices. When False,
1698+
# that allows us to go through a faster path in 'take', among
1699+
# other things avoiding e.g. Categorical._validate_scalar.
16951700
blocks = [
16961701
# TODO: could cast to object depending on fill_value?
16971702
type(self)(
1698-
self.values.take(indices, allow_fill=allow_fill, fill_value=fill_value),
1703+
self.values.take(
1704+
indices, allow_fill=needs_masking[i], fill_value=fill_value
1705+
),
16991706
BlockPlacement(place),
17001707
ndim=2,
17011708
)
1702-
for indices, place in zip(new_values, new_placement)
1709+
for i, (indices, place) in enumerate(zip(new_values, new_placement))
17031710
]
17041711
return blocks, mask
17051712

pandas/core/internals/managers.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -1425,7 +1425,14 @@ def unstack(self, unstacker, fill_value) -> BlockManager:
14251425
new_columns = unstacker.get_new_columns(self.items)
14261426
new_index = unstacker.new_index
14271427

1428-
allow_fill = not unstacker.mask.all()
1428+
allow_fill = not unstacker.mask_all
1429+
if allow_fill:
1430+
# calculating the full mask once and passing it to Block._unstack is
1431+
# faster than recalculating it in each repeated call
1432+
new_mask2D = (~unstacker.mask).reshape(*unstacker.full_shape)
1433+
needs_masking = new_mask2D.any(axis=0)
1434+
else:
1435+
needs_masking = np.zeros(unstacker.full_shape[1], dtype=bool)
14291436

14301437
new_blocks: list[Block] = []
14311438
columns_mask: list[np.ndarray] = []
@@ -1445,7 +1452,7 @@ def unstack(self, unstacker, fill_value) -> BlockManager:
14451452
unstacker,
14461453
fill_value,
14471454
new_placement=new_placement,
1448-
allow_fill=allow_fill,
1455+
needs_masking=needs_masking,
14491456
)
14501457

14511458
new_blocks.extend(blocks)

0 commit comments

Comments
 (0)