Skip to content

Commit 58fa02b

Browse files
jbrockmendel authored and feefladder committed
PERF: rebuild_blknos_and_blklocs (pandas-dev#43353)
1 parent df31e42 commit 58fa02b

File tree

3 files changed

+38
-21
lines changed

3 files changed

+38
-21
lines changed

pandas/_libs/internals.pyi

+1
Original file line number | Diff line number | Diff line change
@@ -82,3 +82,4 @@ class BlockManager:
8282
self, blocks: tuple[B, ...], axes: list[Index], verify_integrity=True
8383
): ...
8484
def get_slice(self: T, slobj: slice, axis: int = ...) -> T: ...
85+
def _rebuild_blknos_and_blklocs(self) -> None: ...

pandas/_libs/internals.pyx

+37
Original file line number | Diff line number | Diff line change
@@ -669,6 +669,43 @@ cdef class BlockManager:
669669
self._blknos = None
670670
self._blklocs = None
671671

672+
# -------------------------------------------------------------------
673+
# Block Placement
674+
675+
def _rebuild_blknos_and_blklocs(self) -> None:
676+
"""
677+
Update mgr._blknos / mgr._blklocs.
678+
"""
679+
cdef:
680+
intp_t blkno, i, j
681+
cnp.npy_intp length = self.shape[0]
682+
SharedBlock blk
683+
BlockPlacement bp
684+
685+
# equiv: np.empty(length, dtype=np.intp)
686+
new_blknos = cnp.PyArray_EMPTY(1, &length, cnp.NPY_INTP, 0)
687+
new_blklocs = cnp.PyArray_EMPTY(1, &length, cnp.NPY_INTP, 0)
688+
new_blknos.fill(-1)
689+
new_blklocs.fill(-1)
690+
691+
for blkno, blk in enumerate(self.blocks):
692+
bp = blk.mgr_locs
693+
# Iterating over `bp` is a faster equivalent to
694+
# new_blknos[bp.indexer] = blkno
695+
# new_blklocs[bp.indexer] = np.arange(len(bp))
696+
for i, j in enumerate(bp):
697+
new_blknos[j] = blkno
698+
new_blklocs[j] = i
699+
700+
for blkno in new_blknos:
701+
# If there are any -1s remaining, this indicates that our mgr_locs
702+
# are invalid.
703+
if blkno == -1:
704+
raise AssertionError("Gaps in blk ref_locs")
705+
706+
self._blknos = new_blknos
707+
self._blklocs = new_blklocs
708+
672709
# -------------------------------------------------------------------
673710
# Pickle
674711

pandas/core/internals/managers.py

-21
Original file line number | Diff line number | Diff line change
@@ -217,27 +217,6 @@ def is_single_block(self) -> bool:
217217
# Assumes we are 2D; overridden by SingleBlockManager
218218
return len(self.blocks) == 1
219219

220-
def _rebuild_blknos_and_blklocs(self) -> None:
221-
"""
222-
Update mgr._blknos / mgr._blklocs.
223-
"""
224-
new_blknos = np.empty(self.shape[0], dtype=np.intp)
225-
new_blklocs = np.empty(self.shape[0], dtype=np.intp)
226-
new_blknos.fill(-1)
227-
new_blklocs.fill(-1)
228-
229-
for blkno, blk in enumerate(self.blocks):
230-
rl = blk.mgr_locs
231-
new_blknos[rl.indexer] = blkno
232-
new_blklocs[rl.indexer] = np.arange(len(rl))
233-
234-
if (new_blknos == -1).any():
235-
# TODO: can we avoid this? it isn't cheap
236-
raise AssertionError("Gaps in blk ref_locs")
237-
238-
self._blknos = new_blknos
239-
self._blklocs = new_blklocs
240-
241220
@property
242221
def items(self) -> Index:
243222
return self.axes[0]

0 commit comments

Comments
 (0)