Skip to content

Commit 786716a

Browse files
authored
REF: remove unnecessary Block specialization (#54568)
* REF: remove unnecessary Block specialization
* revert accidental
* mypy fixup
* mypy fixup
1 parent ff86177 commit 786716a

File tree

6 files changed

+24
-94
lines changed

6 files changed

+24
-94
lines changed

pandas/_libs/internals.pyi

+3-15
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ from pandas._typing import (
1515
)
1616

1717
from pandas import Index
18-
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
1918
from pandas.core.internals.blocks import Block as B
2019

2120
def slice_len(slc: slice, objlen: int = ...) -> int: ...
@@ -60,7 +59,7 @@ class BlockPlacement:
6059
def append(self, others: list[BlockPlacement]) -> BlockPlacement: ...
6160
def tile_for_unstack(self, factor: int) -> npt.NDArray[np.intp]: ...
6261

63-
class SharedBlock:
62+
class Block:
6463
_mgr_locs: BlockPlacement
6564
ndim: int
6665
values: ArrayLike
@@ -72,19 +71,8 @@ class SharedBlock:
7271
ndim: int,
7372
refs: BlockValuesRefs | None = ...,
7473
) -> None: ...
75-
76-
class NumpyBlock(SharedBlock):
77-
values: np.ndarray
78-
@final
79-
def slice_block_rows(self, slicer: slice) -> Self: ...
80-
81-
class NDArrayBackedBlock(SharedBlock):
82-
values: NDArrayBackedExtensionArray
83-
@final
8474
def slice_block_rows(self, slicer: slice) -> Self: ...
8575

86-
class Block(SharedBlock): ...
87-
8876
class BlockManager:
8977
blocks: tuple[B, ...]
9078
axes: list[Index]
@@ -100,7 +88,7 @@ class BlockManager:
10088

10189
class BlockValuesRefs:
10290
referenced_blocks: list[weakref.ref]
103-
def __init__(self, blk: SharedBlock | None = ...) -> None: ...
104-
def add_reference(self, blk: SharedBlock) -> None: ...
91+
def __init__(self, blk: Block | None = ...) -> None: ...
92+
def add_reference(self, blk: Block) -> None: ...
10593
def add_index_reference(self, index: Index) -> None: ...
10694
def has_reference(self) -> bool: ...

pandas/_libs/internals.pyx

+14-68
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ cnp.import_array()
2424

2525
from pandas._libs.algos import ensure_int64
2626

27-
from pandas._libs.arrays cimport NDArrayBacked
2827
from pandas._libs.util cimport (
2928
is_array,
3029
is_integer_object,
@@ -639,14 +638,19 @@ def _unpickle_block(values, placement, ndim):
639638

640639

641640
@cython.freelist(64)
642-
cdef class SharedBlock:
641+
cdef class Block:
643642
"""
644643
Defining __init__ in a cython class significantly improves performance.
645644
"""
646645
cdef:
647646
public BlockPlacement _mgr_locs
648647
public BlockValuesRefs refs
649648
readonly int ndim
649+
# 2023-08-15 no apparent performance improvement from declaring values
650+
# as ndarray in a type-specialized subclass (similar for NDArrayBacked).
651+
# This might change if slice_block_rows can be optimized with something
652+
# like https://github.com/numpy/numpy/issues/23934
653+
public object values
650654

651655
def __cinit__(
652656
self,
@@ -666,6 +670,8 @@ cdef class SharedBlock:
666670
refs: BlockValuesRefs, optional
667671
Ref tracking object or None if block does not have any refs.
668672
"""
673+
self.values = values
674+
669675
self._mgr_locs = placement
670676
self.ndim = ndim
671677
if refs is None:
@@ -699,51 +705,7 @@ cdef class SharedBlock:
699705
ndim = maybe_infer_ndim(self.values, self.mgr_locs)
700706
self.ndim = ndim
701707

702-
703-
cdef class NumpyBlock(SharedBlock):
704-
cdef:
705-
public ndarray values
706-
707-
def __cinit__(
708-
self,
709-
ndarray values,
710-
BlockPlacement placement,
711-
int ndim,
712-
refs: BlockValuesRefs | None = None,
713-
):
714-
# set values here; the (implicit) call to SharedBlock.__cinit__ will
715-
# set placement, ndim and refs
716-
self.values = values
717-
718-
cpdef NumpyBlock slice_block_rows(self, slice slicer):
719-
"""
720-
Perform __getitem__-like specialized to slicing along index.
721-
722-
Assumes self.ndim == 2
723-
"""
724-
new_values = self.values[..., slicer]
725-
return type(self)(new_values, self._mgr_locs, ndim=self.ndim, refs=self.refs)
726-
727-
728-
cdef class NDArrayBackedBlock(SharedBlock):
729-
"""
730-
Block backed by NDArrayBackedExtensionArray
731-
"""
732-
cdef public:
733-
NDArrayBacked values
734-
735-
def __cinit__(
736-
self,
737-
NDArrayBacked values,
738-
BlockPlacement placement,
739-
int ndim,
740-
refs: BlockValuesRefs | None = None,
741-
):
742-
# set values here; the (implicit) call to SharedBlock.__cinit__ will
743-
# set placement, ndim and refs
744-
self.values = values
745-
746-
cpdef NDArrayBackedBlock slice_block_rows(self, slice slicer):
708+
cpdef Block slice_block_rows(self, slice slicer):
747709
"""
748710
Perform __getitem__-like specialized to slicing along index.
749711
@@ -753,22 +715,6 @@ cdef class NDArrayBackedBlock(SharedBlock):
753715
return type(self)(new_values, self._mgr_locs, ndim=self.ndim, refs=self.refs)
754716

755717

756-
cdef class Block(SharedBlock):
757-
cdef:
758-
public object values
759-
760-
def __cinit__(
761-
self,
762-
object values,
763-
BlockPlacement placement,
764-
int ndim,
765-
refs: BlockValuesRefs | None = None,
766-
):
767-
# set values here; the (implicit) call to SharedBlock.__cinit__ will
768-
# set placement, ndim and refs
769-
self.values = values
770-
771-
772718
@cython.freelist(64)
773719
cdef class BlockManager:
774720
cdef:
@@ -811,7 +757,7 @@ cdef class BlockManager:
811757
cdef:
812758
intp_t blkno, i, j
813759
cnp.npy_intp length = self.shape[0]
814-
SharedBlock blk
760+
Block blk
815761
BlockPlacement bp
816762
ndarray[intp_t, ndim=1] new_blknos, new_blklocs
817763

@@ -901,7 +847,7 @@ cdef class BlockManager:
901847

902848
cdef BlockManager _slice_mgr_rows(self, slice slobj):
903849
cdef:
904-
SharedBlock blk, nb
850+
Block blk, nb
905851
BlockManager mgr
906852
ndarray blknos, blklocs
907853

@@ -945,18 +891,18 @@ cdef class BlockValuesRefs:
945891
cdef:
946892
public list referenced_blocks
947893

948-
def __cinit__(self, blk: SharedBlock | None = None) -> None:
894+
def __cinit__(self, blk: Block | None = None) -> None:
949895
if blk is not None:
950896
self.referenced_blocks = [weakref.ref(blk)]
951897
else:
952898
self.referenced_blocks = []
953899

954-
def add_reference(self, blk: SharedBlock) -> None:
900+
def add_reference(self, blk: Block) -> None:
955901
"""Adds a new reference to our reference collection.
956902

957903
Parameters
958904
----------
959-
blk: SharedBlock
905+
blk : Block
960906
The block that the new references should point to.
961907
"""
962908
self.referenced_blocks.append(weakref.ref(blk))

pandas/core/generic.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -817,9 +817,7 @@ def swapaxes(self, axis1: Axis, axis2: Axis, copy: bool_t | None = None) -> Self
817817
assert isinstance(new_mgr, BlockManager)
818818
assert isinstance(self._mgr, BlockManager)
819819
new_mgr.blocks[0].refs = self._mgr.blocks[0].refs
820-
new_mgr.blocks[0].refs.add_reference(
821-
new_mgr.blocks[0] # type: ignore[arg-type]
822-
)
820+
new_mgr.blocks[0].refs.add_reference(new_mgr.blocks[0])
823821
if not using_copy_on_write() and copy is not False:
824822
new_mgr = new_mgr.copy(deep=True)
825823

pandas/core/internals/blocks.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ def newfunc(self, *args, **kwargs) -> list[Block]:
147147
return cast(F, newfunc)
148148

149149

150-
class Block(PandasObject):
150+
class Block(PandasObject, libinternals.Block):
151151
"""
152152
Canonical n-dimensional unit of homogeneous dtype contained in a pandas
153153
data structure
@@ -1936,7 +1936,7 @@ def pad_or_backfill(
19361936
return [self.make_block_same_class(new_values)]
19371937

19381938

1939-
class ExtensionBlock(libinternals.Block, EABackedBlock):
1939+
class ExtensionBlock(EABackedBlock):
19401940
"""
19411941
Block for holding extension types.
19421942
@@ -2204,7 +2204,7 @@ def _unstack(
22042204
return blocks, mask
22052205

22062206

2207-
class NumpyBlock(libinternals.NumpyBlock, Block):
2207+
class NumpyBlock(Block):
22082208
values: np.ndarray
22092209
__slots__ = ()
22102210

@@ -2242,7 +2242,7 @@ class ObjectBlock(NumpyBlock):
22422242
__slots__ = ()
22432243

22442244

2245-
class NDArrayBackedExtensionBlock(libinternals.NDArrayBackedBlock, EABackedBlock):
2245+
class NDArrayBackedExtensionBlock(EABackedBlock):
22462246
"""
22472247
Block backed by an NDArrayBackedExtensionArray
22482248
"""

pandas/core/internals/managers.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -263,9 +263,7 @@ def add_references(self, mgr: BaseBlockManager) -> None:
263263
return
264264
for i, blk in enumerate(self.blocks):
265265
blk.refs = mgr.blocks[i].refs
266-
# Argument 1 to "add_reference" of "BlockValuesRefs" has incompatible type
267-
# "Block"; expected "SharedBlock"
268-
blk.refs.add_reference(blk) # type: ignore[arg-type]
266+
blk.refs.add_reference(blk)
269267

270268
def references_same_values(self, mgr: BaseBlockManager, blkno: int) -> bool:
271269
"""

pandas/core/series.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -897,7 +897,7 @@ def view(self, dtype: Dtype | None = None) -> Series:
897897
if isinstance(res_ser._mgr, SingleBlockManager):
898898
blk = res_ser._mgr._block
899899
blk.refs = cast("BlockValuesRefs", self._references)
900-
blk.refs.add_reference(blk) # type: ignore[arg-type]
900+
blk.refs.add_reference(blk)
901901
return res_ser.__finalize__(self, method="view")
902902

903903
# ----------------------------------------------------------------------

0 commit comments

Comments
 (0)