diff --git a/pandas/_libs/internals.pyi b/pandas/_libs/internals.pyi index ce112413f8a64..ffe6c7730bcdc 100644 --- a/pandas/_libs/internals.pyi +++ b/pandas/_libs/internals.pyi @@ -15,7 +15,6 @@ from pandas._typing import ( ) from pandas import Index -from pandas.core.arrays._mixins import NDArrayBackedExtensionArray from pandas.core.internals.blocks import Block as B def slice_len(slc: slice, objlen: int = ...) -> int: ... @@ -60,7 +59,7 @@ class BlockPlacement: def append(self, others: list[BlockPlacement]) -> BlockPlacement: ... def tile_for_unstack(self, factor: int) -> npt.NDArray[np.intp]: ... -class SharedBlock: +class Block: _mgr_locs: BlockPlacement ndim: int values: ArrayLike @@ -72,19 +71,8 @@ class SharedBlock: ndim: int, refs: BlockValuesRefs | None = ..., ) -> None: ... - -class NumpyBlock(SharedBlock): - values: np.ndarray - @final - def slice_block_rows(self, slicer: slice) -> Self: ... - -class NDArrayBackedBlock(SharedBlock): - values: NDArrayBackedExtensionArray - @final def slice_block_rows(self, slicer: slice) -> Self: ... -class Block(SharedBlock): ... - class BlockManager: blocks: tuple[B, ...] axes: list[Index] @@ -100,7 +88,7 @@ class BlockManager: class BlockValuesRefs: referenced_blocks: list[weakref.ref] - def __init__(self, blk: SharedBlock | None = ...) -> None: ... - def add_reference(self, blk: SharedBlock) -> None: ... + def __init__(self, blk: Block | None = ...) -> None: ... + def add_reference(self, blk: Block) -> None: ... def add_index_reference(self, index: Index) -> None: ... def has_reference(self) -> bool: ... diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index adf4e8c926fa3..7a9a3b84fd69f 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -24,7 +24,6 @@ cnp.import_array() from pandas._libs.algos import ensure_int64 -from pandas._libs.arrays cimport NDArrayBacked from pandas._libs.util cimport ( is_array, is_integer_object, @@ -639,7 +638,7 @@ def _unpickle_block(values, placement, ndim): @cython.freelist(64) -cdef class SharedBlock: +cdef class Block: """ Defining __init__ in a cython class significantly improves performance. """ @@ -647,6 +646,11 @@ cdef class SharedBlock: public BlockPlacement _mgr_locs public BlockValuesRefs refs readonly int ndim + # 2023-08-15 no apparent performance improvement from declaring values + # as ndarray in a type-special subclass (similar for NDArrayBacked). + # This might change if slice_block_rows can be optimized with something + # like https://github.com/numpy/numpy/issues/23934 + public object values def __cinit__( self, @@ -666,6 +670,8 @@ cdef class SharedBlock: refs: BlockValuesRefs, optional Ref tracking object or None if block does not have any refs. """ + self.values = values + self._mgr_locs = placement self.ndim = ndim if refs is None: @@ -699,51 +705,7 @@ cdef class SharedBlock: ndim = maybe_infer_ndim(self.values, self.mgr_locs) self.ndim = ndim - -cdef class NumpyBlock(SharedBlock): - cdef: - public ndarray values - - def __cinit__( - self, - ndarray values, - BlockPlacement placement, - int ndim, - refs: BlockValuesRefs | None = None, - ): - # set values here; the (implicit) call to SharedBlock.__cinit__ will - # set placement, ndim and refs - self.values = values - - cpdef NumpyBlock slice_block_rows(self, slice slicer): - """ - Perform __getitem__-like specialized to slicing along index. - - Assumes self.ndim == 2 - """ - new_values = self.values[..., slicer] - return type(self)(new_values, self._mgr_locs, ndim=self.ndim, refs=self.refs) - - -cdef class NDArrayBackedBlock(SharedBlock): - """ - Block backed by NDArrayBackedExtensionArray - """ - cdef public: - NDArrayBacked values - - def __cinit__( - self, - NDArrayBacked values, - BlockPlacement placement, - int ndim, - refs: BlockValuesRefs | None = None, - ): - # set values here; the (implicit) call to SharedBlock.__cinit__ will - # set placement, ndim and refs - self.values = values - - cpdef NDArrayBackedBlock slice_block_rows(self, slice slicer): + cpdef Block slice_block_rows(self, slice slicer): """ Perform __getitem__-like specialized to slicing along index. @@ -753,22 +715,6 @@ cdef class NDArrayBackedBlock(SharedBlock): return type(self)(new_values, self._mgr_locs, ndim=self.ndim, refs=self.refs) -cdef class Block(SharedBlock): - cdef: - public object values - - def __cinit__( - self, - object values, - BlockPlacement placement, - int ndim, - refs: BlockValuesRefs | None = None, - ): - # set values here; the (implicit) call to SharedBlock.__cinit__ will - # set placement, ndim and refs - self.values = values - - @cython.freelist(64) cdef class BlockManager: cdef: @@ -811,7 +757,7 @@ cdef class BlockManager: cdef: intp_t blkno, i, j cnp.npy_intp length = self.shape[0] - SharedBlock blk + Block blk BlockPlacement bp ndarray[intp_t, ndim=1] new_blknos, new_blklocs @@ -901,7 +847,7 @@ cdef class BlockManager: cdef BlockManager _slice_mgr_rows(self, slice slobj): cdef: - SharedBlock blk, nb + Block blk, nb BlockManager mgr ndarray blknos, blklocs @@ -945,18 +891,18 @@ cdef class BlockValuesRefs: cdef: public list referenced_blocks - def __cinit__(self, blk: SharedBlock | None = None) -> None: + def __cinit__(self, blk: Block | None = None) -> None: if blk is not None: self.referenced_blocks = [weakref.ref(blk)] else: self.referenced_blocks = [] - def add_reference(self, blk: SharedBlock) -> None: + def add_reference(self, blk: Block) -> None: """Adds a new reference to our reference collection. Parameters ---------- - blk: SharedBlock + blk : Block The block that the new references should point to. """ self.referenced_blocks.append(weakref.ref(blk)) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index be0d046697ba9..1d01b66a49edc 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -817,9 +817,7 @@ def swapaxes(self, axis1: Axis, axis2: Axis, copy: bool_t | None = None) -> Self assert isinstance(new_mgr, BlockManager) assert isinstance(self._mgr, BlockManager) new_mgr.blocks[0].refs = self._mgr.blocks[0].refs - new_mgr.blocks[0].refs.add_reference( - new_mgr.blocks[0] # type: ignore[arg-type] - ) + new_mgr.blocks[0].refs.add_reference(new_mgr.blocks[0]) if not using_copy_on_write() and copy is not False: new_mgr = new_mgr.copy(deep=True) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index ecb9cd47d7995..eca0df67ff054 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -147,7 +147,7 @@ def newfunc(self, *args, **kwargs) -> list[Block]: return cast(F, newfunc) -class Block(PandasObject): +class Block(PandasObject, libinternals.Block): """ Canonical n-dimensional unit of homogeneous dtype contained in a pandas data structure @@ -1936,7 +1936,7 @@ def pad_or_backfill( return [self.make_block_same_class(new_values)] -class ExtensionBlock(libinternals.Block, EABackedBlock): +class ExtensionBlock(EABackedBlock): """ Block for holding extension types. @@ -2204,7 +2204,7 @@ def _unstack( return blocks, mask -class NumpyBlock(libinternals.NumpyBlock, Block): +class NumpyBlock(Block): values: np.ndarray __slots__ = () @@ -2242,7 +2242,7 @@ class ObjectBlock(NumpyBlock): __slots__ = () -class NDArrayBackedExtensionBlock(libinternals.NDArrayBackedBlock, EABackedBlock): +class NDArrayBackedExtensionBlock(EABackedBlock): """ Block backed by an NDArrayBackedExtensionArray """ diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 7bb579b22aeed..98d2934678546 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -263,9 +263,7 @@ def add_references(self, mgr: BaseBlockManager) -> None: return for i, blk in enumerate(self.blocks): blk.refs = mgr.blocks[i].refs - # Argument 1 to "add_reference" of "BlockValuesRefs" has incompatible type - # "Block"; expected "SharedBlock" - blk.refs.add_reference(blk) # type: ignore[arg-type] + blk.refs.add_reference(blk) def references_same_values(self, mgr: BaseBlockManager, blkno: int) -> bool: """ diff --git a/pandas/core/series.py b/pandas/core/series.py index 020fadc359ebd..28c877798125d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -897,7 +897,7 @@ def view(self, dtype: Dtype | None = None) -> Series: if isinstance(res_ser._mgr, SingleBlockManager): blk = res_ser._mgr._block blk.refs = cast("BlockValuesRefs", self._references) - blk.refs.add_reference(blk) # type: ignore[arg-type] + blk.refs.add_reference(blk) return res_ser.__finalize__(self, method="view") # ----------------------------------------------------------------------