Skip to content

REF: remove unnecessary Block specialization #54568

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Aug 16, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 14 additions & 68 deletions pandas/_libs/internals.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ cnp.import_array()

from pandas._libs.algos import ensure_int64

from pandas._libs.arrays cimport NDArrayBacked
from pandas._libs.util cimport (
is_array,
is_integer_object,
Expand Down Expand Up @@ -639,14 +638,19 @@ def _unpickle_block(values, placement, ndim):


@cython.freelist(64)
cdef class SharedBlock:
cdef class Block:
"""
Defining __init__ in a cython class significantly improves performance.
"""
cdef:
public BlockPlacement _mgr_locs
public BlockValuesRefs refs
readonly int ndim
# 2023-08-15 no apparent performance improvement from declaring values
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm guessing this would make a difference if we actually had to use attributes of the ndarray that would be access via C instead of via Python, but yea that doesn't look to actually occur in the classes you've removed

# as ndarray in a type-special subclass (similar for NDArrayBacked).
# This might change if slice_block_rows can be optimized with something
# like https://github.com/numpy/numpy/issues/23934
public object values

def __cinit__(
self,
Expand All @@ -666,6 +670,8 @@ cdef class SharedBlock:
refs: BlockValuesRefs, optional
Ref tracking object or None if block does not have any refs.
"""
self.values = values

self._mgr_locs = placement
self.ndim = ndim
if refs is None:
Expand Down Expand Up @@ -699,51 +705,7 @@ cdef class SharedBlock:
ndim = maybe_infer_ndim(self.values, self.mgr_locs)
self.ndim = ndim


cdef class NumpyBlock(SharedBlock):
cdef:
public ndarray values

def __cinit__(
self,
ndarray values,
BlockPlacement placement,
int ndim,
refs: BlockValuesRefs | None = None,
):
# set values here; the (implicit) call to SharedBlock.__cinit__ will
# set placement, ndim and refs
self.values = values

cpdef NumpyBlock slice_block_rows(self, slice slicer):
"""
Perform __getitem__-like specialized to slicing along index.

Assumes self.ndim == 2
"""
new_values = self.values[..., slicer]
return type(self)(new_values, self._mgr_locs, ndim=self.ndim, refs=self.refs)


cdef class NDArrayBackedBlock(SharedBlock):
"""
Block backed by NDArrayBackedExtensionArray
"""
cdef public:
NDArrayBacked values

def __cinit__(
self,
NDArrayBacked values,
BlockPlacement placement,
int ndim,
refs: BlockValuesRefs | None = None,
):
# set values here; the (implicit) call to SharedBlock.__cinit__ will
# set placement, ndim and refs
self.values = values

cpdef NDArrayBackedBlock slice_block_rows(self, slice slicer):
cpdef Block slice_block_rows(self, slice slicer):
"""
Perform __getitem__-like specialized to slicing along index.

Expand All @@ -753,22 +715,6 @@ cdef class NDArrayBackedBlock(SharedBlock):
return type(self)(new_values, self._mgr_locs, ndim=self.ndim, refs=self.refs)


cdef class Block(SharedBlock):
cdef:
public object values

def __cinit__(
self,
object values,
BlockPlacement placement,
int ndim,
refs: BlockValuesRefs | None = None,
):
# set values here; the (implicit) call to SharedBlock.__cinit__ will
# set placement, ndim and refs
self.values = values


@cython.freelist(64)
cdef class BlockManager:
cdef:
Expand Down Expand Up @@ -811,7 +757,7 @@ cdef class BlockManager:
cdef:
intp_t blkno, i, j
cnp.npy_intp length = self.shape[0]
SharedBlock blk
Block blk
BlockPlacement bp
ndarray[intp_t, ndim=1] new_blknos, new_blklocs

Expand Down Expand Up @@ -901,7 +847,7 @@ cdef class BlockManager:

cdef BlockManager _slice_mgr_rows(self, slice slobj):
cdef:
SharedBlock blk, nb
Block blk, nb
BlockManager mgr
ndarray blknos, blklocs

Expand Down Expand Up @@ -945,18 +891,18 @@ cdef class BlockValuesRefs:
cdef:
public list referenced_blocks

def __cinit__(self, blk: SharedBlock | None = None) -> None:
def __cinit__(self, blk: Block | None = None) -> None:
if blk is not None:
self.referenced_blocks = [weakref.ref(blk)]
else:
self.referenced_blocks = []

def add_reference(self, blk: SharedBlock) -> None:
def add_reference(self, blk: Block) -> None:
"""Adds a new reference to our reference collection.

Parameters
----------
blk: SharedBlock
blk : Block
The block that the new references should point to.
"""
self.referenced_blocks.append(weakref.ref(blk))
Expand Down
4 changes: 1 addition & 3 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -817,9 +817,7 @@ def swapaxes(self, axis1: Axis, axis2: Axis, copy: bool_t | None = None) -> Self
assert isinstance(new_mgr, BlockManager)
assert isinstance(self._mgr, BlockManager)
new_mgr.blocks[0].refs = self._mgr.blocks[0].refs
new_mgr.blocks[0].refs.add_reference(
new_mgr.blocks[0] # type: ignore[arg-type]
)
new_mgr.blocks[0].refs.add_reference(new_mgr.blocks[0])
if not using_copy_on_write() and copy is not False:
new_mgr = new_mgr.copy(deep=True)

Expand Down
8 changes: 4 additions & 4 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def newfunc(self, *args, **kwargs) -> list[Block]:
return cast(F, newfunc)


class Block(PandasObject):
class Block(PandasObject, libinternals.Block):
"""
Canonical n-dimensional unit of homogeneous dtype contained in a pandas
data structure
Expand Down Expand Up @@ -1936,7 +1936,7 @@ def pad_or_backfill(
return [self.make_block_same_class(new_values)]


class ExtensionBlock(libinternals.Block, EABackedBlock):
class ExtensionBlock(EABackedBlock):
"""
Block for holding extension types.

Expand Down Expand Up @@ -2204,7 +2204,7 @@ def _unstack(
return blocks, mask


class NumpyBlock(libinternals.NumpyBlock, Block):
class NumpyBlock(Block):
values: np.ndarray
__slots__ = ()

Expand Down Expand Up @@ -2242,7 +2242,7 @@ class ObjectBlock(NumpyBlock):
__slots__ = ()


class NDArrayBackedExtensionBlock(libinternals.NDArrayBackedBlock, EABackedBlock):
class NDArrayBackedExtensionBlock(EABackedBlock):
"""
Block backed by an NDArrayBackedExtensionArray
"""
Expand Down
4 changes: 1 addition & 3 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,9 +263,7 @@ def add_references(self, mgr: BaseBlockManager) -> None:
return
for i, blk in enumerate(self.blocks):
blk.refs = mgr.blocks[i].refs
# Argument 1 to "add_reference" of "BlockValuesRefs" has incompatible type
# "Block"; expected "SharedBlock"
blk.refs.add_reference(blk) # type: ignore[arg-type]
blk.refs.add_reference(blk)

def references_same_values(self, mgr: BaseBlockManager, blkno: int) -> bool:
"""
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -897,7 +897,7 @@ def view(self, dtype: Dtype | None = None) -> Series:
if isinstance(res_ser._mgr, SingleBlockManager):
blk = res_ser._mgr._block
blk.refs = cast("BlockValuesRefs", self._references)
blk.refs.add_reference(blk) # type: ignore[arg-type]
blk.refs.add_reference(blk)
return res_ser.__finalize__(self, method="view")

# ----------------------------------------------------------------------
Expand Down