Skip to content

Commit ba3264a

Browse files
authored
PERF: Define Block.__init__ in cython (#40586)
1 parent 2019e7b commit ba3264a

File tree

3 files changed

+58
-48
lines changed

3 files changed

+58
-48
lines changed

pandas/_libs/internals.pyx

+50
Original file line numberDiff line numberDiff line change
@@ -455,3 +455,53 @@ def get_blkno_placements(blknos, group: bool = True):
455455

456456
for blkno, indexer in get_blkno_indexers(blknos, group):
457457
yield blkno, BlockPlacement(indexer)
458+
459+
460+
@cython.freelist(64)
461+
cdef class Block:
462+
"""
463+
Defining the constructor (__cinit__) in a cython class significantly improves performance.
464+
"""
465+
cdef:
466+
public BlockPlacement _mgr_locs
467+
readonly int ndim
468+
public object values
469+
470+
def __cinit__(self, values, placement: BlockPlacement, ndim: int):
471+
"""
472+
Parameters
473+
----------
474+
values : np.ndarray or ExtensionArray
475+
We assume maybe_coerce_values has already been called.
476+
placement : BlockPlacement
477+
ndim : int
478+
1 for SingleBlockManager/Series, 2 for BlockManager/DataFrame
479+
"""
480+
self._mgr_locs = placement
481+
self.ndim = ndim
482+
self.values = values
483+
484+
cpdef __reduce__(self):
485+
# We have to do some gymnastics b/c "ndim" is keyword-only
486+
from functools import partial
487+
488+
from pandas.core.internals.blocks import new_block
489+
490+
args = (self.values, self.mgr_locs.indexer)
491+
func = partial(new_block, ndim=self.ndim)
492+
return func, args
493+
494+
cpdef __setstate__(self, state):
495+
from pandas.core.construction import extract_array
496+
497+
self.mgr_locs = BlockPlacement(state[0])
498+
self.values = extract_array(state[1], extract_numpy=True)
499+
if len(state) > 2:
500+
# we stored ndim
501+
self.ndim = state[2]
502+
else:
503+
# older pickle
504+
from pandas.core.internals.api import maybe_infer_ndim
505+
506+
ndim = maybe_infer_ndim(self.values, self.mgr_locs)
507+
self.ndim = ndim

pandas/core/internals/api.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,13 @@ def make_block(
5959
if not isinstance(placement, BlockPlacement):
6060
placement = BlockPlacement(placement)
6161

62-
ndim = _maybe_infer_ndim(values, placement, ndim)
62+
ndim = maybe_infer_ndim(values, placement, ndim)
6363
check_ndim(values, placement, ndim)
6464
values = maybe_coerce_values(values)
6565
return klass(values, ndim=ndim, placement=placement)
6666

6767

68-
def _maybe_infer_ndim(values, placement: BlockPlacement, ndim: Optional[int]) -> int:
68+
def maybe_infer_ndim(values, placement: BlockPlacement, ndim: Optional[int]) -> int:
6969
"""
7070
If `ndim` is not provided, infer it from placement and values.
7171
"""

pandas/core/internals/blocks.py

+6-46
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ def newfunc(self, *args, **kwargs) -> List[Block]:
146146
return cast(F, newfunc)
147147

148148

149-
class Block(PandasObject):
149+
class Block(libinternals.Block, PandasObject):
150150
"""
151151
Canonical n-dimensional unit of homogeneous dtype contained in a pandas
152152
data structure
@@ -156,43 +156,14 @@ class Block(PandasObject):
156156

157157
values: Union[np.ndarray, ExtensionArray]
158158

159-
__slots__ = ["_mgr_locs", "values", "ndim"]
159+
__slots__ = ()
160160
is_numeric = False
161161
is_bool = False
162162
is_object = False
163163
is_extension = False
164164
_can_consolidate = True
165165
_validate_ndim = True
166166

167-
@classmethod
168-
def _simple_new(
169-
cls, values: ArrayLike, placement: BlockPlacement, ndim: int
170-
) -> Block:
171-
"""
172-
Fastpath constructor, does *no* validation
173-
"""
174-
obj = object.__new__(cls)
175-
obj.ndim = ndim
176-
obj.values = values
177-
obj._mgr_locs = placement
178-
return obj
179-
180-
def __init__(self, values, placement: BlockPlacement, ndim: int):
181-
"""
182-
Parameters
183-
----------
184-
values : np.ndarray or ExtensionArray
185-
We assume maybe_coerce_values has already been called.
186-
placement : BlockPlacement (or castable)
187-
ndim : int
188-
1 for SingleBlockManager/Series, 2 for BlockManager/DataFrame
189-
"""
190-
assert isinstance(ndim, int)
191-
assert isinstance(placement, BlockPlacement)
192-
self.ndim = ndim
193-
self._mgr_locs = placement
194-
self.values = values
195-
196167
@final
197168
@property
198169
def _consolidate_key(self):
@@ -277,7 +248,6 @@ def mgr_locs(self) -> BlockPlacement:
277248

278249
@mgr_locs.setter
279250
def mgr_locs(self, new_mgr_locs: BlockPlacement):
280-
assert isinstance(new_mgr_locs, BlockPlacement)
281251
self._mgr_locs = new_mgr_locs
282252

283253
@final
@@ -322,16 +292,6 @@ def __repr__(self) -> str:
322292
def __len__(self) -> int:
323293
return len(self.values)
324294

325-
@final
326-
def __getstate__(self):
327-
return self.mgr_locs.indexer, self.values
328-
329-
@final
330-
def __setstate__(self, state):
331-
self.mgr_locs = libinternals.BlockPlacement(state[0])
332-
self.values = extract_array(state[1], extract_numpy=True)
333-
self.ndim = self.values.ndim
334-
335295
def _slice(self, slicer):
336296
""" return a slice of my values """
337297

@@ -352,7 +312,7 @@ def getitem_block(self, slicer) -> Block:
352312
if new_values.ndim != self.values.ndim:
353313
raise ValueError("Only same dim slicing is allowed")
354314

355-
return type(self)._simple_new(new_values, new_mgr_locs, self.ndim)
315+
return type(self)(new_values, new_mgr_locs, self.ndim)
356316

357317
@final
358318
def getitem_block_index(self, slicer: slice) -> Block:
@@ -364,7 +324,7 @@ def getitem_block_index(self, slicer: slice) -> Block:
364324
# error: Invalid index type "Tuple[ellipsis, slice]" for
365325
# "Union[ndarray, ExtensionArray]"; expected type "Union[int, slice, ndarray]"
366326
new_values = self.values[..., slicer] # type: ignore[index]
367-
return type(self)._simple_new(new_values, self._mgr_locs, ndim=self.ndim)
327+
return type(self)(new_values, self._mgr_locs, ndim=self.ndim)
368328

369329
@final
370330
def getitem_block_columns(self, slicer, new_mgr_locs: BlockPlacement) -> Block:
@@ -378,7 +338,7 @@ def getitem_block_columns(self, slicer, new_mgr_locs: BlockPlacement) -> Block:
378338
if new_values.ndim != self.values.ndim:
379339
raise ValueError("Only same dim slicing is allowed")
380340

381-
return type(self)._simple_new(new_values, new_mgr_locs, self.ndim)
341+
return type(self)(new_values, new_mgr_locs, self.ndim)
382342

383343
@property
384344
def shape(self) -> Shape:
@@ -1911,7 +1871,7 @@ def set_inplace(self, locs, values):
19111871
self.values[locs] = values
19121872

19131873

1914-
class DatetimeTZBlock(ExtensionBlock, DatetimeBlock):
1874+
class DatetimeTZBlock(ExtensionBlock, DatetimeLikeBlockMixin):
19151875
""" implement a datetime64 block with a tz attribute """
19161876

19171877
values: DatetimeArray

0 commit comments

Comments
 (0)