Skip to content

Commit c68b605

Browse files
authored
PERF: cache_readonly for Block properties (#40620)
1 parent a59987b commit c68b605

File tree

2 files changed

+51
-30
lines changed

2 files changed

+51
-30
lines changed

pandas/core/internals/blocks.py

+50-29
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@
3636
Shape,
3737
final,
3838
)
39+
from pandas.util._decorators import cache_readonly
3940
from pandas.util._validators import validate_bool_kwarg
4041

4142
from pandas.core.dtypes.cast import (
@@ -165,7 +166,7 @@ class Block(libinternals.Block, PandasObject):
165166
_validate_ndim = True
166167

167168
@final
168-
@property
169+
@cache_readonly
169170
def _consolidate_key(self):
170171
return self._can_consolidate, self.dtype.name
171172

@@ -188,7 +189,7 @@ def _can_hold_na(self) -> bool:
188189
return values._can_hold_na
189190

190191
@final
191-
@property
192+
@cache_readonly
192193
def is_categorical(self) -> bool:
193194
warnings.warn(
194195
"Block.is_categorical is deprecated and will be removed in a "
@@ -217,6 +218,7 @@ def internal_values(self):
217218
"""
218219
return self.values
219220

221+
@property
220222
def array_values(self) -> ExtensionArray:
221223
"""
222224
The array that Series.array returns. Always an ExtensionArray.
@@ -245,7 +247,7 @@ def get_block_values_for_json(self) -> np.ndarray:
245247
return np.asarray(self.values).reshape(self.shape)
246248

247249
@final
248-
@property
250+
@cache_readonly
249251
def fill_value(self):
250252
# Used in reindex_indexer
251253
return na_value_for_dtype(self.dtype, compat=False)
@@ -353,7 +355,7 @@ def shape(self) -> Shape:
353355
return self.values.shape
354356

355357
@final
356-
@property
358+
@cache_readonly
357359
def dtype(self) -> DtypeObj:
358360
return self.values.dtype
359361

@@ -378,6 +380,11 @@ def delete(self, loc) -> None:
378380
"""
379381
self.values = np.delete(self.values, loc, 0)
380382
self.mgr_locs = self._mgr_locs.delete(loc)
383+
try:
384+
self._cache.clear()
385+
except AttributeError:
386+
# _cache not yet initialized
387+
pass
381388

382389
@final
383390
def apply(self, func, **kwargs) -> List[Block]:
@@ -592,7 +599,7 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"):
592599
"""
593600
values = self.values
594601
if values.dtype.kind in ["m", "M"]:
595-
values = self.array_values()
602+
values = self.array_values
596603

597604
new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)
598605

@@ -931,7 +938,7 @@ def setitem(self, indexer, value):
931938
return self.coerce_to_target_dtype(value).setitem(indexer, value)
932939

933940
if self.dtype.kind in ["m", "M"]:
934-
arr = self.array_values().T
941+
arr = self.array_values.T
935942
arr[indexer] = value
936943
return self
937944

@@ -1445,7 +1452,7 @@ class ExtensionBlock(Block):
14451452

14461453
values: ExtensionArray
14471454

1448-
@property
1455+
@cache_readonly
14491456
def shape(self) -> Shape:
14501457
# TODO(EA2D): override unnecessary with 2D EAs
14511458
if self.ndim == 1:
@@ -1476,6 +1483,12 @@ def set_inplace(self, locs, values):
14761483
# see GH#33457
14771484
assert locs.tolist() == [0]
14781485
self.values = values
1486+
try:
1487+
# TODO(GH33457) this can be removed
1488+
self._cache.clear()
1489+
except AttributeError:
1490+
# _cache not yet initialized
1491+
pass
14791492

14801493
def putmask(self, mask, new) -> List[Block]:
14811494
"""
@@ -1500,7 +1513,7 @@ def is_view(self) -> bool:
15001513
"""Extension arrays are never treated as views."""
15011514
return False
15021515

1503-
@property
1516+
@cache_readonly
15041517
def is_numeric(self):
15051518
return self.values.dtype._is_numeric
15061519

@@ -1549,6 +1562,7 @@ def get_values(self, dtype: Optional[DtypeObj] = None) -> np.ndarray:
15491562
# TODO(EA2D): reshape not needed with 2D EAs
15501563
return np.asarray(self.values).reshape(self.shape)
15511564

1565+
@cache_readonly
15521566
def array_values(self) -> ExtensionArray:
15531567
return self.values
15541568

@@ -1675,10 +1689,7 @@ def where(self, other, cond, errors="raise") -> List[Block]:
16751689
# The default `other` for Series / Frame is np.nan
16761690
# we want to replace that with the correct NA value
16771691
# for the type
1678-
1679-
# error: Item "dtype[Any]" of "Union[dtype[Any], ExtensionDtype]" has no
1680-
# attribute "na_value"
1681-
other = self.dtype.na_value # type: ignore[union-attr]
1692+
other = self.dtype.na_value
16821693

16831694
if is_sparse(self.values):
16841695
# TODO(SparseArray.__setitem__): remove this if condition
@@ -1739,10 +1750,11 @@ class HybridMixin:
17391750
array_values: Callable
17401751

17411752
def _can_hold_element(self, element: Any) -> bool:
1742-
values = self.array_values()
1753+
values = self.array_values
17431754

17441755
try:
1745-
values._validate_setitem_value(element)
1756+
# error: "Callable[..., Any]" has no attribute "_validate_setitem_value"
1757+
values._validate_setitem_value(element) # type: ignore[attr-defined]
17461758
return True
17471759
except (ValueError, TypeError):
17481760
return False
@@ -1768,9 +1780,7 @@ def _can_hold_element(self, element: Any) -> bool:
17681780
if isinstance(element, (IntegerArray, FloatingArray)):
17691781
if element._mask.any():
17701782
return False
1771-
# error: Argument 1 to "can_hold_element" has incompatible type
1772-
# "Union[dtype[Any], ExtensionDtype]"; expected "dtype[Any]"
1773-
return can_hold_element(self.dtype, element) # type: ignore[arg-type]
1783+
return can_hold_element(self.dtype, element)
17741784

17751785

17761786
class NDArrayBackedExtensionBlock(HybridMixin, Block):
@@ -1780,23 +1790,25 @@ class NDArrayBackedExtensionBlock(HybridMixin, Block):
17801790

17811791
def internal_values(self):
17821792
# Override to return DatetimeArray and TimedeltaArray
1783-
return self.array_values()
1793+
return self.array_values
17841794

17851795
def get_values(self, dtype: Optional[DtypeObj] = None) -> np.ndarray:
17861796
"""
17871797
return object dtype as boxed values, such as Timestamps/Timedelta
17881798
"""
1789-
values = self.array_values()
1799+
values = self.array_values
17901800
if is_object_dtype(dtype):
17911801
# DTA/TDA constructor and astype can handle 2D
1792-
values = values.astype(object)
1802+
# error: "Callable[..., Any]" has no attribute "astype"
1803+
values = values.astype(object) # type: ignore[attr-defined]
17931804
# TODO(EA2D): reshape not needed with 2D EAs
17941805
return np.asarray(values).reshape(self.shape)
17951806

17961807
def iget(self, key):
17971808
# GH#31649 we need to wrap scalars in Timestamp/Timedelta
17981809
# TODO(EA2D): this can be removed if we ever have 2D EA
1799-
return self.array_values().reshape(self.shape)[key]
1810+
# error: "Callable[..., Any]" has no attribute "reshape"
1811+
return self.array_values.reshape(self.shape)[key] # type: ignore[attr-defined]
18001812

18011813
def putmask(self, mask, new) -> List[Block]:
18021814
mask = extract_bool_array(mask)
@@ -1805,14 +1817,16 @@ def putmask(self, mask, new) -> List[Block]:
18051817
return self.astype(object).putmask(mask, new)
18061818

18071819
# TODO(EA2D): reshape unnecessary with 2D EAs
1808-
arr = self.array_values().reshape(self.shape)
1820+
# error: "Callable[..., Any]" has no attribute "reshape"
1821+
arr = self.array_values.reshape(self.shape) # type: ignore[attr-defined]
18091822
arr = cast("NDArrayBackedExtensionArray", arr)
18101823
arr.T.putmask(mask, new)
18111824
return [self]
18121825

18131826
def where(self, other, cond, errors="raise") -> List[Block]:
18141827
# TODO(EA2D): reshape unnecessary with 2D EAs
1815-
arr = self.array_values().reshape(self.shape)
1828+
# error: "Callable[..., Any]" has no attribute "reshape"
1829+
arr = self.array_values.reshape(self.shape) # type: ignore[attr-defined]
18161830

18171831
cond = extract_bool_array(cond)
18181832

@@ -1848,15 +1862,17 @@ def diff(self, n: int, axis: int = 0) -> List[Block]:
18481862
by apply.
18491863
"""
18501864
# TODO(EA2D): reshape not necessary with 2D EAs
1851-
values = self.array_values().reshape(self.shape)
1865+
# error: "Callable[..., Any]" has no attribute "reshape"
1866+
values = self.array_values.reshape(self.shape) # type: ignore[attr-defined]
18521867

18531868
new_values = values - values.shift(n, axis=axis)
18541869
new_values = maybe_coerce_values(new_values)
18551870
return [self.make_block(new_values)]
18561871

18571872
def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> List[Block]:
1858-
# TODO(EA2D) this is unnecessary if these blocks are backed by 2D EAs
1859-
values = self.array_values().reshape(self.shape)
1873+
# TODO(EA2D) this is unnecessary if these blocks are backed by 2D EA
1874+
# error: "Callable[..., Any]" has no attribute "reshape"
1875+
values = self.array_values.reshape(self.shape) # type: ignore[attr-defined]
18601876
new_values = values.shift(periods, fill_value=fill_value, axis=axis)
18611877
new_values = maybe_coerce_values(new_values)
18621878
return [self.make_block_same_class(new_values)]
@@ -1871,9 +1887,13 @@ def fillna(
18711887
# TODO: don't special-case td64
18721888
return self.astype(object).fillna(value, limit, inplace, downcast)
18731889

1874-
values = self.array_values()
1875-
values = values if inplace else values.copy()
1876-
new_values = values.fillna(value=value, limit=limit)
1890+
values = self.array_values
1891+
# error: "Callable[..., Any]" has no attribute "copy"
1892+
values = values if inplace else values.copy() # type: ignore[attr-defined]
1893+
# error: "Callable[..., Any]" has no attribute "fillna"
1894+
new_values = values.fillna( # type: ignore[attr-defined]
1895+
value=value, limit=limit
1896+
)
18771897
new_values = maybe_coerce_values(new_values)
18781898
return [self.make_block_same_class(values=new_values)]
18791899

@@ -1883,6 +1903,7 @@ class DatetimeLikeBlockMixin(NDArrayBackedExtensionBlock):
18831903

18841904
is_numeric = False
18851905

1906+
@cache_readonly
18861907
def array_values(self):
18871908
return ensure_wrapped_if_datetimelike(self.values)
18881909

pandas/core/internals/managers.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -1668,7 +1668,7 @@ def internal_values(self):
16681668

16691669
def array_values(self):
16701670
"""The array that Series.array returns"""
1671-
return self._block.array_values()
1671+
return self._block.array_values
16721672

16731673
@property
16741674
def _can_hold_na(self) -> bool:

0 commit comments

Comments
 (0)