Skip to content

PERF: cache_readonly for Block properties #40620

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 50 additions & 29 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
Shape,
final,
)
from pandas.util._decorators import cache_readonly
from pandas.util._validators import validate_bool_kwarg

from pandas.core.dtypes.cast import (
Expand Down Expand Up @@ -165,7 +166,7 @@ class Block(libinternals.Block, PandasObject):
_validate_ndim = True

@final
@property
@cache_readonly
def _consolidate_key(self):
return self._can_consolidate, self.dtype.name

Expand All @@ -188,7 +189,7 @@ def _can_hold_na(self) -> bool:
return values._can_hold_na

@final
@property
@cache_readonly
def is_categorical(self) -> bool:
warnings.warn(
"Block.is_categorical is deprecated and will be removed in a "
Expand Down Expand Up @@ -217,6 +218,7 @@ def internal_values(self):
"""
return self.values

@property
def array_values(self) -> ExtensionArray:
"""
The array that Series.array returns. Always an ExtensionArray.
Expand Down Expand Up @@ -245,7 +247,7 @@ def get_block_values_for_json(self) -> np.ndarray:
return np.asarray(self.values).reshape(self.shape)

@final
@property
@cache_readonly
def fill_value(self):
# Used in reindex_indexer
return na_value_for_dtype(self.dtype, compat=False)
Expand Down Expand Up @@ -353,7 +355,7 @@ def shape(self) -> Shape:
return self.values.shape

@final
@property
@cache_readonly
def dtype(self) -> DtypeObj:
return self.values.dtype

Expand All @@ -378,6 +380,11 @@ def delete(self, loc) -> None:
"""
self.values = np.delete(self.values, loc, 0)
self.mgr_locs = self._mgr_locs.delete(loc)
try:
self._cache.clear()
except AttributeError:
# _cache not yet initialized
pass

@final
def apply(self, func, **kwargs) -> List[Block]:
Expand Down Expand Up @@ -592,7 +599,7 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"):
"""
values = self.values
if values.dtype.kind in ["m", "M"]:
values = self.array_values()
values = self.array_values

new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)

Expand Down Expand Up @@ -931,7 +938,7 @@ def setitem(self, indexer, value):
return self.coerce_to_target_dtype(value).setitem(indexer, value)

if self.dtype.kind in ["m", "M"]:
arr = self.array_values().T
arr = self.array_values.T
arr[indexer] = value
return self

Expand Down Expand Up @@ -1445,7 +1452,7 @@ class ExtensionBlock(Block):

values: ExtensionArray

@property
@cache_readonly
def shape(self) -> Shape:
# TODO(EA2D): override unnecessary with 2D EAs
if self.ndim == 1:
Expand Down Expand Up @@ -1476,6 +1483,12 @@ def set_inplace(self, locs, values):
# see GH#33457
assert locs.tolist() == [0]
self.values = values
try:
# TODO(GH33457) this can be removed
self._cache.clear()
except AttributeError:
# _cache not yet initialized
pass

def putmask(self, mask, new) -> List[Block]:
"""
Expand All @@ -1500,7 +1513,7 @@ def is_view(self) -> bool:
"""Extension arrays are never treated as views."""
return False

@property
@cache_readonly
def is_numeric(self):
return self.values.dtype._is_numeric

Expand Down Expand Up @@ -1549,6 +1562,7 @@ def get_values(self, dtype: Optional[DtypeObj] = None) -> np.ndarray:
# TODO(EA2D): reshape not needed with 2D EAs
return np.asarray(self.values).reshape(self.shape)

@cache_readonly
def array_values(self) -> ExtensionArray:
return self.values

Expand Down Expand Up @@ -1675,10 +1689,7 @@ def where(self, other, cond, errors="raise") -> List[Block]:
# The default `other` for Series / Frame is np.nan
# we want to replace that with the correct NA value
# for the type

# error: Item "dtype[Any]" of "Union[dtype[Any], ExtensionDtype]" has no
# attribute "na_value"
other = self.dtype.na_value # type: ignore[union-attr]
other = self.dtype.na_value

if is_sparse(self.values):
# TODO(SparseArray.__setitem__): remove this if condition
Expand Down Expand Up @@ -1739,10 +1750,11 @@ class HybridMixin:
array_values: Callable

def _can_hold_element(self, element: Any) -> bool:
values = self.array_values()
values = self.array_values

try:
values._validate_setitem_value(element)
# error: "Callable[..., Any]" has no attribute "_validate_setitem_value"
values._validate_setitem_value(element) # type: ignore[attr-defined]
return True
except (ValueError, TypeError):
return False
Expand All @@ -1768,9 +1780,7 @@ def _can_hold_element(self, element: Any) -> bool:
if isinstance(element, (IntegerArray, FloatingArray)):
if element._mask.any():
return False
# error: Argument 1 to "can_hold_element" has incompatible type
# "Union[dtype[Any], ExtensionDtype]"; expected "dtype[Any]"
return can_hold_element(self.dtype, element) # type: ignore[arg-type]
return can_hold_element(self.dtype, element)


class NDArrayBackedExtensionBlock(HybridMixin, Block):
Expand All @@ -1780,23 +1790,25 @@ class NDArrayBackedExtensionBlock(HybridMixin, Block):

def internal_values(self):
# Override to return DatetimeArray and TimedeltaArray
return self.array_values()
return self.array_values

def get_values(self, dtype: Optional[DtypeObj] = None) -> np.ndarray:
"""
return object dtype as boxed values, such as Timestamps/Timedelta
"""
values = self.array_values()
values = self.array_values
if is_object_dtype(dtype):
# DTA/TDA constructor and astype can handle 2D
values = values.astype(object)
# error: "Callable[..., Any]" has no attribute "astype"
values = values.astype(object) # type: ignore[attr-defined]
# TODO(EA2D): reshape not needed with 2D EAs
return np.asarray(values).reshape(self.shape)

def iget(self, key):
# GH#31649 we need to wrap scalars in Timestamp/Timedelta
# TODO(EA2D): this can be removed if we ever have 2D EA
return self.array_values().reshape(self.shape)[key]
# error: "Callable[..., Any]" has no attribute "reshape"
return self.array_values.reshape(self.shape)[key] # type: ignore[attr-defined]

def putmask(self, mask, new) -> List[Block]:
mask = extract_bool_array(mask)
Expand All @@ -1805,14 +1817,16 @@ def putmask(self, mask, new) -> List[Block]:
return self.astype(object).putmask(mask, new)

# TODO(EA2D): reshape unnecessary with 2D EAs
arr = self.array_values().reshape(self.shape)
# error: "Callable[..., Any]" has no attribute "reshape"
arr = self.array_values.reshape(self.shape) # type: ignore[attr-defined]
arr = cast("NDArrayBackedExtensionArray", arr)
arr.T.putmask(mask, new)
return [self]

def where(self, other, cond, errors="raise") -> List[Block]:
# TODO(EA2D): reshape unnecessary with 2D EAs
arr = self.array_values().reshape(self.shape)
# error: "Callable[..., Any]" has no attribute "reshape"
arr = self.array_values.reshape(self.shape) # type: ignore[attr-defined]

cond = extract_bool_array(cond)

Expand Down Expand Up @@ -1848,15 +1862,17 @@ def diff(self, n: int, axis: int = 0) -> List[Block]:
by apply.
"""
# TODO(EA2D): reshape not necessary with 2D EAs
values = self.array_values().reshape(self.shape)
# error: "Callable[..., Any]" has no attribute "reshape"
values = self.array_values.reshape(self.shape) # type: ignore[attr-defined]

new_values = values - values.shift(n, axis=axis)
new_values = maybe_coerce_values(new_values)
return [self.make_block(new_values)]

def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> List[Block]:
# TODO(EA2D) this is unnecessary if these blocks are backed by 2D EAs
values = self.array_values().reshape(self.shape)
# TODO(EA2D) this is unnecessary if these blocks are backed by 2D EA
# error: "Callable[..., Any]" has no attribute "reshape"
values = self.array_values.reshape(self.shape) # type: ignore[attr-defined]
new_values = values.shift(periods, fill_value=fill_value, axis=axis)
new_values = maybe_coerce_values(new_values)
return [self.make_block_same_class(new_values)]
Expand All @@ -1871,9 +1887,13 @@ def fillna(
# TODO: don't special-case td64
return self.astype(object).fillna(value, limit, inplace, downcast)

values = self.array_values()
values = values if inplace else values.copy()
new_values = values.fillna(value=value, limit=limit)
values = self.array_values
# error: "Callable[..., Any]" has no attribute "copy"
values = values if inplace else values.copy() # type: ignore[attr-defined]
# error: "Callable[..., Any]" has no attribute "fillna"
new_values = values.fillna( # type: ignore[attr-defined]
value=value, limit=limit
)
new_values = maybe_coerce_values(new_values)
return [self.make_block_same_class(values=new_values)]

Expand All @@ -1883,6 +1903,7 @@ class DatetimeLikeBlockMixin(NDArrayBackedExtensionBlock):

is_numeric = False

@cache_readonly
def array_values(self):
return ensure_wrapped_if_datetimelike(self.values)

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1668,7 +1668,7 @@ def internal_values(self):

def array_values(self):
"""The array that Series.array returns"""
return self._block.array_values()
return self._block.array_values

@property
def _can_hold_na(self) -> bool:
Expand Down