Skip to content

REF: use more explicit to_numpy(object) instead of astype(object) in EA implementation #45521

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ class ExtensionArray:
* _formatter : Print scalars inside a Series or DataFrame.

Some methods require casting the ExtensionArray to an ndarray of Python
- objects with ``self.astype(object)``, which may be expensive. When
+ objects with ``self.to_numpy(object)``, which may be expensive. When
performance is a concern, we highly recommend overriding the following
methods:

Expand Down Expand Up @@ -780,7 +780,7 @@ def fillna(
if mask.any():
if method is not None:
func = missing.get_fill_func(method)
- new_values, _ = func(self.astype(object), limit=limit, mask=mask)
+ new_values, _ = func(self.to_numpy(object), limit=limit, mask=mask)
new_values = self._from_sequence(new_values, dtype=self.dtype)
else:
# fill with value
Expand Down Expand Up @@ -859,7 +859,7 @@ def unique(self: ExtensionArrayT) -> ExtensionArrayT:
-------
uniques : ExtensionArray
"""
- uniques = unique(self.astype(object))
+ uniques = unique(self.to_numpy(object))
return self._from_sequence(uniques, dtype=self.dtype)

def searchsorted(
Expand Down Expand Up @@ -911,7 +911,7 @@ def searchsorted(
# 1. Values outside the range of the `data_for_sorting` fixture
# 2. Values between the values in the `data_for_sorting` fixture
# 3. Missing values.
- arr = self.astype(object)
+ arr = self.to_numpy(object)
if isinstance(value, ExtensionArray):
value = value.astype(object)
return arr.searchsorted(value, side=side, sorter=sorter)
Expand Down Expand Up @@ -988,7 +988,7 @@ def _values_for_factorize(self) -> tuple[np.ndarray, Any]:
The values returned by this method are also used in
:func:`pandas.util.hash_pandas_object`.
"""
- return self.astype(object), np.nan
+ return self.to_numpy(object), np.nan

def factorize(self, na_sentinel: int = -1) -> tuple[np.ndarray, ExtensionArray]:
"""
Expand Down Expand Up @@ -1166,7 +1166,7 @@ def take(self, indices, allow_fill=False, fill_value=None):

# If the ExtensionArray is backed by an ndarray, then
# just pass that here instead of coercing to object.
- data = self.astype(object)
+ data = self.to_numpy(object)

if allow_fill and fill_value is None:
fill_value = self.dtype.na_value
Expand Down Expand Up @@ -1503,7 +1503,7 @@ def _fill_mask_inplace(
func = missing.get_fill_func(method)
# NB: if we don't copy mask here, it may be altered inplace, which
# would mess up the `self[mask] = ...` below.
- new_values, _ = func(self.astype(object), limit=limit, mask=mask.copy())
+ new_values, _ = func(self.to_numpy(object), limit=limit, mask=mask.copy())
new_values = self._from_sequence(new_values, dtype=self.dtype)
self[mask] = new_values[mask]
return
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ def _formatter(self, boxed: bool = False):
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
# used for Timedelta/DatetimeArray, overwritten by PeriodArray
if is_object_dtype(dtype):
- return np.array(list(self), dtype=object)
+ return self.astype(object)
return self._ndarray

@overload
Expand Down Expand Up @@ -816,14 +816,14 @@ def isin(self, values) -> npt.NDArray[np.bool_]:
pass

elif "mixed" in inferred:
- return isin(self.astype(object), values)
+ return isin(self.to_numpy(object), values)
else:
return np.zeros(self.shape, dtype=bool)

try:
values = type(self)._from_sequence(values)
except ValueError:
- return isin(self.astype(object), values)
+ return isin(self.to_numpy(object), values)

try:
self._check_compatible_with(values)
Expand Down Expand Up @@ -1022,7 +1022,7 @@ def _cmp_method(self, other, op):
# comparing tz-aware and tz-naive
with np.errstate(all="ignore"):
result = ops.comp_method_OBJECT_ARRAY(
- op, np.asarray(self.astype(object)), other
+ op, np.asarray(self.to_numpy(object)), other
)
return result

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -1661,7 +1661,7 @@ def isin(self, values) -> np.ndarray:
# not comparable -> no overlap
return np.zeros(self.shape, dtype=bool)

- return isin(self.astype(object), values.astype(object))
+ return isin(self.to_numpy(object), values.astype(object))

@property
def _combined(self) -> ArrayLike:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -638,7 +638,7 @@ def _format_native_types(
"""
actually format my specific types
"""
- values = self.astype(object)
+ values = self.to_numpy(object)

if date_format:
formatter = lambda dt: dt.strftime(date_format)
Expand Down