Skip to content

Commit 2960dfa

Browse files
topper-123
authored and
im-vinicius
committed
PERF: add ._simple_new method to masked arrays (pandas-dev#53013)
* PERF: add _simple_new method to masked arrays
1 parent 1da1279 commit 2960dfa

File tree

3 files changed

+33
-18
lines changed

3 files changed

+33
-18
lines changed

doc/source/whatsnew/v2.1.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,8 @@ Performance improvements
292292
- Performance improvement in :meth:`Series.corr` and :meth:`Series.cov` for extension dtypes (:issue:`52502`)
293293
- Performance improvement in :meth:`Series.to_numpy` when dtype is a numpy float dtype and ``na_value`` is ``np.nan`` (:issue:`52430`)
294294
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.to_numpy` (:issue:`52525`)
295+
- Performance improvement when doing various reshaping operations on :class:`arrays.IntegerArray` and :class:`arrays.FloatingArray` by avoiding unnecessary validation (:issue:`53013`)
296+
-
295297

296298
.. ---------------------------------------------------------------------------
297299
.. _whatsnew_210.bug_fixes:

pandas/core/arrays/boolean.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from pandas._typing import (
3131
Dtype,
3232
DtypeObj,
33+
Self,
3334
npt,
3435
type_t,
3536
)
@@ -296,6 +297,12 @@ class BooleanArray(BaseMaskedArray):
296297
_TRUE_VALUES = {"True", "TRUE", "true", "1", "1.0"}
297298
_FALSE_VALUES = {"False", "FALSE", "false", "0", "0.0"}
298299

300+
@classmethod
301+
def _simple_new(cls, values: np.ndarray, mask: npt.NDArray[np.bool_]) -> Self:
302+
result = super()._simple_new(values, mask)
303+
result._dtype = BooleanDtype()
304+
return result
305+
299306
def __init__(
300307
self, values: np.ndarray, mask: np.ndarray, copy: bool = False
301308
) -> None:
@@ -390,7 +397,7 @@ def _accumulate(
390397
if name in ("cummin", "cummax"):
391398
op = getattr(masked_accumulations, name)
392399
data, mask = op(data, mask, skipna=skipna, **kwargs)
393-
return type(self)(data, mask, copy=False)
400+
return self._simple_new(data, mask)
394401
else:
395402
from pandas.core.arrays import IntegerArray
396403

pandas/core/arrays/masked.py

+23-17
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,13 @@ class BaseMaskedArray(OpsMixin, ExtensionArray):
110110
_truthy_value = Scalar # bool(_truthy_value) = True
111111
_falsey_value = Scalar # bool(_falsey_value) = False
112112

113+
@classmethod
114+
def _simple_new(cls, values: np.ndarray, mask: npt.NDArray[np.bool_]) -> Self:
115+
result = BaseMaskedArray.__new__(cls)
116+
result._data = values
117+
result._mask = mask
118+
return result
119+
113120
def __init__(
114121
self, values: np.ndarray, mask: npt.NDArray[np.bool_], copy: bool = False
115122
) -> None:
@@ -169,7 +176,7 @@ def __getitem__(self, item: PositionalIndexer) -> Self | Any:
169176
return self.dtype.na_value
170177
return self._data[item]
171178

172-
return type(self)(self._data[item], newmask)
179+
return self._simple_new(self._data[item], newmask)
173180

174181
@doc(ExtensionArray.fillna)
175182
def fillna(self, value=None, method=None, limit: int | None = None) -> Self:
@@ -185,7 +192,7 @@ def fillna(self, value=None, method=None, limit: int | None = None) -> Self:
185192
npvalues = self._data.copy().T
186193
new_mask = mask.copy().T
187194
func(npvalues, limit=limit, mask=new_mask)
188-
return type(self)(npvalues.T, new_mask.T)
195+
return self._simple_new(npvalues.T, new_mask.T)
189196
else:
190197
# fill with value
191198
new_values = self.copy()
@@ -282,17 +289,17 @@ def ndim(self) -> int:
282289
def swapaxes(self, axis1, axis2) -> Self:
283290
data = self._data.swapaxes(axis1, axis2)
284291
mask = self._mask.swapaxes(axis1, axis2)
285-
return type(self)(data, mask)
292+
return self._simple_new(data, mask)
286293

287294
def delete(self, loc, axis: AxisInt = 0) -> Self:
288295
data = np.delete(self._data, loc, axis=axis)
289296
mask = np.delete(self._mask, loc, axis=axis)
290-
return type(self)(data, mask)
297+
return self._simple_new(data, mask)
291298

292299
def reshape(self, *args, **kwargs) -> Self:
293300
data = self._data.reshape(*args, **kwargs)
294301
mask = self._mask.reshape(*args, **kwargs)
295-
return type(self)(data, mask)
302+
return self._simple_new(data, mask)
296303

297304
def ravel(self, *args, **kwargs) -> Self:
298305
# TODO: need to make sure we have the same order for data/mask
@@ -302,7 +309,7 @@ def ravel(self, *args, **kwargs) -> Self:
302309

303310
@property
304311
def T(self) -> Self:
305-
return type(self)(self._data.T, self._mask.T)
312+
return self._simple_new(self._data.T, self._mask.T)
306313

307314
def round(self, decimals: int = 0, *args, **kwargs):
308315
"""
@@ -338,16 +345,16 @@ def round(self, decimals: int = 0, *args, **kwargs):
338345
# Unary Methods
339346

340347
def __invert__(self) -> Self:
341-
return type(self)(~self._data, self._mask.copy())
348+
return self._simple_new(~self._data, self._mask.copy())
342349

343350
def __neg__(self) -> Self:
344-
return type(self)(-self._data, self._mask.copy())
351+
return self._simple_new(-self._data, self._mask.copy())
345352

346353
def __pos__(self) -> Self:
347354
return self.copy()
348355

349356
def __abs__(self) -> Self:
350-
return type(self)(abs(self._data), self._mask.copy())
357+
return self._simple_new(abs(self._data), self._mask.copy())
351358

352359
# ------------------------------------------------------------------
353360

@@ -868,7 +875,7 @@ def take(
868875
result[fill_mask] = fill_value
869876
mask = mask ^ fill_mask
870877

871-
return type(self)(result, mask, copy=False)
878+
return self._simple_new(result, mask)
872879

873880
# error: Return type "BooleanArray" of "isin" incompatible with return type
874881
# "ndarray" in supertype "ExtensionArray"
@@ -893,10 +900,9 @@ def isin(self, values) -> BooleanArray: # type: ignore[override]
893900
return BooleanArray(result, mask, copy=False)
894901

895902
def copy(self) -> Self:
896-
data, mask = self._data, self._mask
897-
data = data.copy()
898-
mask = mask.copy()
899-
return type(self)(data, mask, copy=False)
903+
data = self._data.copy()
904+
mask = self._mask.copy()
905+
return self._simple_new(data, mask)
900906

901907
def unique(self) -> Self:
902908
"""
@@ -907,7 +913,7 @@ def unique(self) -> Self:
907913
uniques : BaseMaskedArray
908914
"""
909915
uniques, mask = algos.unique_with_mask(self._data, self._mask)
910-
return type(self)(uniques, mask, copy=False)
916+
return self._simple_new(uniques, mask)
911917

912918
@doc(ExtensionArray.searchsorted)
913919
def searchsorted(
@@ -959,7 +965,7 @@ def factorize(
959965
# dummy value for uniques; not used since uniques_mask will be True
960966
uniques = np.insert(uniques, na_code, 0)
961967
uniques_mask[na_code] = True
962-
uniques_ea = type(self)(uniques, uniques_mask)
968+
uniques_ea = self._simple_new(uniques, uniques_mask)
963969

964970
return codes, uniques_ea
965971

@@ -1374,7 +1380,7 @@ def _accumulate(
13741380
op = getattr(masked_accumulations, name)
13751381
data, mask = op(data, mask, skipna=skipna, **kwargs)
13761382

1377-
return type(self)(data, mask, copy=False)
1383+
return self._simple_new(data, mask)
13781384

13791385
# ------------------------------------------------------------------
13801386
# GroupBy Methods

0 commit comments

Comments
 (0)