Skip to content

Commit 4fff1c9

Browse files
authored
PERF: MaskedArray._values_for_argsort (#45434)
1 parent 3510b1f commit 4fff1c9

File tree

5 files changed

+14
-40
lines changed

5 files changed

+14
-40
lines changed

pandas/core/arrays/base.py

+10
Original file line number | Diff line number | Diff line change
@@ -628,6 +628,16 @@ def _values_for_argsort(self) -> np.ndarray:
628628
See Also
629629
--------
630630
ExtensionArray.argsort : Return the indices that would sort this array.
631+
632+
Notes
633+
-----
634+
The caller is responsible for *not* modifying these values in-place, so
635+
it is safe for implementors to give views on `self`.
636+
637+
Functions that use this (e.g. ExtensionArray.argsort) should ignore
638+
entries with missing values in the original array (according to `self.isna()`).
639+
This means that the corresponding entries in the returned array don't need to
640+
be modified to sort correctly.
631641
"""
632642
# Note: this is used in `ExtensionArray.argsort`.
633643
return np.array(self)

pandas/core/arrays/boolean.py

-18
Original file line number | Diff line number | Diff line change
@@ -421,24 +421,6 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
421421
# coerce
422422
return self.to_numpy(dtype=dtype, na_value=na_value, copy=False)
423423

424-
def _values_for_argsort(self) -> np.ndarray:
425-
"""
426-
Return values for sorting.
427-
428-
Returns
429-
-------
430-
ndarray
431-
The transformed values should maintain the ordering between values
432-
within the array.
433-
434-
See Also
435-
--------
436-
ExtensionArray.argsort : Return the indices that would sort this array.
437-
"""
438-
data = self._data.copy()
439-
data[self._mask] = -1
440-
return data
441-
442424
def _logical_method(self, other, op):
443425

444426
assert op.__name__ in {"or_", "ror_", "and_", "rand_", "xor", "rxor"}

pandas/core/arrays/floating.py

-3
Original file line number | Diff line number | Diff line change
@@ -244,9 +244,6 @@ def _coerce_to_array(
244244
) -> tuple[np.ndarray, np.ndarray]:
245245
return coerce_to_array(value, dtype=dtype, copy=copy)
246246

247-
def _values_for_argsort(self) -> np.ndarray:
248-
return self._data
249-
250247

251248
_dtype_docstring = """
252249
An ExtensionDtype for {dtype} data.

pandas/core/arrays/integer.py

-19
Original file line number | Diff line number | Diff line change
@@ -301,25 +301,6 @@ def _coerce_to_array(
301301
) -> tuple[np.ndarray, np.ndarray]:
302302
return coerce_to_array(value, dtype=dtype, copy=copy)
303303

304-
def _values_for_argsort(self) -> np.ndarray:
305-
"""
306-
Return values for sorting.
307-
308-
Returns
309-
-------
310-
ndarray
311-
The transformed values should maintain the ordering between values
312-
within the array.
313-
314-
See Also
315-
--------
316-
ExtensionArray.argsort : Return the indices that would sort this array.
317-
"""
318-
data = self._data.copy()
319-
if self._mask.any():
320-
data[self._mask] = data.min() - 1
321-
return data
322-
323304

324305
_dtype_docstring = """
325306
An ExtensionDtype for {dtype} integer data.

pandas/core/arrays/masked.py

+4
Original file line number | Diff line number | Diff line change
@@ -757,6 +757,10 @@ def factorize(self, na_sentinel: int = -1) -> tuple[np.ndarray, ExtensionArray]:
757757
uniques_ea = type(self)(uniques, np.zeros(len(uniques), dtype=bool))
758758
return codes, uniques_ea
759759

760+
@doc(ExtensionArray._values_for_argsort)
761+
def _values_for_argsort(self) -> np.ndarray:
762+
return self._data
763+
760764
def value_counts(self, dropna: bool = True) -> Series:
761765
"""
762766
Returns a Series containing counts of each unique value.

0 commit comments

Comments
 (0)