Skip to content

Commit c57dff7

Browse files
committed
Use sorting.nargsort and remove some _values_for_argsort
1 parent 1925423 commit c57dff7

File tree

8 files changed

+22
-79
lines changed

8 files changed

+22
-79
lines changed

pandas/core/arrays/base.py

+7-38
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
from pandas._typing import ArrayLike
2525
from pandas.core import ops
26+
from pandas.core.sorting import nargsort
2627

2728
_not_implemented_message = "{} does not implement {}."
2829

@@ -361,27 +362,6 @@ def isna(self) -> ArrayLike:
361362
"""
362363
raise AbstractMethodError(self)
363364

364-
def _values_for_argsort(self) -> Tuple[np.ndarray, np.ndarray]:
365-
"""
366-
Return values for sorting.
367-
368-
Returns
369-
-------
370-
ndarray
371-
The transformed values should maintain the ordering between values
372-
within the array.
373-
ndarray
374-
The mask which indicates the NA values.
375-
376-
.. versionadded:: 0.25.0
377-
378-
See Also
379-
--------
380-
ExtensionArray.argsort
381-
"""
382-
# Note: this is used in `ExtensionArray.argsort`.
383-
return np.array(self), self.isna()
384-
385365
def argsort(self, ascending=True, kind='quicksort', *args, **kwargs):
386366
"""
387367
Return the indices that would sort this array.
@@ -410,25 +390,14 @@ def argsort(self, ascending=True, kind='quicksort', *args, **kwargs):
410390
# 1. _values_for_argsort : construct the values passed to np.argsort
411391
# 2. argsort : total control over sorting.
412392
ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs)
413-
values, mask = self._values_for_argsort()
414393

415-
if mask.any():
416-
notmask = ~mask
417-
notnull = np.argsort(values[notmask], kind=kind, **kwargs)
418-
419-
# permu maps the indices of the subarray
420-
# without nan to the indices of the original array.
421-
permu = np.arange(len(mask))
422-
permu = permu[~mask]
423-
424-
notnull = permu[notnull]
425-
allnan = np.arange(len(self))[mask]
426-
result = np.append(notnull, allnan)
394+
if hasattr(self, '_values_for_argsort'):
395+
values = self._values_for_argsort()
427396
else:
428-
result = np.argsort(values, kind=kind, **kwargs)
429-
430-
if not ascending:
431-
result = result[::-1]
397+
values = self
398+
na_position = 'last' if ascending else 'first'
399+
result = nargsort(values, kind=kind, ascending=ascending,
400+
na_position=na_position)
432401
return result
433402

434403
def fillna(self, value=None, method=None, limit=None):

pandas/core/arrays/categorical.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -1521,9 +1521,9 @@ def check_for_ordered(self, op):
15211521
"Categorical to an ordered one\n".format(op=op))
15221522

15231523
def _values_for_argsort(self):
1524-
return self._codes.copy(), self.isna()
1524+
return self._codes.copy()
15251525

1526-
def argsort(self, *args, **kwargs):
1526+
def argsort(self, ascending=True, kind='quicksort', *args, **kwargs):
15271527
# TODO(PY2): use correct signature
15281528
# We have to do *args, **kwargs to avoid a py2-only signature
15291529
# issue since np.argsort differs from argsort.
@@ -1567,8 +1567,12 @@ def argsort(self, *args, **kwargs):
15671567
>>> cat.argsort()
15681568
array([3, 0, 1, 2])
15691569
"""
1570-
# Keep the implementation here just for the docstring.
1571-
return super().argsort(*args, **kwargs)
1570+
ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs)
1571+
values = self._values_for_argsort()
1572+
result = np.argsort(values, kind=kind, **kwargs)
1573+
if not ascending:
1574+
result = result[::-1]
1575+
return result
15721576

15731577
def sort_values(self, inplace=False, ascending=True, na_position='last'):
15741578
"""

pandas/core/arrays/datetimelike.py

-3
Original file line numberDiff line numberDiff line change
@@ -620,9 +620,6 @@ def _values_for_factorize(self):
620620
def _from_factorized(cls, values, original):
621621
return cls(values, dtype=original.dtype)
622622

623-
def _values_for_argsort(self):
624-
return self._data, self._isnan
625-
626623
# ------------------------------------------------------------------
627624
# Additional array methods
628625
# These are not part of the EA API, but we implement them because

pandas/core/arrays/integer.py

-22
Original file line numberDiff line numberDiff line change
@@ -512,28 +512,6 @@ def value_counts(self, dropna=True):
512512

513513
return Series(array, index=index)
514514

515-
def _values_for_argsort(self) -> Tuple[np.ndarray, np.ndarray]:
516-
"""Return values for sorting.
517-
518-
Returns
519-
-------
520-
ndarray
521-
The transformed values should maintain the ordering between values
522-
within the array.
523-
ndarray
524-
The mask which indicates the NA values.
525-
526-
.. versionadded:: 0.25.0
527-
528-
See Also
529-
--------
530-
ExtensionArray.argsort
531-
"""
532-
data = self._data.copy()
533-
mask = self._mask
534-
data[self._mask] = data.min() - 1
535-
return data, mask
536-
537515
@classmethod
538516
def _create_comparison_method(cls, op):
539517
def cmp_method(self, other):

pandas/core/arrays/numpy_.py

-3
Original file line numberDiff line numberDiff line change
@@ -280,9 +280,6 @@ def take(self, indices, allow_fill=False, fill_value=None):
280280
def copy(self, deep=False):
281281
return type(self)(self._ndarray.copy())
282282

283-
def _values_for_argsort(self):
284-
return self._ndarray, self.isna()
285-
286283
def _values_for_factorize(self):
287284
return self._ndarray, -1
288285

pandas/core/arrays/period.py

-3
Original file line numberDiff line numberDiff line change
@@ -673,9 +673,6 @@ def _check_timedeltalike_freq_compat(self, other):
673673

674674
_raise_on_incompatible(self, other)
675675

676-
def _values_for_argsort(self):
677-
return self._data, self._isnan
678-
679676

680677
PeriodArray._add_comparison_ops()
681678

pandas/core/sorting.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -247,11 +247,12 @@ def nargsort(items, kind='quicksort', ascending=True, na_position='last'):
247247
mask = isna(items)
248248
cnt_null = mask.sum()
249249
sorted_idx = items.argsort(ascending=ascending, kind=kind)
250-
251-
if ascending and na_position == 'first':
252-
sorted_idx = np.roll(sorted_idx, cnt_null)
253-
elif not ascending and na_position == 'last':
250+
if ascending and na_position == 'last':
251+
# NaN is coded as -1 and is listed in front after sorting
254252
sorted_idx = np.roll(sorted_idx, -cnt_null)
253+
elif not ascending and na_position == 'first':
254+
# NaN is coded as -1 and is listed at the end after sorting
255+
sorted_idx = np.roll(sorted_idx, cnt_null)
255256
return sorted_idx
256257

257258
with warnings.catch_warnings():

pandas/tests/extension/json/array.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ def _concat_same_type(cls, to_concat):
171171
return cls(data)
172172

173173
def _values_for_factorize(self):
174-
frozen, _ = self._values_for_argsort()
174+
frozen = self._values_for_argsort()
175175
if len(frozen) == 0:
176176
# _factorize_array expects 1-d array, this is a len-0 2-d array.
177177
frozen = frozen.ravel()
@@ -182,7 +182,7 @@ def _values_for_argsort(self):
182182
# If all the elements of self are the same size P, NumPy will
183183
# cast them to an (N, P) array, instead of an (N,) array of tuples.
184184
frozen = [()] + [tuple(x.items()) for x in self]
185-
return np.array(frozen, dtype=object)[1:], self.isna()
185+
return np.array(frozen, dtype=object)[1:]
186186

187187

188188
def make_data():

0 commit comments

Comments
 (0)