pandas-dev · makbigc · May 11, 2019 · May 12, 2019 · May 12, 2019 · May 12, 2019
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
@@ -82,7 +82,6 @@ Other Enhancements
 - :meth:`DataFrame.query` and :meth:`DataFrame.eval` now supports quoting column names with backticks to refer to names with spaces (:issue:`6508`)
 - :func:`merge_asof` now gives a more clear error message when merge keys are categoricals that are not equal (:issue:`26136`)
 - :meth:`pandas.core.window.Rolling` supports exponential (or Poisson) window type (:issue:`21303`)
--
 
 .. _whatsnew_0250.api_breaking:
 
@@ -438,6 +437,7 @@ Other API Changes
 - Most Pandas classes had a ``__bytes__`` method, which was used for getting a python2-style bytestring representation of the object. This method has been removed as a part of dropping Python2 (:issue:`26447`)
 - The ``.str``-accessor has been disabled for 1-level :class:`MultiIndex`, use :meth:`MultiIndex.to_flat_index` if necessary (:issue:`23679`)
 - Removed support of gtk package for clipboards (:issue:`26563`)
+- :meth:`ExtensionArray.argsort` places NA values at the end of the sorted array (:issue:`21801`)
 
 .. _whatsnew_0250.deprecations:
 

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -23,6 +23,7 @@
 
 from pandas._typing import ArrayLike
 from pandas.core import ops
+from pandas.core.sorting import nargsort
 
 _not_implemented_message = "{} does not implement {}."
 
@@ -361,23 +362,6 @@ def isna(self) -> ArrayLike:
         """
         raise AbstractMethodError(self)
 
-    def _values_for_argsort(self) -> np.ndarray:
-        """
-        Return values for sorting.
-
-        Returns
-        -------
-        ndarray
-            The transformed values should maintain the ordering between values
-            within the array.
-
-        See Also
-        --------
-        ExtensionArray.argsort
-        """
-        # Note: this is used in `ExtensionArray.argsort`.
-        return np.array(self)
-
     def argsort(self, ascending=True, kind='quicksort', *args, **kwargs):
         """
         Return the indices that would sort this array.
@@ -403,13 +387,20 @@ def argsort(self, ascending=True, kind='quicksort', *args, **kwargs):
         """
         # Implementor note: You have two places to override the behavior of
         # argsort.
-        # 1. _values_for_argsort : construct the values passed to np.argsort
+        # 1. _values_for_argsort : construct the values passed to np.argsort.
+        # The transformed values should maintain the ordering between values
+        # within the array. Preferably, the largest value is assigned to NA
+        # values in transformation.
         # 2. argsort : total control over sorting.
         ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs)
-        values = self._values_for_argsort()
-        result = np.argsort(values, kind=kind, **kwargs)
-        if not ascending:
-            result = result[::-1]
+
+        if hasattr(self, '_values_for_argsort'):
+            values = self._values_for_argsort()
+        else:
+            values = self
+        na_position = 'last' if ascending else 'first'
+        result = nargsort(values, kind=kind, ascending=ascending,
+                          na_position=na_position)
         return result
 
     def fillna(self, value=None, method=None, limit=None):

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -1523,7 +1523,7 @@ def check_for_ordered(self, op):
     def _values_for_argsort(self):
         return self._codes.copy()
 
-    def argsort(self, *args, **kwargs):
+    def argsort(self, ascending=True, kind='quicksort', *args, **kwargs):
         # TODO(PY2): use correct signature
         # We have to do *args, **kwargs to avoid a a py2-only signature
         # issue since np.argsort differs from argsort.
@@ -1567,8 +1567,12 @@ def argsort(self, *args, **kwargs):
         >>> cat.argsort()
         array([3, 0, 1, 2])
         """
-        # Keep the implementation here just for the docstring.
-        return super().argsort(*args, **kwargs)
+        ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs)
+        values = self._values_for_argsort()
+        result = np.argsort(values, kind=kind, **kwargs)
+        if not ascending:
+            result = result[::-1]
+        return result
 
     def sort_values(self, inplace=False, ascending=True, na_position='last'):
         """

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -620,9 +620,6 @@ def _values_for_factorize(self):
     def _from_factorized(cls, values, original):
         return cls(values, dtype=original.dtype)
 
-    def _values_for_argsort(self):
-        return self._data
-
     # ------------------------------------------------------------------
     # Additional array methods
     #  These are not part of the EA API, but we implement them because

diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
@@ -512,23 +512,6 @@ def value_counts(self, dropna=True):
 
         return Series(array, index=index)
 
-    def _values_for_argsort(self) -> np.ndarray:
-        """Return values for sorting.
-
-        Returns
-        -------
-        ndarray
-            The transformed values should maintain the ordering between values
-            within the array.
-
-        See Also
-        --------
-        ExtensionArray.argsort
-        """
-        data = self._data.copy()
-        data[self._mask] = data.min() - 1
-        return data
-
     @classmethod
     def _create_comparison_method(cls, op):
         def cmp_method(self, other):

diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py
@@ -280,9 +280,6 @@ def take(self, indices, allow_fill=False, fill_value=None):
     def copy(self, deep=False):
         return type(self)(self._ndarray.copy())
 
-    def _values_for_argsort(self):
-        return self._ndarray
-
     def _values_for_factorize(self):
         return self._ndarray, -1
 

diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
@@ -673,9 +673,6 @@ def _check_timedeltalike_freq_compat(self, other):
 
         _raise_on_incompatible(self, other)
 
-    def _values_for_argsort(self):
-        return self._data
-
 
 PeriodArray._add_comparison_ops()
 

diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py
@@ -44,6 +44,11 @@ def test_argsort_missing(self, data_missing_for_sorting):
         expected = pd.Series(np.array([1, -1, 0], dtype=np.int64))
         self.assert_series_equal(result, expected)
 
+    def test_argsort_nan_last(self, data_missing_for_sorting):
+        # GH 21801
+        result = data_missing_for_sorting.argsort()
+        assert result[-1] == 1
+
     @pytest.mark.parametrize('ascending', [True, False])
     def test_sort_values(self, data_for_sorting, ascending):
         ser = pd.Series(data_for_sorting)

diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py
@@ -153,6 +153,12 @@ def _reduce(self, name, skipna=True, **kwargs):
                                       "the {} operation".format(name))
         return op(axis=0)
 
+    def _values_for_argsort(self):
+        data = self._data.copy()  # copy so NA slots in self._data survive
+        mask = self.isna()
+        data[mask] = decimal.Decimal('Infinity')
+        return data
+
 
 def to_decimal(values, context=None):
     return DecimalArray([decimal.Decimal(x) for x in values], context=context)

diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py
@@ -182,7 +182,10 @@ def _values_for_argsort(self):
         # If all the elemnts of self are the same size P, NumPy will
         # cast them to an (N, P) array, instead of an (N,) array of tuples.
         frozen = [()] + [tuple(x.items()) for x in self]
-        return np.array(frozen, dtype=object)[1:]
+        data = np.array(frozen, dtype=object)[1:]
+        mask = self.isna()
+        data[mask] = np.nan
+        return data
 
 
 def make_data():

diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py
@@ -193,6 +193,11 @@ def test_searchsorted(self, data_for_sorting):
         if not data_for_sorting.ordered:
             raise pytest.skip(reason="searchsorted requires ordered data.")
 
+    def test_argsort_nan_last(self, data_missing_for_sorting):
+        # GH 21801
+        # TODO: Categorical.argsort places NA values at the end
+        pass
+
 
 class TestCasting(base.BaseCastingTests):
     pass