DOC: updated inline documentation for key sorting

jacobaustin123 · jacobaustin123 · commit 9ce3b26a0ccc · 2019-11-29T20:52:58.000-05:00
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -150,6 +150,8 @@ Other enhancements
 - Roundtripping DataFrames with nullable integer or string data types to parquet
   (:meth:`~DataFrame.to_parquet` / :func:`read_parquet`) using the `'pyarrow'` engine
   now preserve those data types with pyarrow >= 1.0.0 (:issue:`20612`).
+- :meth:`DataFrame.sort_values`, :meth:`DataFrame.sort_index`, :meth:`Series.sort_values`, and :meth:`Series.sort_index`
+  now support the ``key`` argument, which allows for custom sorting orders (:issue:`3942`).
 
 Build Changes
 ^^^^^^^^^^^^^
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -1579,7 +1579,7 @@ def argsort(self, ascending=True, kind="quicksort", *args, **kwargs):
         """
         return super().argsort(ascending=ascending, kind=kind, *args, **kwargs)
 
-    def sort_values(self, inplace=False, ascending=True, na_position="last"):
+    def sort_values(self, inplace=False, ascending=True, na_position="last", key=None):
         """
         Sort the Categorical by category value returning a new
         Categorical by default.
@@ -1601,6 +1601,9 @@ def sort_values(self, inplace=False, ascending=True, na_position="last"):
         na_position : {'first', 'last'} (optional, default='last')
             'first' puts NaNs at the beginning
             'last' puts NaNs at the end
+        key : Callable, default None
+            If not None, apply the key function to every value before
+            sorting. Identical to key argument in built-in sorted function.
 
         Returns
         -------
@@ -1657,7 +1660,9 @@ def sort_values(self, inplace=False, ascending=True, na_position="last"):
         if na_position not in ["last", "first"]:
             raise ValueError(f"invalid na_position: {na_position!r}")
 
-        sorted_idx = nargsort(self, ascending=ascending, na_position=na_position)
+        sorted_idx = nargsort(
+            self, ascending=ascending, na_position=na_position, key=key
+        )
 
         if inplace:
             self._codes = self._codes[sorted_idx]
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -4704,7 +4704,6 @@ def f(vals):
 
     # ----------------------------------------------------------------------
     # Sorting
-
     @Substitution(**_shared_doc_kwargs)
     @Appender(NDFrame.sort_values.__doc__)
     def sort_values(
@@ -4715,7 +4714,7 @@ def sort_values(
         inplace=False,
         kind="quicksort",
         na_position="last",
-        key = None
+        key=None,
     ):
         inplace = validate_bool_kwarg(inplace, "inplace")
         axis = self._get_axis_number(axis)
@@ -4729,29 +4728,22 @@ def sort_values(
         if len(by) > 1:
             from pandas.core.sorting import lexsort_indexer
 
-            if key is not None:
-                key_func = np.vectorize(key)
-                keys = [key_func(self._get_label_or_level_values(x, axis=axis)) for x in by]
-            else:
-                keys = [self._get_label_or_level_values(x, axis=axis) for x in by]
-
-            indexer = lexsort_indexer(keys, orders=ascending, na_position=na_position)
+            keys = [self._get_label_or_level_values(x, axis=axis) for x in by]
+            indexer = lexsort_indexer(
+                keys, orders=ascending, na_position=na_position, key=key
+            )
             indexer = ensure_platform_int(indexer)
         else:
             from pandas.core.sorting import nargsort
 
             by = by[0]
             k = self._get_label_or_level_values(by, axis=axis)
 
-            if key is not None:
-                key_func = np.vectorize(key)
-                k = key_func(k)
-
             if isinstance(ascending, (tuple, list)):
                 ascending = ascending[0]
 
             indexer = nargsort(
-                k, kind=kind, ascending=ascending, na_position=na_position
+                k, kind=kind, ascending=ascending, na_position=na_position, key=key
             )
 
         new_data = self._data.take(
@@ -4774,7 +4766,7 @@ def sort_index(
         kind="quicksort",
         na_position="last",
         sort_remaining=True,
-        key=None
+        key=None,
     ):
 
         # TODO: this can be combined with Series.sort_index impl as
@@ -4785,8 +4777,8 @@ def sort_index(
         axis = self._get_axis_number(axis)
         labels = self._get_axis(axis)
         if key is not None:
-            labels = labels.map(key)
-            
+            labels = labels.map(key, na_action="ignore")
+
         # make sure that the axis is lexsorted to start
         # if not we need to reconstruct to get the correct indexer
         labels = labels._sort_levels_monotonic()
@@ -4798,9 +4790,8 @@ def sort_index(
         elif isinstance(labels, ABCMultiIndex):
             from pandas.core.sorting import lexsort_indexer
 
-            codes = labels._get_codes_for_sorting()
             indexer = lexsort_indexer(
-                codes,
+                labels._get_codes_for_sorting(),
                 orders=ascending,
                 na_position=na_position,
             )
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -1,7 +1,7 @@
 from datetime import datetime
 import operator
 from textwrap import dedent
-from typing import FrozenSet, Union
+from typing import Callable, FrozenSet, Optional, Union
 import warnings
 
 import numpy as np
@@ -4400,7 +4400,9 @@ def asof_locs(self, where, mask):
 
         return result
 
-    def sort_values(self, return_indexer=False, ascending=True):
+    def sort_values(
+        self, return_indexer=False, ascending=True, key: Optional[Callable] = None
+    ):
         """
         Return a sorted copy of the index.
 
@@ -4413,6 +4415,9 @@ def sort_values(self, return_indexer=False, ascending=True):
             Should the indices that would sort the index be returned.
         ascending : bool, default True
             Should the index values be sorted in an ascending order.
+        key : Callable, default None
+            If not None, apply the key function to every index value before
+            sorting, like the ``key`` argument of the built-in ``sorted``.
 
         Returns
         -------
@@ -4443,7 +4448,12 @@ def sort_values(self, return_indexer=False, ascending=True):
         >>> idx.sort_values(ascending=False, return_indexer=True)
         (Int64Index([1000, 100, 10, 1], dtype='int64'), array([3, 1, 0, 2]))
         """
-        _as = self.argsort()
+        if key:
+            idx = self.map(key, na_action="ignore")
+        else:
+            idx = self
+
+        _as = idx.argsort()
         if not ascending:
             _as = _as[::-1]
 
@@ -4553,9 +4563,12 @@ def argsort(self, *args, **kwargs):
         >>> idx[order]
         Index(['a', 'b', 'c', 'd'], dtype='object')
         """
+
         result = self.asi8
+
         if result is None:
             result = np.array(self)
+
         return result.argsort(*args, **kwargs)
 
     _index_shared_docs[
diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
@@ -2,7 +2,7 @@
 Base and utility classes for tseries type pandas objects.
 """
 import operator
-from typing import Set
+from typing import Callable, Optional, Set
 
 import numpy as np
 
@@ -273,12 +273,22 @@ def map(self, mapper, na_action=None):
         except Exception:
             return self.astype(object).map(mapper)
 
-    def sort_values(self, return_indexer=False, ascending=True):
+    def sort_values(
+        self, return_indexer=False, ascending=True, key: Optional[Callable] = None
+    ):
         """
         Return sorted copy of Index.
         """
+        if not isinstance(self, Index):
+            raise TypeError("sort_values must be called on an Index object")
+
+        if key:
+            idx = self.map(key, na_action="ignore")
+        else:
+            idx = self
+
         if return_indexer:
-            _as = self.argsort()
+            _as = idx.argsort()
             if not ascending:
                 _as = _as[::-1]
             sorted_index = self.take(_as)
@@ -287,7 +297,7 @@ def sort_values(self, return_indexer=False, ascending=True):
             # NB: using asi8 instead of _ndarray_values matters in numpy 1.18
             #  because the treatment of NaT has been changed to put NaT last
             #  instead of first.
-            sorted_values = np.sort(self.asi8)
+            sorted_values = np.sort(idx.asi8)
             attribs = self._get_attributes_dict()
             freq = attribs["freq"]
 
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -2835,7 +2835,7 @@ def sort_values(
         inplace=False,
         kind="quicksort",
         na_position="last",
-        key=None
+        key=None,
     ):
         """
         Sort by the values.
@@ -2858,6 +2858,9 @@ def sort_values(
         na_position : {'first' or 'last'}, default 'last'
             Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
             the end.
+        key : Callable, default None
+            If not None, apply the key function to every value before
+            sorting. Identical to key argument in built-in sorted function.
 
         Returns
         -------
@@ -2940,6 +2943,22 @@ def sort_values(
         2    d
         0    z
         dtype: object
+
+        >>> s = pd.Series(['a', 'B', 'c', 'D', 'e'])
+        >>> s.sort_values()
+        1    B
+        3    D
+        0    a
+        2    c
+        4    e
+        dtype: object
+        >>> s.sort_values(key=str.lower)
+        0    a
+        1    B
+        2    c
+        3    D
+        4    e
+        dtype: object
         """
         inplace = validate_bool_kwarg(inplace, "inplace")
         # Validate the axis parameter
@@ -3016,7 +3035,7 @@ def sort_index(
         kind="quicksort",
         na_position="last",
         sort_remaining=True,
-        key=None
+        key: Optional[Callable] = None,
     ):
         """
         Sort Series by index labels.
@@ -3045,6 +3064,9 @@ def sort_index(
         sort_remaining : bool, default True
             If True and sorting by level and index is multilevel, sort by other
             levels too (in order) after sorting by specified level.
+        key : Callable, default None
+            If not None, apply the key function to every index element before
+            sorting. Identical to key argument in built-in sorted function.
 
         Returns
         -------
@@ -3127,7 +3149,20 @@ def sort_index(
         baz  two    5
         bar  two    7
         dtype: int64
+
+        >>> s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8])
+        >>> s.sort_index(key=lambda x: -x)
+        7    8
+        6    7
+        5    6
+        4    5
+        3    4
+        2    3
+        1    2
+        0    1
+        dtype: int64
         """
+
         # TODO: this can be combined with DataFrame.sort_index impl as
         # almost identical
         inplace = validate_bool_kwarg(inplace, "inplace")
@@ -3136,8 +3171,8 @@ def sort_index(
         index = self.index
         true_index = index
         if key is not None:
-            index = index.map(key)
-        
+            index = index.map(key, na_action="ignore")
+
         if level is not None:
             new_index, indexer = index.sortlevel(
                 level, ascending=ascending, sort_remaining=sort_remaining
@@ -3147,10 +3182,9 @@ def sort_index(
             from pandas.core.sorting import lexsort_indexer
 
             labels = index._sort_levels_monotonic()
-            codes = labels._get_codes_for_sorting()
 
             indexer = lexsort_indexer(
-                codes,
+                labels._get_codes_for_sorting(),
                 orders=ascending,
                 na_position=na_position,
             )
diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
@@ -1,4 +1,6 @@
 """ miscellaneous sorting / groupby utilities """
+from typing import Callable, Optional
+
 import numpy as np
 
 from pandas._libs import algos, hashtable, lib
@@ -187,7 +189,8 @@ def indexer_from_factorized(labels, shape, compress: bool = True):
     return get_group_index_sorter(ids, ngroups)
 
 
-def lexsort_indexer(keys, orders=None, na_position="last"):
+def lexsort_indexer(keys, orders=None, na_position="last", key=None):
+
     from pandas.core.arrays import Categorical
 
     labels = []
@@ -197,6 +200,10 @@ def lexsort_indexer(keys, orders=None, na_position="last"):
     elif orders is None:
         orders = [True] * len(keys)
 
+    if key:
+        key_func = np.vectorize(key)
+        keys = [key_func(entry) if entry.size != 0 else entry for entry in keys]
+
     for key, order in zip(keys, orders):
 
         # we are already a Categorical
@@ -233,7 +240,13 @@ def lexsort_indexer(keys, orders=None, na_position="last"):
     return indexer_from_factorized(labels, shape)
 
 
-def nargsort(items, kind="quicksort", ascending: bool = True, na_position="last"):
+def nargsort(
+    items,
+    kind="quicksort",
+    ascending: bool = True,
+    na_position="last",
+    key: Optional[Callable] = None,
+):
     """
     This is intended to be a drop-in replacement for np.argsort which
     handles NaNs. It adds ascending and na_position parameters.
@@ -247,6 +260,19 @@ def nargsort(items, kind="quicksort", ascending: bool = True, na_position="last"
     else:
         items = np.asanyarray(items)
 
+    if key is not None:
+        key_func = np.vectorize(key)
+        masked = np.ma.MaskedArray(items, mask)
+
+        if masked.size == 0:
+            vals = np.array([])  # vectorize fails on empty object arrays
+        else:
+            vals = np.asarray(key_func(masked))  # revert from masked
+
+        return nargsort(
+            vals, kind=kind, ascending=ascending, na_position=na_position, key=None
+        )
+
     idx = np.arange(len(items))
     non_nans = items[~mask]
     non_nan_idx = idx[~mask]
diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py
diff --git a/pandas/tests/indexing/multiindex/test_sorted.py b/pandas/tests/indexing/multiindex/test_sorted.py
diff --git a/pandas/tests/series/test_sorting.py b/pandas/tests/series/test_sorting.py