47
47
Axis ,
48
48
Dtype ,
49
49
FilePathOrBuffer ,
50
+ IndexKeyFunc ,
50
51
Label ,
51
52
Level ,
52
53
Renamer ,
54
+ ValueKeyFunc ,
53
55
)
54
56
from pandas .compat import PY37
55
57
from pandas .compat ._optional import import_optional_dependency
139
141
)
140
142
from pandas .core .ops .missing import dispatch_fill_zeros
141
143
from pandas .core .series import Series
144
+ from pandas .core .sorting import ensure_key_mapped
142
145
143
146
from pandas .io .common import get_filepath_or_buffer
144
147
from pandas .io .formats import console , format as fmt
@@ -5054,10 +5057,10 @@ def f(vals):
5054
5057
5055
5058
# ----------------------------------------------------------------------
5056
5059
# Sorting
5057
-
5060
+ # TODO: Just move the sort_values doc here.
5058
5061
@Substitution (** _shared_doc_kwargs )
5059
5062
@Appender (NDFrame .sort_values .__doc__ )
5060
- def sort_values (
5063
+ def sort_values ( # type: ignore[override] # NOQA # issue 27237
5061
5064
self ,
5062
5065
by ,
5063
5066
axis = 0 ,
@@ -5066,6 +5069,7 @@ def sort_values(
5066
5069
kind = "quicksort" ,
5067
5070
na_position = "last" ,
5068
5071
ignore_index = False ,
5072
+ key : ValueKeyFunc = None ,
5069
5073
):
5070
5074
inplace = validate_bool_kwarg (inplace , "inplace" )
5071
5075
axis = self ._get_axis_number (axis )
@@ -5080,19 +5084,30 @@ def sort_values(
5080
5084
from pandas .core .sorting import lexsort_indexer
5081
5085
5082
5086
keys = [self ._get_label_or_level_values (x , axis = axis ) for x in by ]
5083
- indexer = lexsort_indexer (keys , orders = ascending , na_position = na_position )
5087
+
5088
+ # need to rewrap columns in Series to apply key function
5089
+ if key is not None :
5090
+ keys = [Series (k , name = name ) for (k , name ) in zip (keys , by )]
5091
+
5092
+ indexer = lexsort_indexer (
5093
+ keys , orders = ascending , na_position = na_position , key = key
5094
+ )
5084
5095
indexer = ensure_platform_int (indexer )
5085
5096
else :
5086
5097
from pandas .core .sorting import nargsort
5087
5098
5088
5099
by = by [0 ]
5089
5100
k = self ._get_label_or_level_values (by , axis = axis )
5090
5101
5102
+ # need to rewrap column in Series to apply key function
5103
+ if key is not None :
5104
+ k = Series (k , name = by )
5105
+
5091
5106
if isinstance (ascending , (tuple , list )):
5092
5107
ascending = ascending [0 ]
5093
5108
5094
5109
indexer = nargsort (
5095
- k , kind = kind , ascending = ascending , na_position = na_position
5110
+ k , kind = kind , ascending = ascending , na_position = na_position , key = key
5096
5111
)
5097
5112
5098
5113
new_data = self ._mgr .take (
@@ -5118,6 +5133,7 @@ def sort_index(
5118
5133
na_position : str = "last" ,
5119
5134
sort_remaining : bool = True ,
5120
5135
ignore_index : bool = False ,
5136
+ key : IndexKeyFunc = None ,
5121
5137
):
5122
5138
"""
5123
5139
Sort object by labels (along an axis).
@@ -5153,6 +5169,16 @@ def sort_index(
5153
5169
5154
5170
.. versionadded:: 1.0.0
5155
5171
5172
+ key : callable, optional
5173
+ If not None, apply the key function to the index values
5174
+ before sorting. This is similar to the `key` argument in the
5175
+ builtin :func:`sorted` function, with the notable difference that
5176
+ this `key` function should be *vectorized*. It should expect an
5177
+ ``Index`` and return an ``Index`` of the same shape. For MultiIndex
5178
+ inputs, the key is applied *per level*.
5179
+
5180
+ .. versionadded:: 1.1.0
5181
+
5156
5182
Returns
5157
5183
-------
5158
5184
DataFrame
@@ -5186,6 +5212,17 @@ def sort_index(
5186
5212
100 1
5187
5213
29 2
5188
5214
1 4
5215
+
5216
+ A key function can be specified which is applied to the index before
5217
+ sorting. For a ``MultiIndex`` this is applied to each level separately.
5218
+
5219
+ >>> df = pd.DataFrame({"a": [1, 2, 3, 4]}, index=['A', 'b', 'C', 'd'])
5220
+ >>> df.sort_index(key=lambda x: x.str.lower())
5221
+ a
5222
+ A 1
5223
+ b 2
5224
+ C 3
5225
+ d 4
5189
5226
"""
5190
5227
# TODO: this can be combined with Series.sort_index impl as
5191
5228
# almost identical
@@ -5194,12 +5231,12 @@ def sort_index(
5194
5231
5195
5232
axis = self ._get_axis_number (axis )
5196
5233
labels = self ._get_axis (axis )
5234
+ labels = ensure_key_mapped (labels , key , levels = level )
5197
5235
5198
5236
# make sure that the axis is lexsorted to start
5199
5237
# if not we need to reconstruct to get the correct indexer
5200
5238
labels = labels ._sort_levels_monotonic ()
5201
5239
if level is not None :
5202
-
5203
5240
new_axis , indexer = labels .sortlevel (
5204
5241
level , ascending = ascending , sort_remaining = sort_remaining
5205
5242
)
0 commit comments