Skip to content

Commit 47f9751

Browse files
BUG: fixed formatting and small linting issues
1 parent 619ca63 commit 47f9751

File tree

10 files changed

+100
-73
lines changed

10 files changed

+100
-73
lines changed

doc/source/whatsnew/v1.0.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,8 @@ Other enhancements
125125
- Roundtripping DataFrames with nullable integer or string data types to parquet
126126
(:meth:`~DataFrame.to_parquet` / :func:`read_parquet`) using the `'pyarrow'` engine
127127
now preserve those data types with pyarrow >= 1.0.0 (:issue:`20612`).
128+
- :meth:`DataFrame.sort_values`, :meth:`DataFrame.sort_index`, :meth:`Series.sort_values`, and :meth:`Series.sort_index`
129+
now support the ``key`` argument, which allows for custom sorting orders (:issue:`3942`).
128130

129131
Build Changes
130132
^^^^^^^^^^^^^

pandas/core/arrays/categorical.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1603,7 +1603,7 @@ def argsort(self, ascending=True, kind="quicksort", *args, **kwargs):
16031603
"""
16041604
return super().argsort(ascending=ascending, kind=kind, *args, **kwargs)
16051605

1606-
def sort_values(self, inplace=False, ascending=True, na_position="last"):
1606+
def sort_values(self, inplace=False, ascending=True, na_position="last", key=None):
16071607
"""
16081608
Sort the Categorical by category value returning a new
16091609
Categorical by default.
@@ -1682,7 +1682,9 @@ def sort_values(self, inplace=False, ascending=True, na_position="last"):
16821682
msg = "invalid na_position: {na_position!r}"
16831683
raise ValueError(msg.format(na_position=na_position))
16841684

1685-
sorted_idx = nargsort(self, ascending=ascending, na_position=na_position)
1685+
sorted_idx = nargsort(
1686+
self, ascending=ascending, na_position=na_position, key=key
1687+
)
16861688

16871689
if inplace:
16881690
self._codes = self._codes[sorted_idx]

pandas/core/frame.py

+8-6
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import sys
1616
from textwrap import dedent
1717
from typing import (
18+
Callable,
1819
FrozenSet,
1920
Hashable,
2021
Iterable,
@@ -25,7 +26,6 @@
2526
Tuple,
2627
Type,
2728
Union,
28-
Callable
2929
)
3030
import warnings
3131

@@ -4715,15 +4715,15 @@ def f(vals):
47154715

47164716
@Substitution(**_shared_doc_kwargs)
47174717
@Appender(NDFrame.sort_values.__doc__)
4718-
def sort_values(
4718+
def sort_values( # type: ignore
47194719
self,
47204720
by,
47214721
axis=0,
47224722
ascending=True,
47234723
inplace=False,
47244724
kind="quicksort",
47254725
na_position="last",
4726-
key : Union[Callable, None] = None
4726+
key: Optional[Callable] = None,
47274727
):
47284728
inplace = validate_bool_kwarg(inplace, "inplace")
47294729
axis = self._get_axis_number(axis)
@@ -4738,7 +4738,9 @@ def sort_values(
47384738
from pandas.core.sorting import lexsort_indexer
47394739

47404740
keys = [self._get_label_or_level_values(x, axis=axis) for x in by]
4741-
indexer = lexsort_indexer(keys, orders=ascending, na_position=na_position, key=key)
4741+
indexer = lexsort_indexer(
4742+
keys, orders=ascending, na_position=na_position, key=key
4743+
)
47424744
indexer = ensure_platform_int(indexer)
47434745
else:
47444746
from pandas.core.sorting import nargsort
@@ -4764,7 +4766,7 @@ def sort_values(
47644766

47654767
@Substitution(**_shared_doc_kwargs)
47664768
@Appender(NDFrame.sort_index.__doc__)
4767-
def sort_index(
4769+
def sort_index( # type: ignore
47684770
self,
47694771
axis=0,
47704772
level=None,
@@ -4774,7 +4776,7 @@ def sort_index(
47744776
na_position="last",
47754777
sort_remaining=True,
47764778
by=None,
4777-
key : Union[Callable, None] = None
4779+
key: Optional[Callable] = None,
47784780
):
47794781

47804782
# TODO: this can be combined with Series.sort_index impl as

pandas/core/indexes/base.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
from datetime import datetime
22
import operator
33
from textwrap import dedent
4-
5-
from typing import FrozenSet, Union, Callable
4+
from typing import Callable, FrozenSet, Optional, Union
65
import warnings
76

87
import numpy as np
@@ -4425,7 +4424,9 @@ def asof_locs(self, where, mask):
44254424

44264425
return result
44274426

4428-
def sort_values(self, return_indexer=False, ascending=True, key : Callable = None):
4427+
def sort_values(
4428+
self, return_indexer=False, ascending=True, key: Optional[Callable] = None
4429+
):
44294430
"""
44304431
Return a sorted copy of the index.
44314432
@@ -4439,7 +4440,7 @@ def sort_values(self, return_indexer=False, ascending=True, key : Callable = Non
44394440
ascending : bool, default True
44404441
Should the index values be sorted in an ascending order.
44414442
key : Callable, default None
4442-
Apply a key function to the indices before sorting, like
4443+
Apply a key function to the indices before sorting, like
44434444
built-in sorted function.
44444445
44454446
Returns

pandas/core/indexes/datetimelike.py

+13-6
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
Base and utility classes for tseries type pandas objects.
33
"""
44
import operator
5-
from typing import Set
5+
from typing import Callable, Optional, Set
66
import warnings
77

88
import numpy as np
@@ -279,19 +279,26 @@ def map(self, mapper, na_action=None):
279279
except Exception:
280280
return self.astype(object).map(mapper)
281281

282-
def sort_values(self, return_indexer=False, ascending=True):
282+
def sort_values(
283+
self, return_indexer=False, ascending=True, key: Optional[Callable] = None
284+
):
283285
"""
284286
Return sorted copy of Index.
285287
"""
288+
if key:
289+
idx = self.map(key, na_action="ignore")
290+
else:
291+
idx = self
292+
286293
if return_indexer:
287-
_as = self.argsort()
294+
_as = idx.argsort()
288295
if not ascending:
289296
_as = _as[::-1]
290297
sorted_index = self.take(_as)
291298
return sorted_index, _as
292299
else:
293-
sorted_values = np.sort(self._ndarray_values)
294-
attribs = self._get_attributes_dict()
300+
sorted_values = np.sort(idx._ndarray_values)
301+
attribs = self._get_attributes_dict() # type: ignore
295302
freq = attribs["freq"]
296303

297304
if freq is not None and not is_period_dtype(self):
@@ -304,7 +311,7 @@ def sort_values(self, return_indexer=False, ascending=True):
304311
if not ascending:
305312
sorted_values = sorted_values[::-1]
306313

307-
return self._simple_new(sorted_values, **attribs)
314+
return self._simple_new(sorted_values, **attribs) # type: ignore
308315

309316
@Appender(_index_shared_docs["take"] % _index_doc_kwargs)
310317
def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):

pandas/core/series.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -2883,14 +2883,14 @@ def update(self, other):
28832883
# ----------------------------------------------------------------------
28842884
# Reindexing, sorting
28852885

2886-
def sort_values(
2886+
def sort_values( # type: ignore
28872887
self,
28882888
axis=0,
28892889
ascending=True,
28902890
inplace=False,
28912891
kind="quicksort",
28922892
na_position="last",
2893-
key: Callable = None
2893+
key: Optional[Callable] = None,
28942894
):
28952895
"""
28962896
Sort by the values.
@@ -3081,7 +3081,7 @@ def _try_kind_sort(arr):
30813081
else:
30823082
return result.__finalize__(self)
30833083

3084-
def sort_index(
3084+
def sort_index( # type: ignore
30853085
self,
30863086
axis=0,
30873087
level=None,
@@ -3090,7 +3090,7 @@ def sort_index(
30903090
kind="quicksort",
30913091
na_position="last",
30923092
sort_remaining=True,
3093-
key : Callable = None
3093+
key: Optional[Callable] = None,
30943094
):
30953095
"""
30963096
Sort Series by index labels.

pandas/core/sorting.py

+14-5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
""" miscellaneous sorting / groupby utilities """
2-
from typing import Callable, Union
2+
from typing import Callable, Optional
33

44
import numpy as np
55

@@ -189,7 +189,8 @@ def indexer_from_factorized(labels, shape, compress: bool = True):
189189
return get_group_index_sorter(ids, ngroups)
190190

191191

192-
def lexsort_indexer(keys, orders=None, na_position="last", key : Union[Callable, None] = None):
192+
def lexsort_indexer(keys, orders=None, na_position="last", key=None):
193+
193194
from pandas.core.arrays import Categorical
194195

195196
labels = []
@@ -239,7 +240,13 @@ def lexsort_indexer(keys, orders=None, na_position="last", key : Union[Callable,
239240
return indexer_from_factorized(labels, shape)
240241

241242

242-
def nargsort(items, kind="quicksort", ascending=True, na_position="last", key: Union[Callable, None] = None):
243+
def nargsort(
244+
items,
245+
kind="quicksort",
246+
ascending: bool = True,
247+
na_position="last",
248+
key: Optional[Callable] = None,
249+
):
243250
"""
244251
This is intended to be a drop-in replacement for np.argsort which
245252
handles NaNs. It adds ascending and na_position parameters.
@@ -260,9 +267,11 @@ def nargsort(items, kind="quicksort", ascending=True, na_position="last", key: U
260267
if masked.size == 0:
261268
vals = np.array([]) # vectorize fails on empty object arrays
262269
else:
263-
vals = np.asarray(key_func(masked)) # revert from masked
270+
vals = np.asarray(key_func(masked)) # revert from masked
264271

265-
return nargsort(vals, kind=kind, ascending=ascending, na_position=na_position, key=None)
272+
return nargsort(
273+
vals, kind=kind, ascending=ascending, na_position=na_position, key=None
274+
)
266275

267276
idx = np.arange(len(items))
268277
non_nans = items[~mask]

pandas/tests/frame/test_sorting.py

+31-26
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def test_sort_values(self):
8181
with pytest.raises(ValueError, match=msg):
8282
frame.sort_values(by=["A", "B"], axis=0, ascending=[True] * 5)
8383

84-
@pytest.fixture(params=[None, lambda x : x])
84+
@pytest.fixture(params=[None, lambda x: x])
8585
def key(self, request):
8686
return request.param
8787

@@ -93,22 +93,22 @@ def test_sort_values_inplace(self, key):
9393
sorted_df = frame.copy()
9494
sorted_df.sort_values(by="A", inplace=True, key=key)
9595
expected = frame.sort_values(by="A", key=key)
96-
assert_frame_equal(sorted_df, expected)
96+
tm.assert_frame_equal(sorted_df, expected)
9797

9898
sorted_df = frame.copy()
9999
sorted_df.sort_values(by=1, axis=1, inplace=True, key=key)
100100
expected = frame.sort_values(by=1, axis=1, key=key)
101-
assert_frame_equal(sorted_df, expected)
101+
tm.assert_frame_equal(sorted_df, expected)
102102

103103
sorted_df = frame.copy()
104104
sorted_df.sort_values(by="A", ascending=False, inplace=True, key=key)
105105
expected = frame.sort_values(by="A", ascending=False, key=key)
106-
assert_frame_equal(sorted_df, expected)
106+
tm.assert_frame_equal(sorted_df, expected)
107107

108108
sorted_df = frame.copy()
109109
sorted_df.sort_values(by=["A", "B"], ascending=False, inplace=True, key=key)
110110
expected = frame.sort_values(by=["A", "B"], ascending=False, key=key)
111-
assert_frame_equal(sorted_df, expected)
111+
tm.assert_frame_equal(sorted_df, expected)
112112

113113
def test_sort_nan(self):
114114
# GH3917
@@ -256,15 +256,20 @@ def test_sort_multi_index_key(self):
256256
df = DataFrame(
257257
{"a": [3, 1, 2], "b": [0, 0, 0], "c": [0, 1, 2], "d": list("abc")}
258258
)
259-
result = df.set_index(list("abc")).sort_index(level=list("ba"), key=lambda x : x[0])
259+
result = df.set_index(list("abc")).sort_index(
260+
level=list("ba"), key=lambda x: x[0]
261+
)
260262

261263
expected = DataFrame(
262264
{"a": [1, 2, 3], "b": [0, 0, 0], "c": [1, 2, 0], "d": list("bca")}
263265
)
264266
expected = expected.set_index(list("abc"))
265267
tm.assert_frame_equal(result, expected)
266268

267-
result = df.set_index(list("abc")).sort_index(level=list("ba"), key=lambda x : x[2])
269+
result = df.set_index(list("abc")).sort_index(
270+
level=list("ba"), key=lambda x: x[2]
271+
)
272+
268273
expected = df.set_index(list("abc"))
269274
tm.assert_frame_equal(result, expected)
270275

@@ -594,63 +599,63 @@ def test_sort_index_key(self):
594599

595600
result = df.sort_index()
596601
expected = df.iloc[[2, 3, 0, 1, 5, 4]]
597-
assert_frame_equal(result, expected)
602+
tm.assert_frame_equal(result, expected)
598603

599604
result = df.sort_index(key=str.lower)
600605
expected = df.iloc[[0, 1, 5, 2, 3, 4]]
601-
assert_frame_equal(result, expected)
606+
tm.assert_frame_equal(result, expected)
602607

603608
result = df.sort_index(key=str.lower, ascending=False)
604609
expected = df.iloc[[4, 2, 3, 0, 1, 5]]
605-
assert_frame_equal(result, expected)
610+
tm.assert_frame_equal(result, expected)
606611

607-
@pytest.mark.parametrize('dtype', ['int8', 'int16', 'int32', 'int64', 'float32', 'float64'])
612+
@pytest.mark.parametrize("dtype", ["int8", "int64", "float64"])
608613
def test_sort_index_key_int(self, dtype):
609614
df = DataFrame(np.arange(6, dtype=dtype), index=np.arange(6, dtype=dtype))
610615

611616
result = df.sort_index()
612-
assert_frame_equal(result, df)
617+
tm.assert_frame_equal(result, df)
613618

614-
result = df.sort_index(key=lambda x : -x)
619+
result = df.sort_index(key=lambda x: -x)
615620
expected = df.sort_index(ascending=False)
616-
assert_frame_equal(result, expected)
621+
tm.assert_frame_equal(result, expected)
617622

618-
result = df.sort_index(key=lambda x : 2 * x)
619-
assert_frame_equal(result, df)
623+
result = df.sort_index(key=lambda x: 2 * x)
624+
tm.assert_frame_equal(result, df)
620625

621626
def test_sort_value_key(self):
622627
df = DataFrame(np.array([0, 5, np.nan, 3, 2, np.nan]))
623628

624629
result = df.sort_values(0)
625630
expected = df.iloc[[0, 4, 3, 1, 2, 5]]
626-
assert_frame_equal(result, expected)
631+
tm.assert_frame_equal(result, expected)
627632

628-
result = df.sort_values(0, key=lambda x : x + 5)
633+
result = df.sort_values(0, key=lambda x: x + 5)
629634
expected = df.iloc[[0, 4, 3, 1, 2, 5]]
630-
assert_frame_equal(result, expected)
635+
tm.assert_frame_equal(result, expected)
631636

632-
result = df.sort_values(0, key=lambda x : -x, ascending=False)
637+
result = df.sort_values(0, key=lambda x: -x, ascending=False)
633638
expected = df.iloc[[0, 4, 3, 1, 2, 5]]
634-
assert_frame_equal(result, expected)
639+
tm.assert_frame_equal(result, expected)
635640

636641
def test_sort_value_key_nan(self):
637642
df = DataFrame(np.array([["hello", "goodbye"], ["hello", "Hello"]]))
638643

639644
result = df.sort_values(1)
640645
expected = df[::-1]
641-
assert_frame_equal(result, expected)
646+
tm.assert_frame_equal(result, expected)
642647

643648
result = df.sort_values([0, 1], key=str.lower)
644-
assert_frame_equal(result, df)
649+
tm.assert_frame_equal(result, df)
645650

646651
result = df.sort_values([0, 1], key=str.lower, ascending=False)
647652
expected = df.sort_values(1, key=str.lower, ascending=False)
648-
assert_frame_equal(result, expected)
653+
tm.assert_frame_equal(result, expected)
649654

650-
@pytest.mark.parametrize('key', [None, lambda x : x])
655+
@pytest.mark.parametrize("key", [None, lambda x: x])
651656
def test_sort_value_key_empty(self, key):
652657
df = DataFrame(np.array([]))
653-
658+
654659
df.sort_values(0, key=key)
655660
df.sort_index(key=key)
656661

0 commit comments

Comments
 (0)