Skip to content

Commit d396eff

Browse files
BUG: fixed formatting and small linting issues
1 parent 2be9e73 commit d396eff

File tree

10 files changed

+98
-71
lines changed

10 files changed

+98
-71
lines changed

doc/source/whatsnew/v1.0.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,8 @@ Other enhancements
150150
- Roundtripping DataFrames with nullable integer or string data types to parquet
151151
(:meth:`~DataFrame.to_parquet` / :func:`read_parquet`) using the `'pyarrow'` engine
152152
now preserve those data types with pyarrow >= 1.0.0 (:issue:`20612`).
153+
- :meth:`DataFrame.sort_values`, :meth:`DataFrame.sort_index`, :meth:`Series.sort_values`, and :meth:`Series.sort_index`
154+
now support the ``key`` argument which allows for custom sorting orders (:issue:`3942`).
153155

154156
Build Changes
155157
^^^^^^^^^^^^^

pandas/core/arrays/categorical.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1579,7 +1579,7 @@ def argsort(self, ascending=True, kind="quicksort", *args, **kwargs):
15791579
"""
15801580
return super().argsort(ascending=ascending, kind=kind, *args, **kwargs)
15811581

1582-
def sort_values(self, inplace=False, ascending=True, na_position="last"):
1582+
def sort_values(self, inplace=False, ascending=True, na_position="last", key=None):
15831583
"""
15841584
Sort the Categorical by category value returning a new
15851585
Categorical by default.
@@ -1657,7 +1657,9 @@ def sort_values(self, inplace=False, ascending=True, na_position="last"):
16571657
if na_position not in ["last", "first"]:
16581658
raise ValueError(f"invalid na_position: {na_position!r}")
16591659

1660-
sorted_idx = nargsort(self, ascending=ascending, na_position=na_position)
1660+
sorted_idx = nargsort(
1661+
self, ascending=ascending, na_position=na_position, key=key
1662+
)
16611663

16621664
if inplace:
16631665
self._codes = self._codes[sorted_idx]

pandas/core/frame.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import sys
1616
from textwrap import dedent
1717
from typing import (
18+
Callable,
1819
FrozenSet,
1920
Hashable,
2021
Iterable,
@@ -25,7 +26,6 @@
2526
Tuple,
2627
Type,
2728
Union,
28-
Callable
2929
)
3030
import warnings
3131

@@ -4708,15 +4708,15 @@ def f(vals):
47084708

47094709
@Substitution(**_shared_doc_kwargs)
47104710
@Appender(NDFrame.sort_values.__doc__)
4711-
def sort_values(
4711+
def sort_values( # type: ignore
47124712
self,
47134713
by,
47144714
axis=0,
47154715
ascending=True,
47164716
inplace=False,
47174717
kind="quicksort",
47184718
na_position="last",
4719-
key : Union[Callable, None] = None
4719+
key: Optional[Callable] = None,
47204720
):
47214721
inplace = validate_bool_kwarg(inplace, "inplace")
47224722
axis = self._get_axis_number(axis)
@@ -4731,7 +4731,9 @@ def sort_values(
47314731
from pandas.core.sorting import lexsort_indexer
47324732

47334733
keys = [self._get_label_or_level_values(x, axis=axis) for x in by]
4734-
indexer = lexsort_indexer(keys, orders=ascending, na_position=na_position, key=key)
4734+
indexer = lexsort_indexer(
4735+
keys, orders=ascending, na_position=na_position, key=key
4736+
)
47354737
indexer = ensure_platform_int(indexer)
47364738
else:
47374739
from pandas.core.sorting import nargsort
@@ -4757,7 +4759,7 @@ def sort_values(
47574759

47584760
@Substitution(**_shared_doc_kwargs)
47594761
@Appender(NDFrame.sort_index.__doc__)
4760-
def sort_index(
4762+
def sort_index( # type: ignore
47614763
self,
47624764
axis=0,
47634765
level=None,

pandas/core/indexes/base.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
from datetime import datetime
22
import operator
33
from textwrap import dedent
4-
5-
from typing import FrozenSet, Union, Callable
4+
from typing import Callable, FrozenSet, Optional, Union
65
import warnings
76

87
import numpy as np
@@ -4401,7 +4400,9 @@ def asof_locs(self, where, mask):
44014400

44024401
return result
44034402

4404-
def sort_values(self, return_indexer=False, ascending=True, key : Callable = None):
4403+
def sort_values(
4404+
self, return_indexer=False, ascending=True, key: Optional[Callable] = None
4405+
):
44054406
"""
44064407
Return a sorted copy of the index.
44074408
@@ -4415,7 +4416,7 @@ def sort_values(self, return_indexer=False, ascending=True, key : Callable = Non
44154416
ascending : bool, default True
44164417
Should the index values be sorted in an ascending order.
44174418
key : Callable, default None
4418-
Apply a key function to the indices before sorting, like
4419+
Apply a key function to the indices before sorting, like
44194420
the built-in ``sorted`` function.
44204421
44214422
Returns

pandas/core/indexes/datetimelike.py

+12-5
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
Base and utility classes for tseries type pandas objects.
33
"""
44
import operator
5-
from typing import Set
5+
from typing import Callable, Optional, Set
66

77
import numpy as np
88

@@ -273,12 +273,19 @@ def map(self, mapper, na_action=None):
273273
except Exception:
274274
return self.astype(object).map(mapper)
275275

276-
def sort_values(self, return_indexer=False, ascending=True):
276+
def sort_values(
277+
self, return_indexer=False, ascending=True, key: Optional[Callable] = None
278+
):
277279
"""
278280
Return sorted copy of Index.
279281
"""
282+
if key:
283+
idx = self.map(key, na_action="ignore")
284+
else:
285+
idx = self
286+
280287
if return_indexer:
281-
_as = self.argsort()
288+
_as = idx.argsort()
282289
if not ascending:
283290
_as = _as[::-1]
284291
sorted_index = self.take(_as)
@@ -287,7 +294,7 @@ def sort_values(self, return_indexer=False, ascending=True):
287294
# NB: using asi8 instead of _ndarray_values matters in numpy 1.18
288295
# because the treatment of NaT has been changed to put NaT last
289296
# instead of first.
290-
sorted_values = np.sort(self.asi8)
297+
sorted_values = np.sort(idx.asi8)
291298
attribs = self._get_attributes_dict()
292299
freq = attribs["freq"]
293300

@@ -301,7 +308,7 @@ def sort_values(self, return_indexer=False, ascending=True):
301308
if not ascending:
302309
sorted_values = sorted_values[::-1]
303310

304-
return self._simple_new(sorted_values, **attribs)
311+
return self._simple_new(sorted_values, **attribs) # type: ignore
305312

306313
@Appender(_index_shared_docs["take"] % _index_doc_kwargs)
307314
def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):

pandas/core/series.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -2828,14 +2828,14 @@ def update(self, other):
28282828
# ----------------------------------------------------------------------
28292829
# Reindexing, sorting
28302830

2831-
def sort_values(
2831+
def sort_values( # type: ignore
28322832
self,
28332833
axis=0,
28342834
ascending=True,
28352835
inplace=False,
28362836
kind="quicksort",
28372837
na_position="last",
2838-
key: Callable = None
2838+
key: Optional[Callable] = None,
28392839
):
28402840
"""
28412841
Sort by the values.
@@ -3026,7 +3026,7 @@ def _try_kind_sort(arr):
30263026
else:
30273027
return result.__finalize__(self)
30283028

3029-
def sort_index(
3029+
def sort_index( # type: ignore
30303030
self,
30313031
axis=0,
30323032
level=None,
@@ -3035,7 +3035,7 @@ def sort_index(
30353035
kind="quicksort",
30363036
na_position="last",
30373037
sort_remaining=True,
3038-
key : Callable = None
3038+
key: Optional[Callable] = None,
30393039
):
30403040
"""
30413041
Sort Series by index labels.

pandas/core/sorting.py

+14-5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
""" miscellaneous sorting / groupby utilities """
2-
from typing import Callable, Union
2+
from typing import Callable, Optional
33

44
import numpy as np
55

@@ -189,7 +189,8 @@ def indexer_from_factorized(labels, shape, compress: bool = True):
189189
return get_group_index_sorter(ids, ngroups)
190190

191191

192-
def lexsort_indexer(keys, orders=None, na_position="last", key : Union[Callable, None] = None):
192+
def lexsort_indexer(keys, orders=None, na_position="last", key=None):
193+
193194
from pandas.core.arrays import Categorical
194195

195196
labels = []
@@ -239,7 +240,13 @@ def lexsort_indexer(keys, orders=None, na_position="last", key : Union[Callable,
239240
return indexer_from_factorized(labels, shape)
240241

241242

242-
def nargsort(items, kind="quicksort", ascending=True, na_position="last", key: Union[Callable, None] = None):
243+
def nargsort(
244+
items,
245+
kind="quicksort",
246+
ascending: bool = True,
247+
na_position="last",
248+
key: Optional[Callable] = None,
249+
):
243250
"""
244251
This is intended to be a drop-in replacement for np.argsort which
245252
handles NaNs. It adds ascending and na_position parameters.
@@ -260,9 +267,11 @@ def nargsort(items, kind="quicksort", ascending=True, na_position="last", key: U
260267
if masked.size == 0:
261268
vals = np.array([]) # vectorize fails on empty object arrays
262269
else:
263-
vals = np.asarray(key_func(masked)) # revert from masked
270+
vals = np.asarray(key_func(masked)) # revert from masked
264271

265-
return nargsort(vals, kind=kind, ascending=ascending, na_position=na_position, key=None)
272+
return nargsort(
273+
vals, kind=kind, ascending=ascending, na_position=na_position, key=None
274+
)
266275

267276
idx = np.arange(len(items))
268277
non_nans = items[~mask]

pandas/tests/frame/test_sorting.py

+31-26
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def test_sort_values(self):
8181
with pytest.raises(ValueError, match=msg):
8282
frame.sort_values(by=["A", "B"], axis=0, ascending=[True] * 5)
8383

84-
@pytest.fixture(params=[None, lambda x : x])
84+
@pytest.fixture(params=[None, lambda x: x])
8585
def key(self, request):
8686
return request.param
8787

@@ -93,22 +93,22 @@ def test_sort_values_inplace(self, key):
9393
sorted_df = frame.copy()
9494
sorted_df.sort_values(by="A", inplace=True, key=key)
9595
expected = frame.sort_values(by="A", key=key)
96-
assert_frame_equal(sorted_df, expected)
96+
tm.assert_frame_equal(sorted_df, expected)
9797

9898
sorted_df = frame.copy()
9999
sorted_df.sort_values(by=1, axis=1, inplace=True, key=key)
100100
expected = frame.sort_values(by=1, axis=1, key=key)
101-
assert_frame_equal(sorted_df, expected)
101+
tm.assert_frame_equal(sorted_df, expected)
102102

103103
sorted_df = frame.copy()
104104
sorted_df.sort_values(by="A", ascending=False, inplace=True, key=key)
105105
expected = frame.sort_values(by="A", ascending=False, key=key)
106-
assert_frame_equal(sorted_df, expected)
106+
tm.assert_frame_equal(sorted_df, expected)
107107

108108
sorted_df = frame.copy()
109109
sorted_df.sort_values(by=["A", "B"], ascending=False, inplace=True, key=key)
110110
expected = frame.sort_values(by=["A", "B"], ascending=False, key=key)
111-
assert_frame_equal(sorted_df, expected)
111+
tm.assert_frame_equal(sorted_df, expected)
112112

113113
def test_sort_nan(self):
114114
# GH3917
@@ -256,15 +256,20 @@ def test_sort_multi_index_key(self):
256256
df = DataFrame(
257257
{"a": [3, 1, 2], "b": [0, 0, 0], "c": [0, 1, 2], "d": list("abc")}
258258
)
259-
result = df.set_index(list("abc")).sort_index(level=list("ba"), key=lambda x : x[0])
259+
result = df.set_index(list("abc")).sort_index(
260+
level=list("ba"), key=lambda x: x[0]
261+
)
260262

261263
expected = DataFrame(
262264
{"a": [1, 2, 3], "b": [0, 0, 0], "c": [1, 2, 0], "d": list("bca")}
263265
)
264266
expected = expected.set_index(list("abc"))
265267
tm.assert_frame_equal(result, expected)
266268

267-
result = df.set_index(list("abc")).sort_index(level=list("ba"), key=lambda x : x[2])
269+
result = df.set_index(list("abc")).sort_index(
270+
level=list("ba"), key=lambda x: x[2]
271+
)
272+
268273
expected = df.set_index(list("abc"))
269274
tm.assert_frame_equal(result, expected)
270275

@@ -526,63 +531,63 @@ def test_sort_index_key(self):
526531

527532
result = df.sort_index()
528533
expected = df.iloc[[2, 3, 0, 1, 5, 4]]
529-
assert_frame_equal(result, expected)
534+
tm.assert_frame_equal(result, expected)
530535

531536
result = df.sort_index(key=str.lower)
532537
expected = df.iloc[[0, 1, 5, 2, 3, 4]]
533-
assert_frame_equal(result, expected)
538+
tm.assert_frame_equal(result, expected)
534539

535540
result = df.sort_index(key=str.lower, ascending=False)
536541
expected = df.iloc[[4, 2, 3, 0, 1, 5]]
537-
assert_frame_equal(result, expected)
542+
tm.assert_frame_equal(result, expected)
538543

539-
@pytest.mark.parametrize('dtype', ['int8', 'int16', 'int32', 'int64', 'float32', 'float64'])
544+
@pytest.mark.parametrize("dtype", ["int8", "int64", "float64"])
540545
def test_sort_index_key_int(self, dtype):
541546
df = DataFrame(np.arange(6, dtype=dtype), index=np.arange(6, dtype=dtype))
542547

543548
result = df.sort_index()
544-
assert_frame_equal(result, df)
549+
tm.assert_frame_equal(result, df)
545550

546-
result = df.sort_index(key=lambda x : -x)
551+
result = df.sort_index(key=lambda x: -x)
547552
expected = df.sort_index(ascending=False)
548-
assert_frame_equal(result, expected)
553+
tm.assert_frame_equal(result, expected)
549554

550-
result = df.sort_index(key=lambda x : 2 * x)
551-
assert_frame_equal(result, df)
555+
result = df.sort_index(key=lambda x: 2 * x)
556+
tm.assert_frame_equal(result, df)
552557

553558
def test_sort_value_key(self):
554559
df = DataFrame(np.array([0, 5, np.nan, 3, 2, np.nan]))
555560

556561
result = df.sort_values(0)
557562
expected = df.iloc[[0, 4, 3, 1, 2, 5]]
558-
assert_frame_equal(result, expected)
563+
tm.assert_frame_equal(result, expected)
559564

560-
result = df.sort_values(0, key=lambda x : x + 5)
565+
result = df.sort_values(0, key=lambda x: x + 5)
561566
expected = df.iloc[[0, 4, 3, 1, 2, 5]]
562-
assert_frame_equal(result, expected)
567+
tm.assert_frame_equal(result, expected)
563568

564-
result = df.sort_values(0, key=lambda x : -x, ascending=False)
569+
result = df.sort_values(0, key=lambda x: -x, ascending=False)
565570
expected = df.iloc[[0, 4, 3, 1, 2, 5]]
566-
assert_frame_equal(result, expected)
571+
tm.assert_frame_equal(result, expected)
567572

568573
def test_sort_value_key_nan(self):
569574
df = DataFrame(np.array([["hello", "goodbye"], ["hello", "Hello"]]))
570575

571576
result = df.sort_values(1)
572577
expected = df[::-1]
573-
assert_frame_equal(result, expected)
578+
tm.assert_frame_equal(result, expected)
574579

575580
result = df.sort_values([0, 1], key=str.lower)
576-
assert_frame_equal(result, df)
581+
tm.assert_frame_equal(result, df)
577582

578583
result = df.sort_values([0, 1], key=str.lower, ascending=False)
579584
expected = df.sort_values(1, key=str.lower, ascending=False)
580-
assert_frame_equal(result, expected)
585+
tm.assert_frame_equal(result, expected)
581586

582-
@pytest.mark.parametrize('key', [None, lambda x : x])
587+
@pytest.mark.parametrize("key", [None, lambda x: x])
583588
def test_sort_value_key_empty(self, key):
584589
df = DataFrame(np.array([]))
585-
590+
586591
df.sort_values(0, key=key)
587592
df.sort_index(key=key)
588593

0 commit comments

Comments
 (0)