Skip to content

Commit 917d6c8

Browse files
BUG: fixed formatting and small linting issues
1 parent 23d4b2d commit 917d6c8

File tree

10 files changed

+99
-72
lines changed

10 files changed

+99
-72
lines changed

doc/source/whatsnew/v1.0.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,8 @@ Other enhancements
150150
- Roundtripping DataFrames with nullable integer or string data types to parquet
151151
(:meth:`~DataFrame.to_parquet` / :func:`read_parquet`) using the `'pyarrow'` engine
152152
now preserve those data types with pyarrow >= 1.0.0 (:issue:`20612`).
153+
- :meth:`DataFrame.sort_values`, :meth:`DataFrame.sort_index`, :meth:`Series.sort_values`, and :meth:`Series.sort_index`
154+
now support the ``key`` argument which allows for custom sorting orders (:issue:`3942`).
153155

154156
Build Changes
155157
^^^^^^^^^^^^^

pandas/core/arrays/categorical.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1592,7 +1592,7 @@ def argsort(self, ascending=True, kind="quicksort", *args, **kwargs):
15921592
"""
15931593
return super().argsort(ascending=ascending, kind=kind, *args, **kwargs)
15941594

1595-
def sort_values(self, inplace=False, ascending=True, na_position="last"):
1595+
def sort_values(self, inplace=False, ascending=True, na_position="last", key=None):
15961596
"""
15971597
Sort the Categorical by category value returning a new
15981598
Categorical by default.
@@ -1670,7 +1670,9 @@ def sort_values(self, inplace=False, ascending=True, na_position="last"):
16701670
if na_position not in ["last", "first"]:
16711671
raise ValueError(f"invalid na_position: {na_position!r}")
16721672

1673-
sorted_idx = nargsort(self, ascending=ascending, na_position=na_position)
1673+
sorted_idx = nargsort(
1674+
self, ascending=ascending, na_position=na_position, key=key
1675+
)
16741676

16751677
if inplace:
16761678
self._codes = self._codes[sorted_idx]

pandas/core/frame.py

+8-6
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import sys
1616
from textwrap import dedent
1717
from typing import (
18+
Callable,
1819
FrozenSet,
1920
Hashable,
2021
Iterable,
@@ -25,7 +26,6 @@
2526
Tuple,
2627
Type,
2728
Union,
28-
Callable
2929
)
3030
import warnings
3131

@@ -4715,15 +4715,15 @@ def f(vals):
47154715

47164716
@Substitution(**_shared_doc_kwargs)
47174717
@Appender(NDFrame.sort_values.__doc__)
4718-
def sort_values(
4718+
def sort_values( # type: ignore
47194719
self,
47204720
by,
47214721
axis=0,
47224722
ascending=True,
47234723
inplace=False,
47244724
kind="quicksort",
47254725
na_position="last",
4726-
key : Union[Callable, None] = None
4726+
key: Optional[Callable] = None,
47274727
):
47284728
inplace = validate_bool_kwarg(inplace, "inplace")
47294729
axis = self._get_axis_number(axis)
@@ -4738,7 +4738,9 @@ def sort_values(
47384738
from pandas.core.sorting import lexsort_indexer
47394739

47404740
keys = [self._get_label_or_level_values(x, axis=axis) for x in by]
4741-
indexer = lexsort_indexer(keys, orders=ascending, na_position=na_position, key=key)
4741+
indexer = lexsort_indexer(
4742+
keys, orders=ascending, na_position=na_position, key=key
4743+
)
47424744
indexer = ensure_platform_int(indexer)
47434745
else:
47444746
from pandas.core.sorting import nargsort
@@ -4764,7 +4766,7 @@ def sort_values(
47644766

47654767
@Substitution(**_shared_doc_kwargs)
47664768
@Appender(NDFrame.sort_index.__doc__)
4767-
def sort_index(
4769+
def sort_index( # type: ignore
47684770
self,
47694771
axis=0,
47704772
level=None,
@@ -4774,7 +4776,7 @@ def sort_index(
47744776
na_position="last",
47754777
sort_remaining=True,
47764778
by=None,
4777-
key : Union[Callable, None] = None
4779+
key: Optional[Callable] = None,
47784780
):
47794781

47804782
# TODO: this can be combined with Series.sort_index impl as

pandas/core/indexes/base.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
from datetime import datetime
22
import operator
33
from textwrap import dedent
4-
5-
from typing import FrozenSet, Union, Callable
4+
from typing import Callable, FrozenSet, Optional, Union
65
import warnings
76

87
import numpy as np
@@ -4401,7 +4400,9 @@ def asof_locs(self, where, mask):
44014400

44024401
return result
44034402

4404-
def sort_values(self, return_indexer=False, ascending=True, key : Callable = None):
4403+
def sort_values(
4404+
self, return_indexer=False, ascending=True, key: Optional[Callable] = None
4405+
):
44054406
"""
44064407
Return a sorted copy of the index.
44074408
@@ -4415,7 +4416,7 @@ def sort_values(self, return_indexer=False, ascending=True, key : Callable = Non
44154416
ascending : bool, default True
44164417
Should the index values be sorted in an ascending order.
44174418
key : Callable, default None
4418-
Apply a key function to the indices before sorting, like
4419+
Apply a key function to the indices before sorting, like
44194420
built-in sorted function.
44204421
44214422
Returns

pandas/core/indexes/datetimelike.py

+12-5
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
Base and utility classes for tseries type pandas objects.
33
"""
44
import operator
5-
from typing import Set
5+
from typing import Callable, Optional, Set
66

77
import numpy as np
88

@@ -273,12 +273,19 @@ def map(self, mapper, na_action=None):
273273
except Exception:
274274
return self.astype(object).map(mapper)
275275

276-
def sort_values(self, return_indexer=False, ascending=True):
276+
def sort_values(
277+
self, return_indexer=False, ascending=True, key: Optional[Callable] = None
278+
):
277279
"""
278280
Return sorted copy of Index.
279281
"""
282+
if key:
283+
idx = self.map(key, na_action="ignore")
284+
else:
285+
idx = self
286+
280287
if return_indexer:
281-
_as = self.argsort()
288+
_as = idx.argsort()
282289
if not ascending:
283290
_as = _as[::-1]
284291
sorted_index = self.take(_as)
@@ -287,7 +294,7 @@ def sort_values(self, return_indexer=False, ascending=True):
287294
# NB: using asi8 instead of _ndarray_values matters in numpy 1.18
288295
# because the treatment of NaT has been changed to put NaT last
289296
# instead of first.
290-
sorted_values = np.sort(self.asi8)
297+
sorted_values = np.sort(idx.asi8)
291298
attribs = self._get_attributes_dict()
292299
freq = attribs["freq"]
293300

@@ -301,7 +308,7 @@ def sort_values(self, return_indexer=False, ascending=True):
301308
if not ascending:
302309
sorted_values = sorted_values[::-1]
303310

304-
return self._simple_new(sorted_values, **attribs)
311+
return self._simple_new(sorted_values, **attribs) # type: ignore
305312

306313
@Appender(_index_shared_docs["take"] % _index_doc_kwargs)
307314
def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):

pandas/core/series.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -2831,14 +2831,14 @@ def update(self, other):
28312831
# ----------------------------------------------------------------------
28322832
# Reindexing, sorting
28332833

2834-
def sort_values(
2834+
def sort_values( # type: ignore
28352835
self,
28362836
axis=0,
28372837
ascending=True,
28382838
inplace=False,
28392839
kind="quicksort",
28402840
na_position="last",
2841-
key: Callable = None
2841+
key: Optional[Callable] = None,
28422842
):
28432843
"""
28442844
Sort by the values.
@@ -3029,7 +3029,7 @@ def _try_kind_sort(arr):
30293029
else:
30303030
return result.__finalize__(self)
30313031

3032-
def sort_index(
3032+
def sort_index( # type: ignore
30333033
self,
30343034
axis=0,
30353035
level=None,
@@ -3038,7 +3038,7 @@ def sort_index(
30383038
kind="quicksort",
30393039
na_position="last",
30403040
sort_remaining=True,
3041-
key : Callable = None
3041+
key: Optional[Callable] = None,
30423042
):
30433043
"""
30443044
Sort Series by index labels.

pandas/core/sorting.py

+14-5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
""" miscellaneous sorting / groupby utilities """
2-
from typing import Callable, Union
2+
from typing import Callable, Optional
33

44
import numpy as np
55

@@ -189,7 +189,8 @@ def indexer_from_factorized(labels, shape, compress: bool = True):
189189
return get_group_index_sorter(ids, ngroups)
190190

191191

192-
def lexsort_indexer(keys, orders=None, na_position="last", key : Union[Callable, None] = None):
192+
def lexsort_indexer(keys, orders=None, na_position="last", key=None):
193+
193194
from pandas.core.arrays import Categorical
194195

195196
labels = []
@@ -239,7 +240,13 @@ def lexsort_indexer(keys, orders=None, na_position="last", key : Union[Callable,
239240
return indexer_from_factorized(labels, shape)
240241

241242

242-
def nargsort(items, kind="quicksort", ascending=True, na_position="last", key: Union[Callable, None] = None):
243+
def nargsort(
244+
items,
245+
kind="quicksort",
246+
ascending: bool = True,
247+
na_position="last",
248+
key: Optional[Callable] = None,
249+
):
243250
"""
244251
This is intended to be a drop-in replacement for np.argsort which
245252
handles NaNs. It adds ascending and na_position parameters.
@@ -260,9 +267,11 @@ def nargsort(items, kind="quicksort", ascending=True, na_position="last", key: U
260267
if masked.size == 0:
261268
vals = np.array([]) # vectorize fails on empty object arrays
262269
else:
263-
vals = np.asarray(key_func(masked)) # revert from masked
270+
vals = np.asarray(key_func(masked)) # revert from masked
264271

265-
return nargsort(vals, kind=kind, ascending=ascending, na_position=na_position, key=None)
272+
return nargsort(
273+
vals, kind=kind, ascending=ascending, na_position=na_position, key=None
274+
)
266275

267276
idx = np.arange(len(items))
268277
non_nans = items[~mask]

pandas/tests/frame/test_sorting.py

+31-26
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def test_sort_values(self):
8181
with pytest.raises(ValueError, match=msg):
8282
frame.sort_values(by=["A", "B"], axis=0, ascending=[True] * 5)
8383

84-
@pytest.fixture(params=[None, lambda x : x])
84+
@pytest.fixture(params=[None, lambda x: x])
8585
def key(self, request):
8686
return request.param
8787

@@ -93,22 +93,22 @@ def test_sort_values_inplace(self, key):
9393
sorted_df = frame.copy()
9494
sorted_df.sort_values(by="A", inplace=True, key=key)
9595
expected = frame.sort_values(by="A", key=key)
96-
assert_frame_equal(sorted_df, expected)
96+
tm.assert_frame_equal(sorted_df, expected)
9797

9898
sorted_df = frame.copy()
9999
sorted_df.sort_values(by=1, axis=1, inplace=True, key=key)
100100
expected = frame.sort_values(by=1, axis=1, key=key)
101-
assert_frame_equal(sorted_df, expected)
101+
tm.assert_frame_equal(sorted_df, expected)
102102

103103
sorted_df = frame.copy()
104104
sorted_df.sort_values(by="A", ascending=False, inplace=True, key=key)
105105
expected = frame.sort_values(by="A", ascending=False, key=key)
106-
assert_frame_equal(sorted_df, expected)
106+
tm.assert_frame_equal(sorted_df, expected)
107107

108108
sorted_df = frame.copy()
109109
sorted_df.sort_values(by=["A", "B"], ascending=False, inplace=True, key=key)
110110
expected = frame.sort_values(by=["A", "B"], ascending=False, key=key)
111-
assert_frame_equal(sorted_df, expected)
111+
tm.assert_frame_equal(sorted_df, expected)
112112

113113
def test_sort_nan(self):
114114
# GH3917
@@ -256,15 +256,20 @@ def test_sort_multi_index_key(self):
256256
df = DataFrame(
257257
{"a": [3, 1, 2], "b": [0, 0, 0], "c": [0, 1, 2], "d": list("abc")}
258258
)
259-
result = df.set_index(list("abc")).sort_index(level=list("ba"), key=lambda x : x[0])
259+
result = df.set_index(list("abc")).sort_index(
260+
level=list("ba"), key=lambda x: x[0]
261+
)
260262

261263
expected = DataFrame(
262264
{"a": [1, 2, 3], "b": [0, 0, 0], "c": [1, 2, 0], "d": list("bca")}
263265
)
264266
expected = expected.set_index(list("abc"))
265267
tm.assert_frame_equal(result, expected)
266268

267-
result = df.set_index(list("abc")).sort_index(level=list("ba"), key=lambda x : x[2])
269+
result = df.set_index(list("abc")).sort_index(
270+
level=list("ba"), key=lambda x: x[2]
271+
)
272+
268273
expected = df.set_index(list("abc"))
269274
tm.assert_frame_equal(result, expected)
270275

@@ -594,63 +599,63 @@ def test_sort_index_key(self):
594599

595600
result = df.sort_index()
596601
expected = df.iloc[[2, 3, 0, 1, 5, 4]]
597-
assert_frame_equal(result, expected)
602+
tm.assert_frame_equal(result, expected)
598603

599604
result = df.sort_index(key=str.lower)
600605
expected = df.iloc[[0, 1, 5, 2, 3, 4]]
601-
assert_frame_equal(result, expected)
606+
tm.assert_frame_equal(result, expected)
602607

603608
result = df.sort_index(key=str.lower, ascending=False)
604609
expected = df.iloc[[4, 2, 3, 0, 1, 5]]
605-
assert_frame_equal(result, expected)
610+
tm.assert_frame_equal(result, expected)
606611

607-
@pytest.mark.parametrize('dtype', ['int8', 'int16', 'int32', 'int64', 'float32', 'float64'])
612+
@pytest.mark.parametrize("dtype", ["int8", "int64", "float64"])
608613
def test_sort_index_key_int(self, dtype):
609614
df = DataFrame(np.arange(6, dtype=dtype), index=np.arange(6, dtype=dtype))
610615

611616
result = df.sort_index()
612-
assert_frame_equal(result, df)
617+
tm.assert_frame_equal(result, df)
613618

614-
result = df.sort_index(key=lambda x : -x)
619+
result = df.sort_index(key=lambda x: -x)
615620
expected = df.sort_index(ascending=False)
616-
assert_frame_equal(result, expected)
621+
tm.assert_frame_equal(result, expected)
617622

618-
result = df.sort_index(key=lambda x : 2 * x)
619-
assert_frame_equal(result, df)
623+
result = df.sort_index(key=lambda x: 2 * x)
624+
tm.assert_frame_equal(result, df)
620625

621626
def test_sort_value_key(self):
622627
df = DataFrame(np.array([0, 5, np.nan, 3, 2, np.nan]))
623628

624629
result = df.sort_values(0)
625630
expected = df.iloc[[0, 4, 3, 1, 2, 5]]
626-
assert_frame_equal(result, expected)
631+
tm.assert_frame_equal(result, expected)
627632

628-
result = df.sort_values(0, key=lambda x : x + 5)
633+
result = df.sort_values(0, key=lambda x: x + 5)
629634
expected = df.iloc[[0, 4, 3, 1, 2, 5]]
630-
assert_frame_equal(result, expected)
635+
tm.assert_frame_equal(result, expected)
631636

632-
result = df.sort_values(0, key=lambda x : -x, ascending=False)
637+
result = df.sort_values(0, key=lambda x: -x, ascending=False)
633638
expected = df.iloc[[0, 4, 3, 1, 2, 5]]
634-
assert_frame_equal(result, expected)
639+
tm.assert_frame_equal(result, expected)
635640

636641
def test_sort_value_key_nan(self):
637642
df = DataFrame(np.array([["hello", "goodbye"], ["hello", "Hello"]]))
638643

639644
result = df.sort_values(1)
640645
expected = df[::-1]
641-
assert_frame_equal(result, expected)
646+
tm.assert_frame_equal(result, expected)
642647

643648
result = df.sort_values([0, 1], key=str.lower)
644-
assert_frame_equal(result, df)
649+
tm.assert_frame_equal(result, df)
645650

646651
result = df.sort_values([0, 1], key=str.lower, ascending=False)
647652
expected = df.sort_values(1, key=str.lower, ascending=False)
648-
assert_frame_equal(result, expected)
653+
tm.assert_frame_equal(result, expected)
649654

650-
@pytest.mark.parametrize('key', [None, lambda x : x])
655+
@pytest.mark.parametrize("key", [None, lambda x: x])
651656
def test_sort_value_key_empty(self, key):
652657
df = DataFrame(np.array([]))
653-
658+
654659
df.sort_values(0, key=key)
655660
df.sort_index(key=key)
656661

0 commit comments

Comments
 (0)