Skip to content

Commit 0479a80

Browse files
committed
Merge pull request pandas-dev#10393 from cpcloud/df-partial-sort
Add nlargest/nsmallest for DataFrame
2 parents f1719b7 + 39b8ce3 commit 0479a80

File tree

5 files changed

+125
-0
lines changed

5 files changed

+125
-0
lines changed

doc/source/api.rst

+2
Original file line numberDiff line numberDiff line change
@@ -904,6 +904,8 @@ Reshaping, sorting, transposing
904904
DataFrame.sort
905905
DataFrame.sort_index
906906
DataFrame.sortlevel
907+
DataFrame.nlargest
908+
DataFrame.nsmallest
907909
DataFrame.swaplevel
908910
DataFrame.stack
909911
DataFrame.unstack

doc/source/basics.rst

+14
Original file line numberDiff line numberDiff line change
@@ -1497,6 +1497,20 @@ faster than sorting the entire Series and calling ``head(n)`` on the result.
14971497
s.nsmallest(3)
14981498
s.nlargest(3)
14991499
1500+
.. versionadded:: 0.17.0
1501+
1502+
``DataFrame`` also has the ``nlargest`` and ``nsmallest`` methods.
1503+
1504+
.. ipython:: python
1505+
1506+
df = DataFrame({'a': [-2, -1, 1, 10, 8, 11, -1],
1507+
'b': list('abdceff'),
1508+
'c': [1.0, 2.0, 4.0, 3.2, np.nan, 3.0, 4.0]})
1509+
df.nlargest(3, 'a')
1510+
df.nlargest(5, ['a', 'c'])
1511+
df.nsmallest(3, 'a')
1512+
df.nsmallest(5, ['a', 'c'])
1513+
15001514
15011515
.. _basics.multi-index_sorting:
15021516

doc/source/whatsnew/v0.17.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ Check the :ref:`API Changes <whatsnew_0170.api>` and :ref:`deprecations <whatsne
3232
New features
3333
~~~~~~~~~~~~
3434

35+
- ``DataFrame`` has the ``nlargest`` and ``nsmallest`` methods (:issue:`10393`)
3536
- SQL io functions now accept a SQLAlchemy connectable. (:issue:`7877`)
3637
- Enable writing complex values to HDF stores when using table format (:issue:`10447`)
3738
- Enable reading gzip compressed files via URL, either by explicitly setting the compression parameter or by inferring from the presence of the HTTP Content-Encoding header in the response (:issue:`8685`)

pandas/core/frame.py

+73
Original file line numberDiff line numberDiff line change
@@ -3127,6 +3127,79 @@ def sortlevel(self, level=0, axis=0, ascending=True,
31273127
else:
31283128
return self._constructor(new_data).__finalize__(self)
31293129

3130+
def _nsorted(self, columns, n, method, take_last):
    """Shared implementation behind ``nlargest`` and ``nsmallest``.

    Parameters
    ----------
    columns : list or str
        Column name or names to order by.  Only the first column decides
        which rows are kept; every column is used to order the result.
    n : int
        Number of rows to retrieve
    method : {'nlargest', 'nsmallest'}
        Name of the ``Series`` method used to pick the rows
    take_last : bool
        Forwarded to the ``Series`` method as ``take_last``

    Returns
    -------
    DataFrame
    """
    if not com.is_list_like(columns):
        columns = [columns]
    columns = list(columns)
    # Pick the rows by *position*, not by label.  Looking the selected labels
    # up again with ``.loc`` returns every row that shares a label, so a
    # frame with duplicate index labels would yield more than ``n`` rows.
    first = self[columns[0]].reset_index(drop=True)
    positions = getattr(first, method)(n, take_last=take_last).index
    ascending = dict(nlargest=False, nsmallest=True)[method]
    # mergesort is stable, so rows that compare equal keep the order in
    # which the Series method returned them.
    return self.iloc[positions.values].sort(columns, ascending=ascending,
                                            kind='mergesort')
3138+
3139+
def nlargest(self, n, columns, take_last=False):
    """Return the rows holding the `n` largest values of `columns`,
    sorted in descending order.

    .. versionadded:: 0.17.0

    Parameters
    ----------
    n : int
        Number of items to retrieve
    columns : list or str
        Column name or names to order by
    take_last : bool, optional
        Where there are duplicate values, take the last duplicate

    Returns
    -------
    DataFrame

    Examples
    --------
    >>> df = DataFrame({'a': [1, 10, 8, 11, -1],
    ...                 'b': list('abdce'),
    ...                 'c': [1.0, 2.0, np.nan, 3.0, 4.0]})
    >>> df.nlargest(3, 'a')
        a  b   c
    3  11  c   3
    1  10  b   2
    2   8  d NaN
    """
    # All of the work happens in the helper shared with ``nsmallest``.
    return self._nsorted(columns=columns, n=n, method='nlargest',
                         take_last=take_last)
3170+
3171+
def nsmallest(self, n, columns, take_last=False):
    """Return the rows holding the `n` smallest values of `columns`,
    sorted in ascending order.

    .. versionadded:: 0.17.0

    Parameters
    ----------
    n : int
        Number of items to retrieve
    columns : list or str
        Column name or names to order by
    take_last : bool, optional
        Where there are duplicate values, take the last duplicate

    Returns
    -------
    DataFrame

    Examples
    --------
    >>> df = DataFrame({'a': [1, 10, 8, 11, -1],
    ...                 'b': list('abdce'),
    ...                 'c': [1.0, 2.0, np.nan, 3.0, 4.0]})
    >>> df.nsmallest(3, 'a')
       a  b   c
    4 -1  e   4
    0  1  a   1
    2  8  d NaN
    """
    # All of the work happens in the helper shared with ``nlargest``.
    return self._nsorted(columns=columns, n=n, method='nsmallest',
                         take_last=take_last)
3202+
31303203
def swaplevel(self, i, j, axis=0):
31313204
"""
31323205
Swap levels i and j in a MultiIndex on a particular axis

pandas/tests/test_frame.py

+35
Original file line numberDiff line numberDiff line change
@@ -14609,6 +14609,41 @@ def test_dataframe_metadata(self):
1460914609
self.assertEqual(df._metadata, unpickled._metadata)
1461014610
self.assertEqual(df.testattr, unpickled.testattr)
1461114611

14612+
def test_nlargest(self):
    """``nlargest`` on one column matches a descending sort plus ``head``."""
    # GH10393
    from string import ascii_lowercase
    letters = list(ascii_lowercase[:10])
    frame = pd.DataFrame({'a': np.random.permutation(10), 'b': letters})
    result = frame.nlargest(5, 'a')
    # Reference answer: sort the whole frame, then keep the top five rows.
    fully_sorted = frame.sort('a', ascending=False)
    expected = fully_sorted.head(5)
    tm.assert_frame_equal(result, expected)
14620+
14621+
def test_nlargest_multiple_columns(self):
    """``nlargest`` on two columns matches a descending sort plus ``head``."""
    from string import ascii_lowercase
    order_by = ['a', 'b']
    # 'a' is drawn before 'c', the same order the random state is consumed
    # in when the values are written inline in the dict literal.
    a_values = np.random.permutation(10)
    b_values = list(ascii_lowercase[:10])
    c_values = np.random.permutation(10).astype('float64')
    frame = pd.DataFrame({'a': a_values, 'b': b_values, 'c': c_values})
    result = frame.nlargest(5, order_by)
    expected = frame.sort(order_by, ascending=False).head(5)
    tm.assert_frame_equal(result, expected)
14629+
14630+
def test_nsmallest(self):
    """``nsmallest`` on one column matches an ascending sort plus ``head``."""
    from string import ascii_lowercase
    letters = list(ascii_lowercase[:10])
    frame = pd.DataFrame({'a': np.random.permutation(10), 'b': letters})
    result = frame.nsmallest(5, 'a')
    # Reference answer: sort the whole frame, then keep the first five rows.
    fully_sorted = frame.sort('a')
    expected = fully_sorted.head(5)
    tm.assert_frame_equal(result, expected)
14637+
14638+
def test_nsmallest_multiple_columns(self):
    """``nsmallest`` on two columns matches an ascending sort plus ``head``."""
    from string import ascii_lowercase
    order_by = ['a', 'c']
    # 'a' is drawn before 'c', the same order the random state is consumed
    # in when the values are written inline in the dict literal.
    a_values = np.random.permutation(10)
    b_values = list(ascii_lowercase[:10])
    c_values = np.random.permutation(10).astype('float64')
    frame = pd.DataFrame({'a': a_values, 'b': b_values, 'c': c_values})
    result = frame.nsmallest(5, order_by)
    expected = frame.sort(order_by).head(5)
    tm.assert_frame_equal(result, expected)
14646+
1461214647
def test_to_panel_expanddim(self):
1461314648
# GH 9762
1461414649

0 commit comments

Comments
 (0)