Skip to content

Commit 5dd8f2c

Browse files
committed
Merge pull request #7356 from cpcloud/groupby-nlarg-smal-7053
ENH/GBY: add nlargest/nsmallest to Series.groupby
2 parents d01c2f5 + f6cc881 commit 5dd8f2c

File tree

4 files changed

+39
-1
lines changed

4 files changed

+39
-1
lines changed

doc/source/groupby.rst

+12
Original file line numberDiff line numberDiff line change
@@ -664,6 +664,18 @@ In this example, we chopped the collection of time series into yearly chunks
664664
then independently called :ref:`fillna <missing_data.fillna>` on the
665665
groups.
666666

667+
.. versionadded:: 0.14.1
668+
669+
The ``nlargest`` and ``nsmallest`` methods work on ``Series``-style groupbys, returning the ``n`` largest or smallest values within each group:
670+
671+
.. ipython:: python
672+
673+
s = Series([9, 8, 7, 5, 19, 1, 4.2, 3.3])
674+
g = Series(list('abababab'))
675+
gb = s.groupby(g)
676+
gb.nlargest(3)
677+
gb.nsmallest(3)
678+
667679
.. _groupby.apply:
668680

669681
Flexible ``apply``

doc/source/v0.14.1.txt

+3
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,9 @@ Enhancements
114114

115115
- Implemented ``sem`` (standard error of the mean) operation for ``Series``,
116116
``DataFrame``, ``Panel``, and ``Groupby`` (:issue:`6897`)
117+
- Add ``nlargest`` and ``nsmallest`` to the ``Series`` ``groupby`` whitelist,
118+
so these methods can now be called directly on a ``SeriesGroupBy`` object
119+
(:issue:`7053`).
117120

118121

119122

pandas/core/groupby.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,8 @@
7878

7979
_series_apply_whitelist = \
8080
(_common_apply_whitelist - set(['boxplot'])) | \
81-
frozenset(['dtype', 'value_counts', 'unique', 'nunique'])
81+
frozenset(['dtype', 'value_counts', 'unique', 'nunique',
82+
'nlargest', 'nsmallest'])
8283

8384
_dataframe_apply_whitelist = \
8485
_common_apply_whitelist | frozenset(['dtypes', 'corrwith'])

pandas/tests/test_groupby.py

+22
Original file line numberDiff line numberDiff line change
@@ -4047,6 +4047,7 @@ def test_groupby_whitelist(self):
40474047
'value_counts',
40484048
'diff',
40494049
'unique', 'nunique',
4050+
'nlargest', 'nsmallest',
40504051
])
40514052

40524053
for obj, whitelist in zip((df, s),
@@ -4381,6 +4382,27 @@ def test_max_nan_bug(self):
43814382
tm.assert_frame_equal(r, e)
43824383
self.assertFalse(r['File'].isnull().any())
43834384

4385+
def test_nlargest(self):
4386+
a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10])
4387+
b = Series(list('a' * 5 + 'b' * 5))
4388+
gb = a.groupby(b)
4389+
r = gb.nlargest(3)
4390+
e = Series([7, 5, 3, 10, 9, 6],
4391+
index=MultiIndex.from_arrays([list('aaabbb'),
4392+
[3, 2, 1, 9, 5, 8]]))
4393+
tm.assert_series_equal(r, e)
4394+
4395+
def test_nsmallest(self):
4396+
a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10])
4397+
b = Series(list('a' * 5 + 'b' * 5))
4398+
gb = a.groupby(b)
4399+
r = gb.nsmallest(3)
4400+
e = Series([1, 2, 3, 0, 4, 6],
4401+
index=MultiIndex.from_arrays([list('aaabbb'),
4402+
[0, 4, 1, 6, 7, 8]]))
4403+
tm.assert_series_equal(r, e)
4404+
4405+
43844406
def assert_fp_equal(a, b):
43854407
assert (np.abs(a - b) < 1e-12).all()
43864408

0 commit comments

Comments
 (0)