@@ -4558,11 +4558,15 @@ def nlargest(self, n, columns, keep='first'):
4558
4558
Number of rows to return.
4559
4559
columns : label or list of labels
4560
4560
Column label(s) to order by.
4561
- keep : {'first', 'last'}, default 'first'
4561
+ keep : {'first', 'last', 'all'}, default 'first'
4562
4562
Where there are duplicate values:
4563
4563
4564
4564
- `first` : prioritize the first occurrence(s)
4565
4565
- `last` : prioritize the last occurrence(s)
4566
+ - ``all`` : do not drop any duplicates, even if it means
4567
+ selecting more than `n` items.
4568
+
4569
+ .. versionadded:: 0.24.0
4566
4570
4567
4571
Returns
4568
4572
-------
@@ -4585,42 +4589,51 @@ def nlargest(self, n, columns, keep='first'):
4585
4589
4586
4590
Examples
4587
4591
--------
4588
- >>> df = pd.DataFrame({'a': [1, 10, 8, 10, -1 ],
4589
- ... 'b': list('abdce '),
4590
- ... 'c': [1.0, 2.0, np.nan, 3.0, 4.0]})
4592
+ >>> df = pd.DataFrame({'a': [1, 10, 8, 11, 8, 2],
4593
+ ... 'b': list('abdcef'),
4594
+ ... 'c': [1.0, 2.0, np.nan, 3.0, 4.0, 9.0]})
4591
4595
>>> df
4592
4596
a b c
4593
4597
0 1 a 1.0
4594
4598
1 10 b 2.0
4595
4599
2 8 d NaN
4596
- 3 10 c 3.0
4597
- 4 -1 e 4.0
4600
+ 3 11 c 3.0
4601
+ 4 8 e 4.0
4602
+ 5 2 f 9.0
4598
4603
4599
4604
In the following example, we will use ``nlargest`` to select the three
4600
4605
rows having the largest values in column "a".
4601
4606
4602
4607
>>> df.nlargest(3, 'a')
4603
4608
a b c
4609
+ 3 11 c 3.0
4604
4610
1 10 b 2.0
4605
- 3 10 c 3.0
4606
4611
2 8 d NaN
4607
4612
4608
4613
When using ``keep='last'``, ties are resolved in reverse order:
4609
4614
4610
4615
>>> df.nlargest(3, 'a', keep='last')
4611
4616
a b c
4612
- 3 10 c 3.0
4617
+ 3 11 c 3.0
4618
+ 1 10 b 2.0
4619
+ 4 8 e 4.0
4620
+
4621
+ When using ``keep='all'``, all duplicate items are maintained:
4622
+ >>> df.nlargest(3, 'a', keep='all')
4623
+ a b c
4624
+ 3 11 c 3.0
4613
4625
1 10 b 2.0
4614
4626
2 8 d NaN
4627
+ 4 8 e 4.0
4615
4628
4616
4629
To order by the largest values in column "a" and then "c", we can
4617
4630
specify multiple columns like in the next example.
4618
4631
4619
4632
>>> df.nlargest(3, ['a', 'c'])
4620
4633
a b c
4621
- 3 10 c 3.0
4634
+ 4 8 e 4.0
4635
+ 3 11 c 3.0
4622
4636
1 10 b 2.0
4623
- 2 8 d NaN
4624
4637
4625
4638
Attempting to use ``nlargest`` on non-numeric dtypes will raise a
4626
4639
``TypeError``:
@@ -4644,25 +4657,73 @@ def nsmallest(self, n, columns, keep='first'):
4644
4657
Number of items to retrieve
4645
4658
columns : list or str
4646
4659
Column name or names to order by
4647
- keep : {'first', 'last'}, default 'first'
4660
+ keep : {'first', 'last', 'all'}, default 'first'
4648
4661
Where there are duplicate values:
4649
4662
- ``first`` : take the first occurrence.
4650
4663
- ``last`` : take the last occurrence.
4664
+ - ``all`` : do not drop any duplicates, even if it means
4665
+ selecting more than `n` items.
4666
+
4667
+ .. versionadded:: 0.24.0
4651
4668
4652
4669
Returns
4653
4670
-------
4654
4671
DataFrame
4655
4672
4656
4673
Examples
4657
4674
--------
4658
- >>> df = pd.DataFrame({'a': [1, 10, 8, 11, -1],
4659
- ... 'b': list('abdce'),
4660
- ... 'c': [1.0, 2.0, np.nan, 3.0, 4.0]})
4675
+ >>> df = pd.DataFrame({'a': [1, 10, 8, 11, 8, 2],
4676
+ ... 'b': list('abdcef'),
4677
+ ... 'c': [1.0, 2.0, np.nan, 3.0, 4.0, 9.0]})
4678
+ >>> df
4679
+ a b c
4680
+ 0 1 a 1.0
4681
+ 1 10 b 2.0
4682
+ 2 8 d NaN
4683
+ 3 11 c 3.0
4684
+ 4 8 e 4.0
4685
+ 5 2 f 9.0
4686
+
4687
+ In the following example, we will use ``nsmallest`` to select the
4688
+ three rows having the smallest values in column "a".
4689
+
4661
4690
>>> df.nsmallest(3, 'a')
4662
- a b c
4663
- 4 -1 e 4
4664
- 0 1 a 1
4665
- 2 8 d NaN
4691
+ a b c
4692
+ 0 1 a 1.0
4693
+ 5 2 f 9.0
4694
+ 2 8 d NaN
4695
+
4696
+ When using ``keep='last'``, ties are resolved in reverse order:
4697
+
4698
+ >>> df.nsmallest(3, 'a', keep='last')
4699
+ a b c
4700
+ 0 1 a 1.0
4701
+ 5 2 f 9.0
4702
+ 4 8 e 4.0
4703
+
4704
+ When using ``keep='all'``, all duplicate items are maintained:
4705
+ >>> df.nsmallest(3, 'a', keep='all')
4706
+ a b c
4707
+ 0 1 a 1.0
4708
+ 5 2 f 9.0
4709
+ 2 8 d NaN
4710
+ 4 8 e 4.0
4711
+
4712
+ To order by the smallest values in column "a" and then "c", we can
4713
+ specify multiple columns like in the next example.
4714
+
4715
+ >>> df.nsmallest(3, ['a', 'c'])
4716
+ a b c
4717
+ 0 1 a 1.0
4718
+ 5 2 f 9.0
4719
+ 4 8 e 4.0
4720
+
4721
+ Attempting to use ``nsmallest`` on non-numeric dtypes will raise a
4722
+ ``TypeError``:
4723
+
4724
+ >>> df.nsmallest(3, 'b')
4725
+ Traceback (most recent call last):
4726
+ TypeError: Column 'b' has dtype object, cannot use method 'nsmallest'
4666
4727
"""
4667
4728
return algorithms .SelectNFrame (self ,
4668
4729
n = n ,
0 commit comments