@@ -4559,11 +4559,15 @@ def nlargest(self, n, columns, keep='first'):
4559
4559
Number of rows to return.
4560
4560
columns : label or list of labels
4561
4561
Column label(s) to order by.
4562
- keep : {'first', 'last'}, default 'first'
4562
+ keep : {'first', 'last', 'all'}, default 'first'
4563
4563
Where there are duplicate values:
4564
4564
4565
4565
- `first` : prioritize the first occurrence(s)
4566
4566
- `last` : prioritize the last occurrence(s)
4567
+ - ``all`` : do not drop any duplicates, even if it means
4568
+ selecting more than `n` items.
4569
+
4570
+ .. versionadded:: 0.24.0
4567
4571
4568
4572
Returns
4569
4573
-------
@@ -4586,47 +4590,58 @@ def nlargest(self, n, columns, keep='first'):
4586
4590
4587
4591
Examples
4588
4592
--------
4589
- >>> df = pd.DataFrame({'a': [1, 10, 8, 10, -1 ],
4590
- ... 'b': list('abdce '),
4591
- ... 'c': [1.0, 2.0, np.nan, 3.0, 4.0]})
4593
+ >>> df = pd.DataFrame({'a': [1, 10, 8, 11, 8, 2],
4594
+ ... 'b': list('abdcef'),
4595
+ ... 'c': [1.0, 2.0, np.nan, 3.0, 4.0, 9.0]})
4592
4596
>>> df
4593
4597
a b c
4594
4598
0 1 a 1.0
4595
4599
1 10 b 2.0
4596
4600
2 8 d NaN
4597
- 3 10 c 3.0
4598
- 4 -1 e 4.0
4601
+ 3 11 c 3.0
4602
+ 4 8 e 4.0
4603
+ 5 2 f 9.0
4599
4604
4600
4605
In the following example, we will use ``nlargest`` to select the three
4601
4606
rows having the largest values in column "a".
4602
4607
4603
4608
>>> df.nlargest(3, 'a')
4604
4609
a b c
4610
+ 3 11 c 3.0
4605
4611
1 10 b 2.0
4606
- 3 10 c 3.0
4607
4612
2 8 d NaN
4608
4613
4609
4614
When using ``keep='last'``, ties are resolved in reverse order:
4610
4615
4611
4616
>>> df.nlargest(3, 'a', keep='last')
4612
4617
a b c
4613
- 3 10 c 3.0
4618
+ 3 11 c 3.0
4619
+ 1 10 b 2.0
4620
+ 4 8 e 4.0
4621
+
4622
+ When using ``keep='all'``, all duplicate items are maintained:
4623
+
4624
+ >>> df.nlargest(3, 'a', keep='all')
4625
+ a b c
4626
+ 3 11 c 3.0
4614
4627
1 10 b 2.0
4615
4628
2 8 d NaN
4629
+ 4 8 e 4.0
4616
4630
4617
4631
To order by the largest values in column "a" and then "c", we can
4618
4632
specify multiple columns like in the next example.
4619
4633
4620
4634
>>> df.nlargest(3, ['a', 'c'])
4621
4635
a b c
4622
- 3 10 c 3.0
4636
+ 4 8 e 4.0
4637
+ 3 11 c 3.0
4623
4638
1 10 b 2.0
4624
- 2 8 d NaN
4625
4639
4626
4640
Attempting to use ``nlargest`` on non-numeric dtypes will raise a
4627
4641
``TypeError``:
4628
4642
4629
4643
>>> df.nlargest(3, 'b')
4644
+
4630
4645
Traceback (most recent call last):
4631
4646
TypeError: Column 'b' has dtype object, cannot use method 'nlargest'
4632
4647
"""
@@ -4645,25 +4660,75 @@ def nsmallest(self, n, columns, keep='first'):
4645
4660
Number of items to retrieve
4646
4661
columns : list or str
4647
4662
Column name or names to order by
4648
- keep : {'first', 'last'}, default 'first'
4663
+ keep : {'first', 'last', 'all'}, default 'first'
4649
4664
Where there are duplicate values:
4650
4665
- ``first`` : take the first occurrence.
4651
4666
- ``last`` : take the last occurrence.
4667
+ - ``all`` : do not drop any duplicates, even if it means
4668
+ selecting more than `n` items.
4669
+
4670
+ .. versionadded:: 0.24.0
4652
4671
4653
4672
Returns
4654
4673
-------
4655
4674
DataFrame
4656
4675
4657
4676
Examples
4658
4677
--------
4659
- >>> df = pd.DataFrame({'a': [1, 10, 8, 11, -1],
4660
- ... 'b': list('abdce'),
4661
- ... 'c': [1.0, 2.0, np.nan, 3.0, 4.0]})
4678
+ >>> df = pd.DataFrame({'a': [1, 10, 8, 11, 8, 2],
4679
+ ... 'b': list('abdcef'),
4680
+ ... 'c': [1.0, 2.0, np.nan, 3.0, 4.0, 9.0]})
4681
+ >>> df
4682
+ a b c
4683
+ 0 1 a 1.0
4684
+ 1 10 b 2.0
4685
+ 2 8 d NaN
4686
+ 3 11 c 3.0
4687
+ 4 8 e 4.0
4688
+ 5 2 f 9.0
4689
+
4690
+ In the following example, we will use ``nsmallest`` to select the
4691
+ three rows having the smallest values in column "a".
4692
+
4662
4693
>>> df.nsmallest(3, 'a')
4663
- a b c
4664
- 4 -1 e 4
4665
- 0 1 a 1
4666
- 2 8 d NaN
4694
+ a b c
4695
+ 0 1 a 1.0
4696
+ 5 2 f 9.0
4697
+ 2 8 d NaN
4698
+
4699
+ When using ``keep='last'``, ties are resolved in reverse order:
4700
+
4701
+ >>> df.nsmallest(3, 'a', keep='last')
4702
+ a b c
4703
+ 0 1 a 1.0
4704
+ 5 2 f 9.0
4705
+ 4 8 e 4.0
4706
+
4707
+ When using ``keep='all'``, all duplicate items are maintained:
4708
+
4709
+ >>> df.nsmallest(3, 'a', keep='all')
4710
+ a b c
4711
+ 0 1 a 1.0
4712
+ 5 2 f 9.0
4713
+ 2 8 d NaN
4714
+ 4 8 e 4.0
4715
+
4716
+ To order by the smallest values in column "a" and then "c", we can
4717
+ specify multiple columns like in the next example.
4718
+
4719
+ >>> df.nsmallest(3, ['a', 'c'])
4720
+ a b c
4721
+ 0 1 a 1.0
4722
+ 5 2 f 9.0
4723
+ 4 8 e 4.0
4724
+
4725
+ Attempting to use ``nsmallest`` on non-numeric dtypes will raise a
4726
+ ``TypeError``:
4727
+
4728
+ >>> df.nsmallest(3, 'b')
4729
+
4730
+ Traceback (most recent call last):
4731
+ TypeError: Column 'b' has dtype object, cannot use method 'nsmallest'
4667
4732
"""
4668
4733
return algorithms .SelectNFrame (self ,
4669
4734
n = n ,
0 commit comments