@@ -4348,34 +4348,93 @@ def sortlevel(self, level=0, axis=0, ascending=True, inplace=False,
4348
4348
inplace = inplace , sort_remaining = sort_remaining )
4349
4349
4350
4350
def nlargest (self , n , columns , keep = 'first' ):
4351
- """Get the rows of a DataFrame sorted by the `n` largest
4352
- values of `columns`.
4351
+ """
4352
+ Return the first `n` rows ordered by `columns` in descending order.
4353
+
4354
+ Return the first `n` rows with the largest values in `columns`, in
4355
+ descending order. The columns that are not specified are returned as
4356
+ well, but not used for ordering.
4357
+
4358
+ This method is equivalent to
4359
+ ``df.sort_values(columns, ascending=False).head(n)``, but more
4360
+ performant.
4353
4361
4354
4362
Parameters
4355
4363
----------
4356
4364
n : int
4357
- Number of items to retrieve
4358
- columns : list or str
4359
- Column name or names to order by
4365
+ Number of rows to return.
4366
+ columns : label or list of labels
4367
+ Column label(s) to order by.
4360
4368
keep : {'first', 'last'}, default 'first'
4361
4369
Where there are duplicate values:
4362
- - ``first`` : take the first occurrence.
4363
- - ``last`` : take the last occurrence.
4370
+
4371
+ - `first` : prioritize the first occurrence(s).
4372
+ - `last` : prioritize the last occurrence(s).
4364
4373
4365
4374
Returns
4366
4375
-------
4367
4376
DataFrame
4377
+ The first `n` rows ordered by the given columns in descending
4378
+ order.
4379
+
4380
+ See Also
4381
+ --------
4382
+ DataFrame.nsmallest : Return the first `n` rows ordered by `columns` in
4383
+ ascending order.
4384
+ DataFrame.sort_values : Sort DataFrame by the values.
4385
+ DataFrame.head : Return the first `n` rows without re-ordering.
4386
+
4387
+ Notes
4388
+ -----
4389
+ This function cannot be used with all column types. For example, when
4390
+ specifying columns with `object` or `category` dtypes, ``TypeError`` is
4391
+ raised.
4368
4392
4369
4393
Examples
4370
4394
--------
4371
- >>> df = pd.DataFrame({'a': [1, 10, 8, 11 , -1],
4395
+ >>> df = pd.DataFrame({'a': [1, 10, 8, 10 , -1],
4372
4396
... 'b': list('abdce'),
4373
4397
... 'c': [1.0, 2.0, np.nan, 3.0, 4.0]})
4398
+ >>> df
4399
+ a b c
4400
+ 0 1 a 1.0
4401
+ 1 10 b 2.0
4402
+ 2 8 d NaN
4403
+ 3 10 c 3.0
4404
+ 4 -1 e 4.0
4405
+
4406
+ In the following example, we will use ``nlargest`` to select the three
4407
+ rows having the largest values in column "a".
4408
+
4374
4409
>>> df.nlargest(3, 'a')
4375
- a b c
4376
- 3 11 c 3
4377
- 1 10 b 2
4378
- 2 8 d NaN
4410
+ a b c
4411
+ 1 10 b 2.0
4412
+ 3 10 c 3.0
4413
+ 2 8 d NaN
4414
+
4415
+ When using ``keep='last'``, ties are resolved in reverse order:
4416
+
4417
+ >>> df.nlargest(3, 'a', keep='last')
4418
+ a b c
4419
+ 3 10 c 3.0
4420
+ 1 10 b 2.0
4421
+ 2 8 d NaN
4422
+
4423
+ To order by the largest values in column "a" and then "c", we can
4424
+ specify multiple columns as in the next example.
4425
+
4426
+ >>> df.nlargest(3, ['a', 'c'])
4427
+ a b c
4428
+ 3 10 c 3.0
4429
+ 1 10 b 2.0
4430
+ 2 8 d NaN
4431
+
4432
+ Attempting to use ``nlargest`` on non-numeric dtypes will raise a
4433
+ ``TypeError``:
4434
+
4435
+ >>> df.nlargest(3, 'b')
4436
+ Traceback (most recent call last):
4437
+ TypeError: Column 'b' has dtype object, cannot use method 'nlargest'
4379
4438
"""
4380
4439
return algorithms .SelectNFrame (self ,
4381
4440
n = n ,
0 commit comments