From 7f15d7a2f29006a855ed6ee43f7b4911f5e5fe99 Mon Sep 17 00:00:00 2001
From: Carlos Eduardo Moreira dos Santos <cems@cemshost.com.br>
Date: Sat, 10 Mar 2018 19:11:19 -0300
Subject: [PATCH 1/4] DOC: Improve the docstring of DataFrame.nlargest

Co-authored-by: Igor C. A. de Lima <igorcadelima@gmail.com>
Signed-off-by: Carlos Eduardo Moreira dos Santos <cems@cemshost.com.br>
Signed-off-by: Igor C. A. de Lima <igorcadelima@gmail.com>
---
 pandas/core/frame.py | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index a66d00fff9714..b5abced106831 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3835,23 +3835,33 @@ def sortlevel(self, level=0, axis=0, ascending=True, inplace=False,
                                inplace=inplace, sort_remaining=sort_remaining)
 
     def nlargest(self, n, columns, keep='first'):
-        """Get the rows of a DataFrame sorted by the `n` largest
-        values of `columns`.
+        """
+        Return the `n` largest rows sorted by `columns`.
+
+        Sort the DataFrame by `columns` in descending order and return the top
+        `n` rows.
 
         Parameters
         ----------
         n : int
-            Number of items to retrieve
+            Number of items to retrieve.
         columns : list or str
-            Column name or names to order by
+            Column name or names to retrieve values from.
         keep : {'first', 'last'}, default 'first'
             Where there are duplicate values:
-            - ``first`` : take the first occurrence.
-            - ``last`` : take the last occurrence.
+            - `first` : take the first occurrence;
+            - `last` : take the last occurrence.
 
         Returns
         -------
         DataFrame
+            The `n` largest rows in the DataFrame, sorted by the given columns
+            in descending order.
+
+        See Also
+        --------
+        DataFrame.nsmallest : Return the `n` smallest rows sorted by given
+            columns.
 
         Examples
         --------
@@ -3859,10 +3869,10 @@ def nlargest(self, n, columns, keep='first'):
         ...                    'b': list('abdce'),
         ...                    'c': [1.0, 2.0, np.nan, 3.0, 4.0]})
         >>> df.nlargest(3, 'a')
-            a  b   c
-        3  11  c   3
-        1  10  b   2
-        2   8  d NaN
+            a  b    c
+        3  11  c  3.0
+        1  10  b  2.0
+        2   8  d  NaN
         """
         return algorithms.SelectNFrame(self,
                                        n=n,

From 873efa237ec57e6ece3a4bcf587b3815a30db706 Mon Sep 17 00:00:00 2001
From: Carlos Eduardo Moreira dos Santos <cems@cemshost.com.br>
Date: Sun, 11 Mar 2018 20:49:48 -0300
Subject: [PATCH 2/4] DOC: DataFrame.nlargest - apply review suggestions

---
 pandas/core/frame.py | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index b5abced106831..86f60b39b6ecd 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3836,10 +3836,11 @@ def sortlevel(self, level=0, axis=0, ascending=True, inplace=False,
 
     def nlargest(self, n, columns, keep='first'):
         """
-        Return the `n` largest rows sorted by `columns`.
+        Return the `n` largest rows ordered by `columns`.
 
-        Sort the DataFrame by `columns` in descending order and return the top
-        `n` rows.
+        Return the `n` largest rows of `columns` in descending order. The
+        remaining columns, although not used for ordering, are returned as
+        well.
 
         Parameters
         ----------
@@ -3855,22 +3856,36 @@ def nlargest(self, n, columns, keep='first'):
         Returns
         -------
         DataFrame
-            The `n` largest rows in the DataFrame, sorted by the given columns
+            The `n` largest rows in the DataFrame, ordered by the given columns
             in descending order.
 
         See Also
         --------
-        DataFrame.nsmallest : Return the `n` smallest rows sorted by given
+        DataFrame.nsmallest : Return the `n` smallest rows ordered by the given
             columns.
 
         Examples
         --------
-        >>> df = pd.DataFrame({'a': [1, 10, 8, 11, -1],
+        >>> df = pd.DataFrame({'a': [1, 10, 8, 10, -1],
         ...                    'b': list('abdce'),
         ...                    'c': [1.0, 2.0, np.nan, 3.0, 4.0]})
+        >>> df
+            a  b    c
+        0   1  a  1.0
+        1  10  b  2.0
+        2   8  d  NaN
+        3  10  c  3.0
+        4  -1  e  4.0
+
         >>> df.nlargest(3, 'a')
             a  b    c
-        3  11  c  3.0
+        1  10  b  2.0
+        3  10  c  3.0
+        2   8  d  NaN
+
+        >>> df.nlargest(3, 'a', keep='last')
+            a  b    c
+        3  10  c  3.0
         1  10  b  2.0
         2   8  d  NaN
         """

From de08075a5ce8a546a8ac69c2c1dbe1a46321af45 Mon Sep 17 00:00:00 2001
From: Carlos Eduardo Moreira dos Santos <cems@cemshost.com.br>
Date: Mon, 12 Mar 2018 20:58:35 -0300
Subject: [PATCH 3/4] DOC: DataFrame.nlargest - apply review suggestions

---
 pandas/core/frame.py | 54 +++++++++++++++++++++++++++++++++-----------
 1 file changed, 41 insertions(+), 13 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 86f60b39b6ecd..ed352a14ad05b 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3836,33 +3836,40 @@ def sortlevel(self, level=0, axis=0, ascending=True, inplace=False,
 
     def nlargest(self, n, columns, keep='first'):
         """
-        Return the `n` largest rows ordered by `columns`.
+        Return the `n` first rows ordered by `columns` in descending order.
 
-        Return the `n` largest rows of `columns` in descending order. The
-        remaining columns, although not used for ordering, are returned as
-        well.
+        Return the `n` first rows with the largest values in `columns`, in
+        descending order. The columns that are not specified are returned as
+        well, but not used for ordering.
 
         Parameters
         ----------
         n : int
-            Number of items to retrieve.
-        columns : list or str
-            Column name or names to retrieve values from.
+            Number of rows to return.
+        columns : iterable or single value
+            Column label(s) to order by.
         keep : {'first', 'last'}, default 'first'
             Where there are duplicate values:
-            - `first` : take the first occurrence;
-            - `last` : take the last occurrence.
+
+            - `first` : prioritize the first occurrence(s)
+            - `last` : prioritize the last occurrence(s)
 
         Returns
         -------
         DataFrame
-            The `n` largest rows in the DataFrame, ordered by the given columns
-            in descending order.
+            The `n` first rows ordered by the given columns in descending
+            order.
 
         See Also
         --------
-        DataFrame.nsmallest : Return the `n` smallest rows ordered by the given
-            columns.
+        DataFrame.nsmallest : Return the `n` first rows ordered by `columns` in
+            ascending order.
+
+        Notes
+        -----
+        This function cannot be used with all column types. For example, when
+        specifying columns with `object` or `category` dtypes, ``TypeError`` is
+        raised.
 
         Examples
         --------
@@ -3877,17 +3884,38 @@ def nlargest(self, n, columns, keep='first'):
         3  10  c  3.0
         4  -1  e  4.0
 
+        In the following example, we will use ``nlargest`` to select the three
+        rows having the largest values in column "a".
+
         >>> df.nlargest(3, 'a')
             a  b    c
         1  10  b  2.0
         3  10  c  3.0
         2   8  d  NaN
 
+        When using ``keep='last'``, ties are resolved in reverse order:
+
         >>> df.nlargest(3, 'a', keep='last')
             a  b    c
         3  10  c  3.0
         1  10  b  2.0
         2   8  d  NaN
+        
+        To order by the largest values in column "a" and then "c", we can
+        specify multiple columns like in the next example.
+
+        >>> df.nlargest(3, ['a', 'c'])
+            a  b    c
+        3  10  c  3.0
+        1  10  b  2.0
+        2   8  d  NaN
+
+        The dtype of column "b" is `object` and attempting to get its largest
+        values raises a ``TypeError`` exception:
+
+        >>> df.nlargest(3, 'b')
+        Traceback (most recent call last):
+        TypeError: Column 'b' has dtype object, cannot use method 'nlargest' with this dtype
         """
         return algorithms.SelectNFrame(self,
                                        n=n,

From 7dd4f02466cf9b35e76af2916dd599048e6d5281 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Sat, 17 Mar 2018 11:28:15 +0100
Subject: [PATCH 4/4] DOC: DataFrame.nlargest - reword summary and add See Also entries

---
 pandas/core/frame.py | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index ed352a14ad05b..b2ef75ab7fdce 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3836,17 +3836,21 @@ def sortlevel(self, level=0, axis=0, ascending=True, inplace=False,
 
     def nlargest(self, n, columns, keep='first'):
         """
-        Return the `n` first rows ordered by `columns` in descending order.
+        Return the first `n` rows ordered by `columns` in descending order.
 
-        Return the `n` first rows with the largest values in `columns`, in
+        Return the first `n` rows with the largest values in `columns`, in
         descending order. The columns that are not specified are returned as
         well, but not used for ordering.
 
+        This method is equivalent to
+        ``df.sort_values(columns, ascending=False).head(n)``, but more
+        performant.
+
         Parameters
         ----------
         n : int
             Number of rows to return.
-        columns : iterable or single value
+        columns : label or list of labels
             Column label(s) to order by.
         keep : {'first', 'last'}, default 'first'
             Where there are duplicate values:
@@ -3857,13 +3861,15 @@ def nlargest(self, n, columns, keep='first'):
         Returns
         -------
         DataFrame
-            The `n` first rows ordered by the given columns in descending
+            The first `n` rows ordered by the given columns in descending
             order.
 
         See Also
         --------
-        DataFrame.nsmallest : Return the `n` first rows ordered by `columns` in
+        DataFrame.nsmallest : Return the first `n` rows ordered by `columns` in
             ascending order.
+        DataFrame.sort_values : Sort DataFrame by the values.
+        DataFrame.head : Return the first `n` rows without re-ordering.
 
         Notes
         -----
@@ -3900,7 +3906,7 @@ def nlargest(self, n, columns, keep='first'):
         3  10  c  3.0
         1  10  b  2.0
         2   8  d  NaN
-        
+
         To order by the largest values in column "a" and then "c", we can
         specify multiple columns like in the next example.
 
@@ -3910,12 +3916,12 @@ def nlargest(self, n, columns, keep='first'):
         1  10  b  2.0
         2   8  d  NaN
 
-        The dtype of column "b" is `object` and attempting to get its largest
-        values raises a ``TypeError`` exception:
+        Attempting to use ``nlargest`` on non-numeric dtypes will raise a
+        ``TypeError``:
 
         >>> df.nlargest(3, 'b')
         Traceback (most recent call last):
-        TypeError: Column 'b' has dtype object, cannot use method 'nlargest' with this dtype
+        TypeError: Column 'b' has dtype object, cannot use method 'nlargest'
         """
         return algorithms.SelectNFrame(self,
                                        n=n,