From e8395b946338c5a31a60418d5b8edadcbf25daf8 Mon Sep 17 00:00:00 2001
From: tdpetrou <petrou.theodore@gmail.com>
Date: Tue, 5 Dec 2017 22:16:03 -0500
Subject: [PATCH 1/4] added option keep=False to nlargest/nsmallest

---
 pandas/core/algorithms.py             | 10 +++++++---
 pandas/tests/frame/test_analytics.py  | 15 +++++++++++++++
 pandas/tests/series/test_analytics.py | 11 +++++++++++
 3 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 0ceb8966fd3c8..80e47b8b05ac4 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -910,8 +910,8 @@ def __init__(self, obj, n, keep):
         self.n = n
         self.keep = keep
 
-        if self.keep not in ('first', 'last'):
-            raise ValueError('keep must be either "first", "last"')
+        if self.keep not in ('first', 'last', False):
+            raise ValueError('keep must be either "first", "last", or False')
 
     def nlargest(self):
         return self.compute('nlargest')
@@ -979,7 +979,11 @@ def compute(self, method):
 
         kth_val = algos.kth_smallest(arr.copy(), n - 1)
         ns, = np.nonzero(arr <= kth_val)
-        inds = ns[arr[ns].argsort(kind='mergesort')][:n]
+        inds = ns[arr[ns].argsort(kind='mergesort')]
+
+        if self.keep is not False:
+            inds = inds[:n]
+
         if self.keep == 'last':
             # reverse indices
             inds = narr - 1 - inds
diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
index 4bba6d7601ae8..6d1409d4fc1d9 100644
--- a/pandas/tests/frame/test_analytics.py
+++ b/pandas/tests/frame/test_analytics.py
@@ -2202,6 +2202,21 @@ def test_n_duplicate_index(self, df_duplicates, n, order):
         expected = df.sort_values(order, ascending=False).head(n)
         tm.assert_frame_equal(result, expected)
 
+    def test_keep_false(self):
+        df = pd.DataFrame({'a': [5, 4, 4, 2, 3, 3, 3, 3],
+                           'b': [10, 9, 8, 7, 5, 50, 10, 20]})
+        result = df.nlargest(4, 'a', keep=False)
+        expected = pd.DataFrame({'a': {0: 5, 1: 4, 2: 4, 4: 3,
+                                       5: 3, 6: 3, 7: 3},
+                                 'b': {0: 10, 1: 9, 2: 8, 4: 5,
+                                       5: 50, 6: 10, 7: 20}})
+        tm.assert_frame_equal(result, expected)
+
+        result = df.nsmallest(2, 'a', keep=False)
+        expected = pd.DataFrame({'a': {3: 2, 4: 3, 5: 3, 6: 3, 7: 3},
+                                 'b': {3: 7, 4: 5, 5: 50, 6: 10, 7: 20}})
+        tm.assert_frame_equal(result, expected)
+
     def test_series_broadcasting(self):
         # smoke test for numpy warnings
         # GH 16378, GH 16306
diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py
index 289b5c01c1263..d9d3f777e84ac 100644
--- a/pandas/tests/series/test_analytics.py
+++ b/pandas/tests/series/test_analytics.py
@@ -1867,6 +1867,17 @@ def test_n(self, n):
         expected = s.sort_values().head(n)
         assert_series_equal(result, expected)
 
+    def test_keep_false(self):
+        s = Series([10, 9, 8, 7, 7, 7, 7, 6])
+        result = s.nlargest(4, keep=False)
+        expected = Series([10, 9, 8, 7, 7, 7, 7])
+        print(result, expected)
+        assert_series_equal(result, expected)
+
+        result = s.nsmallest(2, keep=False)
+        expected = Series([6, 7, 7, 7, 7], index=[7, 3, 4, 5, 6])
+        assert_series_equal(result, expected)
+
 
 class TestCategoricalSeriesAnalytics(object):
 

From 5b1e7b046f733374a6a25a0c70e7cb55a50c71e4 Mon Sep 17 00:00:00 2001
From: tdpetrou <petrou.theodore@gmail.com>
Date: Wed, 13 Dec 2017 12:28:00 -0500
Subject: [PATCH 2/4] add "all" argument for nlargest/nsmallest

---
 pandas/core/algorithms.py             |  6 +--
 pandas/core/frame.py                  | 58 +++++++++++++++++++++------
 pandas/tests/frame/test_analytics.py  |  7 ++--
 pandas/tests/series/test_analytics.py |  7 ++--
 4 files changed, 57 insertions(+), 21 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 80e47b8b05ac4..099fec74d266c 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -910,8 +910,8 @@ def __init__(self, obj, n, keep):
         self.n = n
         self.keep = keep
 
-        if self.keep not in ('first', 'last', False):
-            raise ValueError('keep must be either "first", "last", or False')
+        if self.keep not in ('first', 'last', 'all'):
+            raise ValueError('keep must be either "first", "last", or "all"')
 
     def nlargest(self):
         return self.compute('nlargest')
@@ -981,7 +981,7 @@ def compute(self, method):
         ns, = np.nonzero(arr <= kth_val)
         inds = ns[arr[ns].argsort(kind='mergesort')]
 
-        if self.keep is not False:
+        if self.keep != 'all':
             inds = inds[:n]
 
         if self.keep == 'last':
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 5f323d0f040bc..98b6801c5f394 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3773,6 +3773,9 @@ def nlargest(self, n, columns, keep='first'):
             Where there are duplicate values:
             - ``first`` : take the first occurrence.
             - ``last`` : take the last occurrence.
+            - ``all`` : keep all ties of nth largest value.
+
+            .. versionadded:: 0.22.0
 
         Returns
         -------
@@ -3780,14 +3783,28 @@ def nlargest(self, n, columns, keep='first'):
 
         Examples
         --------
-        >>> df = DataFrame({'a': [1, 10, 8, 11, -1],
-        ...                 'b': list('abdce'),
-        ...                 'c': [1.0, 2.0, np.nan, 3.0, 4.0]})
-        >>> df.nlargest(3, 'a')
+        >>> df = pd.DataFrame({'a': [1, 10, 8, 11, 8, 2],
+        ...                    'b': list('abdcef'),
+        ...                    'c': [1.0, 2.0, np.nan, 3.0, 4.0, 9.0]})
+
+        >>> df.nlargest(3, 'a', keep='first')
+            a  b   c
+        3  11  c   3
+        1  10  b   2
+        2   8  d NaN
+
+        >>> df.nlargest(3, 'a', keep='last')
+            a  b   c
+        3  11  c   3
+        1  10  b   2
+        4   8  e   4
+
+        >>> df.nlargest(3, 'a', keep='all')
             a  b   c
         3  11  c   3
         1  10  b   2
         2   8  d NaN
+        4   8  e   4
         """
         return algorithms.SelectNFrame(self,
                                        n=n,
@@ -3808,6 +3825,9 @@ def nsmallest(self, n, columns, keep='first'):
             Where there are duplicate values:
             - ``first`` : take the first occurrence.
             - ``last`` : take the last occurrence.
+            - ``all`` : keep all ties of nth largest value.
+
+            .. versionadded:: 0.22.0
 
         Returns
         -------
@@ -3815,14 +3835,28 @@ def nsmallest(self, n, columns, keep='first'):
 
         Examples
         --------
-        >>> df = DataFrame({'a': [1, 10, 8, 11, -1],
-        ...                 'b': list('abdce'),
-        ...                 'c': [1.0, 2.0, np.nan, 3.0, 4.0]})
-        >>> df.nsmallest(3, 'a')
-           a  b   c
-        4 -1  e   4
-        0  1  a   1
-        2  8  d NaN
+        >>> df = pd.DataFrame({'a': [1, 10, 8, 11, 8, 2],
+        ...                    'b': list('abdcef'),
+        ...                    'c': [1.0, 2.0, np.nan, 3.0, 4.0, 9.0]})
+
+        >>> df.nsmallest(3, 'a', keep='first')
+           a  b    c
+        0  1  a  1.0
+        5  2  f  9.0
+        2  8  d  NaN
+
+        >>> df.nsmallest(3, 'a', keep='last')
+           a  b    c
+        0  1  a  1.0
+        5  2  f  9.0
+        4  8  e  4.0
+
+        >>> df.nsmallest(3, 'a', keep='all')
+           a  b    c
+        0  1  a  1.0
+        5  2  f  9.0
+        2  8  d  NaN
+        4  8  e  4.0
         """
         return algorithms.SelectNFrame(self,
                                        n=n,
diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
index 6d1409d4fc1d9..c038d76879ce1 100644
--- a/pandas/tests/frame/test_analytics.py
+++ b/pandas/tests/frame/test_analytics.py
@@ -2202,17 +2202,18 @@ def test_n_duplicate_index(self, df_duplicates, n, order):
         expected = df.sort_values(order, ascending=False).head(n)
         tm.assert_frame_equal(result, expected)
 
-    def test_keep_false(self):
+    def test_keep_all_ties(self):
+        # GH 16818
         df = pd.DataFrame({'a': [5, 4, 4, 2, 3, 3, 3, 3],
                            'b': [10, 9, 8, 7, 5, 50, 10, 20]})
-        result = df.nlargest(4, 'a', keep=False)
+        result = df.nlargest(4, 'a', keep='all')
         expected = pd.DataFrame({'a': {0: 5, 1: 4, 2: 4, 4: 3,
                                        5: 3, 6: 3, 7: 3},
                                  'b': {0: 10, 1: 9, 2: 8, 4: 5,
                                        5: 50, 6: 10, 7: 20}})
         tm.assert_frame_equal(result, expected)
 
-        result = df.nsmallest(2, 'a', keep=False)
+        result = df.nsmallest(2, 'a', keep='all')
         expected = pd.DataFrame({'a': {3: 2, 4: 3, 5: 3, 6: 3, 7: 3},
                                  'b': {3: 7, 4: 5, 5: 50, 6: 10, 7: 20}})
         tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py
index d9d3f777e84ac..2e4a84f8bcd6b 100644
--- a/pandas/tests/series/test_analytics.py
+++ b/pandas/tests/series/test_analytics.py
@@ -1867,14 +1867,15 @@ def test_n(self, n):
         expected = s.sort_values().head(n)
         assert_series_equal(result, expected)
 
-    def test_keep_false(self):
+    def test_keep_all_ties(self):
+        # GH 16818
         s = Series([10, 9, 8, 7, 7, 7, 7, 6])
-        result = s.nlargest(4, keep=False)
+        result = s.nlargest(4, keep='all')
         expected = Series([10, 9, 8, 7, 7, 7, 7])
         print(result, expected)
         assert_series_equal(result, expected)
 
-        result = s.nsmallest(2, keep=False)
+        result = s.nsmallest(2, keep='all')
         expected = Series([6, 7, 7, 7, 7], index=[7, 3, 4, 5, 6])
         assert_series_equal(result, expected)
 

From 5cd3a8dc97cb5e66a90e0e560ddf881b598af5d0 Mon Sep 17 00:00:00 2001
From: tdpetrou <petrou.theodore@gmail.com>
Date: Wed, 13 Dec 2017 12:56:10 -0500
Subject: [PATCH 3/4] added whatsnew and cleaned up docstrings

---
 doc/source/whatsnew/v0.22.0.txt |  2 ++
 pandas/core/frame.py            | 16 ++++++++--------
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt
index ab7f18bce47d3..53d8aa5946845 100644
--- a/doc/source/whatsnew/v0.22.0.txt
+++ b/doc/source/whatsnew/v0.22.0.txt
@@ -138,6 +138,8 @@ Other Enhancements
 - :func:`Series` / :func:`DataFrame` tab completion also returns identifiers in the first level of a :func:`MultiIndex`. (:issue:`16326`)
 - :func:`read_excel()` has gained the ``nrows`` parameter (:issue:`16645`)
 - :func:``DataFrame.to_json`` and ``Series.to_json`` now accept an ``index`` argument which allows the user to exclude the index from the JSON output (:issue:`17394`)
+- :func:`Series` / :func:`DataFrame` methods :func:`nlargest` / :func:`nsmallest` now accept the value ``'all'`` for the ``keep`` argument. This keeps all ties for the nth largest/smallest value (:issue:`16818`).
+
 
 .. _whatsnew_0220.api_breaking:
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 98b6801c5f394..14c81975ab159 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3769,11 +3769,11 @@ def nlargest(self, n, columns, keep='first'):
             Number of items to retrieve
         columns : list or str
             Column name or names to order by
-        keep : {'first', 'last'}, default 'first'
+        keep : {'first', 'last', 'all'}, default 'first'
             Where there are duplicate values:
-            - ``first`` : take the first occurrence.
-            - ``last`` : take the last occurrence.
-            - ``all`` : keep all ties of nth largest value.
+            - 'first' : take the first occurrence.
+            - 'last' : take the last occurrence.
+            - 'all' : keep all ties of nth largest value.
 
             .. versionadded:: 0.22.0
 
@@ -3821,11 +3821,11 @@ def nsmallest(self, n, columns, keep='first'):
             Number of items to retrieve
         columns : list or str
             Column name or names to order by
-        keep : {'first', 'last'}, default 'first'
+        keep : {'first', 'last', 'all'}, default 'first'
             Where there are duplicate values:
-            - ``first`` : take the first occurrence.
-            - ``last`` : take the last occurrence.
-            - ``all`` : keep all ties of nth largest value.
+            - 'first' : take the first occurrence.
+            - 'last' : take the last occurrence.
+            - 'all' : keep all ties of nth largest value.
 
             .. versionadded:: 0.22.0
 

From 56954b4ec614d650ed6d239ba219746bd56c4098 Mon Sep 17 00:00:00 2001
From: tdpetrou <petrou.theodore@gmail.com>
Date: Wed, 13 Dec 2017 13:00:50 -0500
Subject: [PATCH 4/4] cleaned up docstrings

---
 pandas/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 14c81975ab159..d1441da0d810f 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3825,7 +3825,7 @@ def nsmallest(self, n, columns, keep='first'):
             Where there are duplicate values:
             - 'first' : take the first occurrence.
             - 'last' : take the last occurrence.
-            - 'all' : keep all ties of nth largest value.
+            - 'all' : keep all ties of nth smallest value.
 
             .. versionadded:: 0.22.0