pandas-dev · jreback · May 5, 2019 · Apr 17, 2019 · Apr 19, 2019 · Apr 19, 2019
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
@@ -389,6 +389,7 @@ Groupby/Resample/Rolling
 - Ensured that result group order is correct when grouping on an ordered ``Categorical`` and specifying ``observed=True`` (:issue:`25871`, :issue:`25167`)
 - Bug in :meth:`pandas.core.window.Rolling.min` and :meth:`pandas.core.window.Rolling.max` that caused a memory leak (:issue:`25893`)
 - Bug in :func:`idxmax` and :func:`idxmin` on :meth:`DataFrame.groupby` with datetime column would return incorrect dtype (:issue:`25444`, :issue:`15306`)
+- Bug in :meth:`pandas.core.groupby.GroupBy.nth` that returned incorrect results when the grouping contained NA values (:issue:`26011`)
 
 Reshaping
 ^^^^^^^^^

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -12,7 +12,7 @@ class providing the base-class of operations.
 import datetime
 from functools import partial, wraps
 import types
-from typing import FrozenSet, Optional, Tuple, Type
+from typing import cast, FrozenSet, List, Optional, Tuple, Type, Union
 import warnings
 
 import numpy as np
@@ -1546,15 +1546,16 @@ def backfill(self, limit=None):
 
     @Substitution(name='groupby')
     @Substitution(see_also=_common_see_also)
-    def nth(self, n, dropna=None):
+    def nth(self,
+            n: Union[int, List[int]],
+            dropna: Optional[str] = None) -> DataFrame:
         """
         Take the nth row from each group if n is an int, or a subset of rows
         if n is a list of ints.
 
         If dropna, will take the nth non-null row, dropna is either
-        Truthy (if a Series) or 'all', 'any' (if a DataFrame);
-        this is equivalent to calling dropna(how=dropna) before the
-        groupby.
+        'all' or 'any'; this is equivalent to calling dropna(how=dropna)
+        before the groupby.
 
         Parameters
         ----------
@@ -1636,11 +1637,13 @@ def nth(self, n, dropna=None):
                                  -nth_values)
             mask = mask_left | mask_right
 
+            ids, _, _ = self.grouper.group_info
+            mask = mask & (ids != -1)  # Drop NA values in grouping
+
             out = self._selected_obj[mask]
             if not self.as_index:
                 return out
 
-            ids, _, _ = self.grouper.group_info
             out.index = self.grouper.result_index[ids[mask]]
 
             return out.sort_index() if self.sort else out
@@ -1665,6 +1668,7 @@ def nth(self, n, dropna=None):
 
         # old behaviour, but with all and any support for DataFrames.
         # modified in GH 7559 to have better perf
+        n = cast(int, n)
         max_len = n if n >= 0 else - 1 - n
         dropped = self.obj.dropna(how=dropna, axis=self.axis)
 

diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py
@@ -434,3 +434,20 @@ def test_nth_column_order():
                          columns=['C', 'B'],
                          index=Index([1, 2], name='A'))
     assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("dropna", [None, 'any', 'all'])
+def test_nth_nan_in_grouper(dropna):
+    # GH 26011: rows with a NaN group key must be excluded from nth
+    df = DataFrame([
+        [np.nan, 0, 1],  # NaN key: must not appear in the result
+        ['abc', 2, 3],
+        [np.nan, 4, 5],
+        ['def', 6, 7],
+        [np.nan, 8, 9],
+    ], columns=list('abc'))
+    result = df.groupby('a').nth(0, dropna=dropna)
+    expected = pd.DataFrame([[2, 3], [6, 7]], columns=list('bc'),
+                            index=Index(['abc', 'def'], name='a'))
+
+    assert_frame_equal(result, expected)