BUG: groupby.nth after selection (pandas-dev#53519)

rhshadrach · topper-123 · commit e5f5f288a25c · 2023-06-05T23:01:58.000+01:00
diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -443,6 +443,8 @@ Groupby/resample/rolling
 - Bug in :meth:`GroupBy.groups` with a datetime key in conjunction with another key produced incorrect number of group keys (:issue:`51158`)
 - Bug in :meth:`GroupBy.quantile` may implicitly sort the result index with ``sort=False`` (:issue:`53009`)
 - Bug in :meth:`GroupBy.var` failing to raise ``TypeError`` when called with datetime64, timedelta64 or :class:`PeriodDtype` values (:issue:`52128`, :issue:`53045`)
+- Bug in :meth:`SeriresGroupBy.nth` and :meth:`DataFrameGroupBy.nth` after performing column selection when using ``dropna="any"`` or ``dropna="all"`` would not subset columns (:issue:`53518`)
+- Bug in :meth:`SeriresGroupBy.nth` and :meth:`DataFrameGroupBy.nth` raised after performing column selection when using ``dropna="any"`` or ``dropna="all"`` resulted in rows being dropped (:issue:`53518`)
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -3201,11 +3201,14 @@ def _nth(
         # old behaviour, but with all and any support for DataFrames.
         # modified in GH 7559 to have better perf
         n = cast(int, n)
-        dropped = self.obj.dropna(how=dropna, axis=self.axis)
+        dropped = self._selected_obj.dropna(how=dropna, axis=self.axis)
 
         # get a new grouper for our dropped obj
         grouper: np.ndarray | Index | ops.BaseGrouper
-        if self.keys is None and self.level is None:
+        if len(dropped) == len(self._selected_obj):
+            # Nothing was dropped, can use the same grouper
+            grouper = self.grouper
+        else:
             # we don't have the grouper info available
             # (e.g. we have selected out
             # a column that is not in the current object)
@@ -3219,17 +3222,6 @@ def _nth(
                 values = np.where(nulls, NA, grouper)  # type: ignore[call-overload]
                 grouper = Index(values, dtype="Int64")
 
-        else:
-            # create a grouper with the original parameters, but on dropped
-            # object
-            grouper, _, _ = get_grouper(
-                dropped,
-                key=self.keys,
-                axis=self.axis,
-                level=self.level,
-                sort=self.sort,
-            )
-
         if self.axis == 1:
             grb = dropped.T.groupby(grouper, as_index=self.as_index, sort=self.sort)
         else:
diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py
@@ -852,3 +852,24 @@ def test_head_tail_dropna_false():
 
     result = df.groupby(["X", "Y"], dropna=False).nth(n=0)
     tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("selection", ("b", ["b"], ["b", "c"]))
+@pytest.mark.parametrize("dropna", ["any", "all", None])
+def test_nth_after_selection(selection, dropna):
+    # GH#11038, GH#53518
+    df = DataFrame(
+        {
+            "a": [1, 1, 2],
+            "b": [np.nan, 3, 4],
+            "c": [5, 6, 7],
+        }
+    )
+    gb = df.groupby("a")[selection]
+    result = gb.nth(0, dropna=dropna)
+    if dropna == "any" or (dropna == "all" and selection != ["b", "c"]):
+        locs = [1, 2]
+    else:
+        locs = [0, 2]
+    expected = df.loc[locs, selection]
+    tm.assert_equal(result, expected)