Skip to content

Commit e5f5f28

Browse files
rhshadrach authored and topper-123 committed
BUG: groupby.nth after selection (pandas-dev#53519)
1 parent d0ea131 commit e5f5f28

File tree

3 files changed

+28
-13
lines changed

3 files changed

+28
-13
lines changed

doc/source/whatsnew/v2.1.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,8 @@ Groupby/resample/rolling
443443
- Bug in :meth:`GroupBy.groups` with a datetime key in conjunction with another key produced incorrect number of group keys (:issue:`51158`)
444444
- Bug in :meth:`GroupBy.quantile` may implicitly sort the result index with ``sort=False`` (:issue:`53009`)
445445
- Bug in :meth:`GroupBy.var` failing to raise ``TypeError`` when called with datetime64, timedelta64 or :class:`PeriodDtype` values (:issue:`52128`, :issue:`53045`)
446+
- Bug in :meth:`SeriesGroupBy.nth` and :meth:`DataFrameGroupBy.nth` after performing column selection when using ``dropna="any"`` or ``dropna="all"`` would not subset columns (:issue:`53518`)
447+
- Bug in :meth:`SeriesGroupBy.nth` and :meth:`DataFrameGroupBy.nth` raising after performing column selection when using ``dropna="any"`` or ``dropna="all"`` resulted in rows being dropped (:issue:`53518`)
446448

447449
Reshaping
448450
^^^^^^^^^

pandas/core/groupby/groupby.py

+5-13
Original file line numberDiff line numberDiff line change
@@ -3201,11 +3201,14 @@ def _nth(
32013201
# old behaviour, but with all and any support for DataFrames.
32023202
# modified in GH 7559 to have better perf
32033203
n = cast(int, n)
3204-
dropped = self.obj.dropna(how=dropna, axis=self.axis)
3204+
dropped = self._selected_obj.dropna(how=dropna, axis=self.axis)
32053205

32063206
# get a new grouper for our dropped obj
32073207
grouper: np.ndarray | Index | ops.BaseGrouper
3208-
if self.keys is None and self.level is None:
3208+
if len(dropped) == len(self._selected_obj):
3209+
# Nothing was dropped, can use the same grouper
3210+
grouper = self.grouper
3211+
else:
32093212
# we don't have the grouper info available
32103213
# (e.g. we have selected out
32113214
# a column that is not in the current object)
@@ -3219,17 +3222,6 @@ def _nth(
32193222
values = np.where(nulls, NA, grouper) # type: ignore[call-overload]
32203223
grouper = Index(values, dtype="Int64")
32213224

3222-
else:
3223-
# create a grouper with the original parameters, but on dropped
3224-
# object
3225-
grouper, _, _ = get_grouper(
3226-
dropped,
3227-
key=self.keys,
3228-
axis=self.axis,
3229-
level=self.level,
3230-
sort=self.sort,
3231-
)
3232-
32333225
if self.axis == 1:
32343226
grb = dropped.T.groupby(grouper, as_index=self.as_index, sort=self.sort)
32353227
else:

pandas/tests/groupby/test_nth.py

+21
Original file line numberDiff line numberDiff line change
@@ -852,3 +852,24 @@ def test_head_tail_dropna_false():
852852

853853
result = df.groupby(["X", "Y"], dropna=False).nth(n=0)
854854
tm.assert_frame_equal(result, expected)
855+
856+
857+
@pytest.mark.parametrize("selection", ("b", ["b"], ["b", "c"]))
858+
@pytest.mark.parametrize("dropna", ["any", "all", None])
859+
def test_nth_after_selection(selection, dropna):
860+
# GH#11038, GH#53518
861+
df = DataFrame(
862+
{
863+
"a": [1, 1, 2],
864+
"b": [np.nan, 3, 4],
865+
"c": [5, 6, 7],
866+
}
867+
)
868+
gb = df.groupby("a")[selection]
869+
result = gb.nth(0, dropna=dropna)
870+
if dropna == "any" or (dropna == "all" and selection != ["b", "c"]):
871+
locs = [1, 2]
872+
else:
873+
locs = [0, 2]
874+
expected = df.loc[locs, selection]
875+
tm.assert_equal(result, expected)

0 commit comments

Comments
 (0)