Skip to content

Commit d2031d7

Browse files
WillAyd authored and TomAugspurger committed
Fix GroupBy nth Handling with Observed=False (#26419)
* Added test coverage for observed=False with ops
* Fixed issue with observed=False and nth
1 parent 8110b47 commit d2031d7

File tree

3 files changed

+21
-1
lines changed

3 files changed

+21
-1
lines changed

doc/source/whatsnew/v0.25.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ Groupby/resample/rolling
121121
^^^^^^^^^^^^^^^^^^^^^^^^
122122

123123
- Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` where applying a timezone conversion lambda function would drop timezone information (:issue:`27496`)
124+
- Bug in :meth:`pandas.core.groupby.GroupBy.nth` where ``observed=False`` was being ignored for Categorical groupers (:issue:`26385`)
124125
- Bug in windowing over read-only arrays (:issue:`27766`)
125126
- Fixed segfault in :meth:`pandas.core.groupby.DataFrameGroupBy.quantile` when an invalid quantile was passed (:issue:`27470`)
126127
-

pandas/core/groupby/groupby.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -1773,7 +1773,11 @@ def nth(self, n: Union[int, List[int]], dropna: Optional[str] = None) -> DataFra
17731773
if not self.as_index:
17741774
return out
17751775

1776-
out.index = self.grouper.result_index[ids[mask]]
1776+
result_index = self.grouper.result_index
1777+
out.index = result_index[ids[mask]]
1778+
1779+
if not self.observed and isinstance(result_index, CategoricalIndex):
1780+
out = out.reindex(result_index)
17771781

17781782
return out.sort_index() if self.sort else out
17791783

pandas/tests/groupby/test_categorical.py

+15
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,21 @@ def test_observed_groups_with_nan(observed):
434434
tm.assert_dict_equal(result, expected)
435435

436436

437+
def test_observed_nth():
438+
# GH 26385
439+
cat = pd.Categorical(["a", np.nan, np.nan], categories=["a", "b", "c"])
440+
ser = pd.Series([1, 2, 3])
441+
df = pd.DataFrame({"cat": cat, "ser": ser})
442+
443+
result = df.groupby("cat", observed=False)["ser"].nth(0)
444+
445+
index = pd.Categorical(["a", "b", "c"], categories=["a", "b", "c"])
446+
expected = pd.Series([1, np.nan, np.nan], index=index, name="ser")
447+
expected.index.name = "cat"
448+
449+
tm.assert_series_equal(result, expected)
450+
451+
437452
def test_dataframe_categorical_with_nan(observed):
438453
# GH 21151
439454
s1 = Categorical([np.nan, "a", np.nan, "a"], categories=["a", "b", "c"])

0 commit comments

Comments
 (0)