Skip to content

Commit 8f530a1

Browse files
WillAyd authored and MeeseeksDev[bot] committed
Backport PR pandas-dev#26419: Fix GroupBy nth Handling with Observed=False
1 parent ca6b973 commit 8f530a1

File tree

3 files changed

+21
-1
lines changed

3 files changed

+21
-1
lines changed

doc/source/whatsnew/v0.25.1.rst

+1
Original file line number | Diff line number | Diff line change
@@ -122,6 +122,7 @@ Groupby/resample/rolling
122122
^^^^^^^^^^^^^^^^^^^^^^^^
123123

124124
- Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` where applying a timezone conversion lambda function would drop timezone information (:issue:`27496`)
125+
- Bug in :meth:`pandas.core.groupby.GroupBy.nth` where ``observed=False`` was being ignored for Categorical groupers (:issue:`26385`)
125126
- Bug in windowing over read-only arrays (:issue:`27766`)
126127
-
127128
-

pandas/core/groupby/groupby.py

+5 -1
Original file line number | Diff line number | Diff line change
@@ -1771,7 +1771,11 @@ def nth(self, n: Union[int, List[int]], dropna: Optional[str] = None) -> DataFra
17711771
if not self.as_index:
17721772
return out
17731773

1774-
out.index = self.grouper.result_index[ids[mask]]
1774+
result_index = self.grouper.result_index
1775+
out.index = result_index[ids[mask]]
1776+
1777+
if not self.observed and isinstance(result_index, CategoricalIndex):
1778+
out = out.reindex(result_index)
17751779

17761780
return out.sort_index() if self.sort else out
17771781

pandas/tests/groupby/test_categorical.py

+15
Original file line number | Diff line number | Diff line change
@@ -434,6 +434,21 @@ def test_observed_groups_with_nan(observed):
434434
tm.assert_dict_equal(result, expected)
435435

436436

437+
def test_observed_nth():
438+
# GH 26385
439+
cat = pd.Categorical(["a", np.nan, np.nan], categories=["a", "b", "c"])
440+
ser = pd.Series([1, 2, 3])
441+
df = pd.DataFrame({"cat": cat, "ser": ser})
442+
443+
result = df.groupby("cat", observed=False)["ser"].nth(0)
444+
445+
index = pd.Categorical(["a", "b", "c"], categories=["a", "b", "c"])
446+
expected = pd.Series([1, np.nan, np.nan], index=index, name="ser")
447+
expected.index.name = "cat"
448+
449+
tm.assert_series_equal(result, expected)
450+
451+
437452
def test_dataframe_categorical_with_nan(observed):
438453
# GH 21151
439454
s1 = Categorical([np.nan, "a", np.nan, "a"], categories=["a", "b", "c"])

0 commit comments

Comments
 (0)