diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst
index bb28ce9b67a3e..895cc17867cc8 100644
--- a/doc/source/whatsnew/v0.25.1.rst
+++ b/doc/source/whatsnew/v0.25.1.rst
@@ -117,6 +117,7 @@ Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
 - Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` where applying a timezone conversion lambda function would drop timezone information (:issue:`27496`)
+- Bug in :meth:`pandas.core.groupby.GroupBy.nth` where ``observed=False`` was being ignored for Categorical groupers (:issue:`26385`)
 - Bug in windowing over read-only arrays (:issue:`27766`)
 - Fixed segfault in `pandas.core.groupby.DataFrameGroupBy.quantile` when an invalid quantile was passed (:issue:`27470`)
 -
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index c5e81e21e9fd5..d68557853db08 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1773,7 +1773,11 @@ def nth(self, n: Union[int, List[int]], dropna: Optional[str] = None) -> DataFra
             if not self.as_index:
                 return out
 
-            out.index = self.grouper.result_index[ids[mask]]
+            result_index = self.grouper.result_index
+            out.index = result_index[ids[mask]]
+
+            if not self.observed and isinstance(result_index, CategoricalIndex):
+                out = out.reindex(result_index)
 
             return out.sort_index() if self.sort else out
 
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index b5c2de267869d..e09af3fd48ee6 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -434,6 +434,21 @@ def test_observed_groups_with_nan(observed):
     tm.assert_dict_equal(result, expected)
 
 
+def test_observed_nth():
+    # GH 26385
+    cat = pd.Categorical(["a", np.nan, np.nan], categories=["a", "b", "c"])
+    ser = pd.Series([1, 2, 3])
+    df = pd.DataFrame({"cat": cat, "ser": ser})
+
+    result = df.groupby("cat", observed=False)["ser"].nth(0)
+
+    index = pd.Categorical(["a", "b", "c"], categories=["a", "b", "c"])
+    expected = pd.Series([1, np.nan, np.nan], index=index, name="ser")
+    expected.index.name = "cat"
+
+    tm.assert_series_equal(result, expected)
+
+
 def test_dataframe_categorical_with_nan(observed):
     # GH 21151
     s1 = Categorical([np.nan, "a", np.nan, "a"], categories=["a", "b", "c"])