From 1e5a47070746851466dd433754a6d7b07395630f Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 31 Dec 2021 23:29:47 +0100 Subject: [PATCH] BUG: groupby nunique not respecting observed --- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/core/groupby/generic.py | 1 + pandas/tests/groupby/test_categorical.py | 15 +++++++++++++++ 3 files changed, 17 insertions(+) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index d770782d5dc62..0fca467edd1af 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -943,6 +943,7 @@ Groupby/resample/rolling - Bug in :meth:`GroupBy.nth` failing on ``axis=1`` (:issue:`43926`) - Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not respecting right bound on centered datetime-like windows, if the index contain duplicates (:issue:`3944`) - Bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` when using a :class:`pandas.api.indexers.BaseIndexer` subclass that returned unequal start and end arrays would segfault instead of raising a ``ValueError`` (:issue:`44470`) +- Bug in :meth:`GroupBy.nunique` not respecting ``observed=True`` for Categorical grouping columns (:issue:`45128`) - Bug in :meth:`GroupBy.head` and :meth:`GroupBy.tail` not dropping groups with ``NaN`` when ``dropna=True`` (:issue:`45089`) - Fixed bug in :meth:`GroupBy.__iter__` after selecting a subset of columns in a :class:`GroupBy` object, which returned all columns instead of the chosen subset (:issue:`#44821`) - Bug in :meth:`Groupby.rolling` when non-monotonic data passed, fails to correctly raise ``ValueError`` (:issue:`43909`) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 9b341845c7170..81c5e74957c62 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1449,6 +1449,7 @@ def _iterate_column_groupbys(self, obj: DataFrame | Series): selection=colname, grouper=self.grouper, exclusions=self.exclusions, + observed=self.observed, ) def 
_apply_to_column_groupbys(self, func, obj: DataFrame | Series) -> DataFrame: diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 5ae929be1d8cf..3d5016b058c07 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1779,3 +1779,18 @@ def test_groupby_last_first_preserve_categoricaldtype(func): Categorical([1, 2, 3]), name="b", index=Index([1, 2, 3], name="a") ) tm.assert_series_equal(expected, result) + + +def test_groupby_categorical_observed_nunique(): + # GH#45128 + df = DataFrame({"a": [1, 2], "b": [1, 2], "c": [10, 11]}) + df = df.astype(dtype={"a": "category", "b": "category"}) + result = df.groupby(["a", "b"], observed=True).nunique()["c"] + expected = Series( + [1, 1], + index=MultiIndex.from_arrays( + [CategoricalIndex([1, 2], name="a"), CategoricalIndex([1, 2], name="b")] + ), + name="c", + ) + tm.assert_series_equal(result, expected)