Skip to content

Commit 4eb4729

Browse files
authored
BUG: SeriesGroupBy.nunique raises when grouping with an empty categorical (#50081)
* BUG: SeriesGroupBy.nunique raises when grouping with an empty categorical * dtype fixup
1 parent 7d79cc6 commit 4eb4729

File tree

3 files changed

+17
-1
lines changed

3 files changed

+17
-1
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -811,6 +811,7 @@ Groupby/resample/rolling
811811
- Bug in :meth:`.DataFrameGroupBy.describe` would describe the group keys (:issue:`49256`)
812812
- Bug in :meth:`.SeriesGroupBy.describe` with ``as_index=False`` would have the incorrect shape (:issue:`49256`)
813813
- Bug in :class:`.DataFrameGroupBy` and :class:`.SeriesGroupBy` with ``dropna=False`` would drop NA values when the grouper was categorical (:issue:`36327`)
814+
- Bug in :meth:`.SeriesGroupBy.nunique` would incorrectly raise when the grouper was an empty categorical and ``observed=True`` (:issue:`21334`)
814815

815816
Reshaping
816817
^^^^^^^^^

pandas/core/groupby/generic.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -585,7 +585,9 @@ def nunique(self, dropna: bool = True) -> Series:
585585
# we might have duplications among the bins
586586
if len(res) != len(ri):
587587
res, out = np.zeros(len(ri), dtype=out.dtype), res
588-
res[ids[idx]] = out
588+
if len(ids) > 0:
589+
# GH#21334
590+
res[ids[idx]] = out
589591

590592
result = self.obj._constructor(res, index=ri, name=self.obj.name)
591593
return self._reindex_output(result, fill_value=0)

pandas/tests/groupby/test_nunique.py

+13
Original file line numberDiff line numberDiff line change
@@ -182,3 +182,16 @@ def test_nunique_transform_with_datetime():
182182
result = df.groupby([0, 0, 1])["date"].transform("nunique")
183183
expected = Series([2, 2, 1], name="date")
184184
tm.assert_series_equal(result, expected)
185+
186+
187+
def test_empty_categorical(observed):
188+
# GH#21334
189+
cat = Series([1]).astype("category")
190+
ser = cat[:0]
191+
gb = ser.groupby(ser, observed=observed)
192+
result = gb.nunique()
193+
if observed:
194+
expected = Series([], index=cat[:0], dtype="int64")
195+
else:
196+
expected = Series([0], index=cat, dtype="int64")
197+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)