Skip to content

Commit 46841f8

Browse files
authored
BUG: GH38672 SeriesGroupBy.value_counts for categorical (#38796)
1 parent dd52445 commit 46841f8

File tree

3 files changed

+51
-7
lines changed

3 files changed

+51
-7
lines changed

doc/source/whatsnew/v1.3.0.rst

+1 -1
Original file line number | Diff line number | Diff line change
@@ -284,7 +284,7 @@ Plotting
284284
Groupby/resample/rolling
285285
^^^^^^^^^^^^^^^^^^^^^^^^
286286

287-
-
287+
- Bug in :meth:`SeriesGroupBy.value_counts` where unobserved categories in a grouped categorical series were not tallied (:issue:`38672`)
288288
-
289289

290290
Reshaping

pandas/core/groupby/generic.py

+13 -5
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,7 @@
4242
ensure_int64,
4343
ensure_platform_int,
4444
is_bool,
45+
is_categorical_dtype,
4546
is_integer_dtype,
4647
is_interval_dtype,
4748
is_numeric_dtype,
@@ -681,9 +682,10 @@ def value_counts(
681682
from pandas.core.reshape.merge import get_join_indexers
682683
from pandas.core.reshape.tile import cut
683684

684-
if bins is not None and not np.iterable(bins):
685-
# scalar bins cannot be done at top level
686-
# in a backward compatible way
685+
ids, _, _ = self.grouper.group_info
686+
val = self.obj._values
687+
688+
def apply_series_value_counts():
687689
return self.apply(
688690
Series.value_counts,
689691
normalize=normalize,
@@ -692,8 +694,14 @@ def value_counts(
692694
bins=bins,
693695
)
694696

695-
ids, _, _ = self.grouper.group_info
696-
val = self.obj._values
697+
if bins is not None:
698+
if not np.iterable(bins):
699+
# scalar bins cannot be done at top level
700+
# in a backward compatible way
701+
return apply_series_value_counts()
702+
elif is_categorical_dtype(val):
703+
# GH38672
704+
return apply_series_value_counts()
697705

698706
# groupby removes null keys from groupings
699707
mask = ids != -1

pandas/tests/groupby/test_value_counts.py

+37 -1
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,16 @@
99
import numpy as np
1010
import pytest
1111

12-
from pandas import DataFrame, Grouper, MultiIndex, Series, date_range, to_datetime
12+
from pandas import (
13+
Categorical,
14+
CategoricalIndex,
15+
DataFrame,
16+
Grouper,
17+
MultiIndex,
18+
Series,
19+
date_range,
20+
to_datetime,
21+
)
1322
import pandas._testing as tm
1423

1524

@@ -111,3 +120,30 @@ def test_series_groupby_value_counts_with_grouper():
111120
expected.index.names = result.index.names
112121

113122
tm.assert_series_equal(result, expected)
123+
124+
125+
def test_series_groupby_value_counts_on_categorical():
126+
# GH38672
127+
128+
s = Series(Categorical(["a"], categories=["a", "b"]))
129+
result = s.groupby([0]).value_counts()
130+
131+
expected = Series(
132+
data=[1, 0],
133+
index=MultiIndex.from_arrays(
134+
[
135+
[0, 0],
136+
CategoricalIndex(
137+
["a", "b"], categories=["a", "b"], ordered=False, dtype="category"
138+
),
139+
]
140+
),
141+
name=0,
142+
)
143+
144+
# Expected:
145+
# 0 a 1
146+
# b 0
147+
# Name: 0, dtype: int64
148+
149+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)