Skip to content

Commit 2ff69a3

Browse files
committed
ENH: Support observed keyword argument in Categorical.value_counts (#43498)
1 parent 5cdde87 commit 2ff69a3

File tree

2 files changed

+15
-2
lines changed

2 files changed

+15
-2
lines changed

pandas/core/arrays/categorical.py

+6-2
Original file line number | Diff line number | Diff line change
@@ -1602,7 +1602,7 @@ def notna(self) -> np.ndarray:
16021602

16031603
notnull = notna
16041604

1605-
def value_counts(self, dropna: bool = True):
1605+
def value_counts(self, dropna: bool = True, observed: bool = False):
16061606
"""
16071607
Return a Series containing counts of each category.
16081608
@@ -1612,6 +1612,9 @@ def value_counts(self, dropna: bool = True):
16121612
----------
16131613
dropna : bool, default True
16141614
Don't include counts of NaN.
1615+
observed : bool, default False
1616+
If True, only include counts for observed categories.
1617+
If False, include counts for all categories.
16151618
16161619
Returns
16171620
-------
@@ -1640,7 +1643,8 @@ def value_counts(self, dropna: bool = True):
16401643
ix = coerce_indexer_dtype(ix, self.dtype.categories)
16411644
ix = self._from_backing_data(ix)
16421645

1643-
return Series(count, index=CategoricalIndex(ix), dtype="int64")
1646+
counts = Series(count, index=CategoricalIndex(ix), dtype="int64")
1647+
return counts[counts != 0] if observed else counts
16441648

16451649
# error: Argument 2 of "_empty" is incompatible with supertype
16461650
# "NDArrayBackedExtensionArray"; supertype defines the argument type as

pandas/tests/extension/test_categorical.py

+9
Original file line number | Diff line number | Diff line change
@@ -163,6 +163,15 @@ class TestMethods(base.BaseMethodsTests):
163163
def test_value_counts(self, all_data, dropna):
164164
return super().test_value_counts(all_data, dropna)
165165

166+
def test_value_counts_observed(self, data):
167+
data = data.add_categories(["#", "?"]) # Add some unobserved categories
168+
series = pd.Series(data, dtype=data.categories.dtype)
169+
result = data.value_counts(observed=True).sort_index()
170+
expected = series.value_counts().sort_index()
171+
self.assert_series_equal(
172+
result, expected, check_index_type=False, check_categorical=False
173+
)
174+
166175
def test_combine_add(self, data_repeated):
167176
# GH 20825
168177
# When adding categoricals in combine, result is a string

0 commit comments

Comments
 (0)