Skip to content

Commit 9cf8f79

Browse files
committed
ENH: Support observed keyword argument in Categorical.value_counts (#43498)
1 parent 075ec87 commit 9cf8f79

File tree

2 files changed

+15
-2
lines changed

2 files changed

+15
-2
lines changed

pandas/core/arrays/categorical.py

+6 -2
Original file line number | Diff line number | Diff line change
@@ -1606,7 +1606,7 @@ def notna(self) -> np.ndarray:
16061606

16071607
notnull = notna
16081608

1609-
def value_counts(self, dropna: bool = True):
1609+
def value_counts(self, dropna: bool = True, observed: bool = False):
16101610
"""
16111611
Return a Series containing counts of each category.
16121612
@@ -1616,6 +1616,9 @@ def value_counts(self, dropna: bool = True):
16161616
----------
16171617
dropna : bool, default True
16181618
Don't include counts of NaN.
1619+
observed : bool, default False
1620+
If True, only show counts for observed categories.
1621+
If False, show counts for all categories.
16191622
16201623
Returns
16211624
-------
@@ -1644,7 +1647,8 @@ def value_counts(self, dropna: bool = True):
16441647
ix = coerce_indexer_dtype(ix, self.dtype.categories)
16451648
ix = self._from_backing_data(ix)
16461649

1647-
return Series(count, index=CategoricalIndex(ix), dtype="int64")
1650+
counts = Series(count, index=CategoricalIndex(ix), dtype="int64")
1651+
return counts[counts != 0] if observed else counts
16481652

16491653
# error: Argument 2 of "_empty" is incompatible with supertype
16501654
# "NDArrayBackedExtensionArray"; supertype defines the argument type as

pandas/tests/extension/test_categorical.py

+9
Original file line number | Diff line number | Diff line change
@@ -163,6 +163,15 @@ class TestMethods(base.BaseMethodsTests):
163163
def test_value_counts(self, all_data, dropna):
164164
return super().test_value_counts(all_data, dropna)
165165

166+
def test_value_counts_observed(self, data):
167+
data = data.add_categories(["#", "?"]) # Add some unobserved categories
168+
series = pd.Series(data, dtype=data.categories.dtype)
169+
result = data.value_counts(observed=True).sort_index()
170+
expected = series.value_counts().sort_index()
171+
self.assert_series_equal(
172+
result, expected, check_index_type=False, check_categorical=False
173+
)
174+
166175
def test_combine_add(self, data_repeated):
167176
# GH 20825
168177
# When adding categoricals in combine, result is a string

0 commit comments

Comments
 (0)