diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 7828c479e800f..55967c6fdbbfe 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -211,7 +211,7 @@ Other enhancements - :meth:`.GroupBy.mean` now supports `Numba <https://numba.pydata.org/>`_ execution with the ``engine`` keyword (:issue:`43731`) - :meth:`Timestamp.isoformat`, now handles the ``timespec`` argument from the base :class:``datetime`` class (:issue:`26131`) - :meth:`NaT.to_numpy` ``dtype`` argument is now respected, so ``np.timedelta64`` can be returned (:issue:`44460`) -- +- :meth:`Categorical.value_counts` now supports the argument ``observed`` (:issue:`43498`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 759c7fb65374d..cf562e3a40b24 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1602,7 +1602,7 @@ def notna(self) -> np.ndarray: notnull = notna - def value_counts(self, dropna: bool = True): + def value_counts(self, dropna: bool = True, observed: bool = False): """ Return a Series containing counts of each category. @@ -1612,6 +1612,9 @@ def value_counts(self, dropna: bool = True): ---------- dropna : bool, default True Don't include counts of NaN. + observed : bool, default False + If True, only include counts for observed categories. + If False, include counts for all categories. 
Returns ------- @@ -1640,7 +1643,8 @@ def value_counts(self, dropna: bool = True): ix = coerce_indexer_dtype(ix, self.dtype.categories) ix = self._from_backing_data(ix) - return Series(count, index=CategoricalIndex(ix), dtype="int64") + counts = Series(count, index=CategoricalIndex(ix), dtype="int64") + return counts[counts != 0] if observed else counts # error: Argument 2 of "_empty" is incompatible with supertype # "NDArrayBackedExtensionArray"; supertype defines the argument type as diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 6a1a9512bc036..da3f8d11a4a18 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -163,6 +163,17 @@ class TestMethods(base.BaseMethodsTests): def test_value_counts(self, all_data, dropna): return super().test_value_counts(all_data, dropna) + def test_value_counts_observed(self, data): + # GH 43498 + # When observed=True is passed, unobserved categories should be omitted + data = data.add_categories(["#", "?"]) # Add some unobserved categories + series = pd.Series(data, dtype=data.categories.dtype) + result = data.value_counts(observed=True).sort_index() + expected = series.value_counts().sort_index() + self.assert_series_equal( + result, expected, check_index_type=False, check_categorical=False + ) + def test_combine_add(self, data_repeated): # GH 20825 # When adding categoricals in combine, result is a string