From 2ff69a37a2ee9728ebbd37fdad330d4dafa52d95 Mon Sep 17 00:00:00 2001
From: Andrew Eckart <andrew.g.eckart@gmail.com>
Date: Tue, 12 Oct 2021 14:14:32 -0500
Subject: [PATCH 1/2] ENH: Support observed keyword argument in
 Categorical.value_counts (#43498)

---
 pandas/core/arrays/categorical.py          | 8 ++++++--
 pandas/tests/extension/test_categorical.py | 9 +++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 759c7fb65374d..cf562e3a40b24 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -1602,7 +1602,7 @@ def notna(self) -> np.ndarray:
 
     notnull = notna
 
-    def value_counts(self, dropna: bool = True):
+    def value_counts(self, dropna: bool = True, observed: bool = False):
         """
         Return a Series containing counts of each category.
 
@@ -1612,6 +1612,9 @@ def value_counts(self, dropna: bool = True):
         ----------
         dropna : bool, default True
             Don't include counts of NaN.
+        observed : bool, default False
+            If True, only include categories that appear in the data.
+            If False, include all categories; unobserved ones have count 0.
 
         Returns
         -------
@@ -1640,7 +1643,8 @@ def value_counts(self, dropna: bool = True):
         ix = coerce_indexer_dtype(ix, self.dtype.categories)
         ix = self._from_backing_data(ix)
 
-        return Series(count, index=CategoricalIndex(ix), dtype="int64")
+        counts = Series(count, index=CategoricalIndex(ix), dtype="int64")
+        return counts[counts != 0] if observed else counts
 
     # error: Argument 2 of "_empty" is incompatible with supertype
     # "NDArrayBackedExtensionArray"; supertype defines the argument type as
diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py
index 6a1a9512bc036..dfc8aaa48ebb0 100644
--- a/pandas/tests/extension/test_categorical.py
+++ b/pandas/tests/extension/test_categorical.py
@@ -163,6 +163,15 @@ class TestMethods(base.BaseMethodsTests):
     def test_value_counts(self, all_data, dropna):
         return super().test_value_counts(all_data, dropna)
 
+    def test_value_counts_observed(self, data):
+        data = data.add_categories(["#", "?"])  # Add some unobserved categories
+        series = pd.Series(data, dtype=data.categories.dtype)
+        result = data.value_counts(observed=True).sort_index()
+        expected = series.value_counts().sort_index()
+        self.assert_series_equal(
+            result, expected, check_index_type=False, check_categorical=False
+        )
+
     def test_combine_add(self, data_repeated):
         # GH 20825
         # When adding categoricals in combine, result is a string

From 30e0540a2e1073a23732981a8bc739f020939543 Mon Sep 17 00:00:00 2001
From: Andrew Eckart <andrew.g.eckart@gmail.com>
Date: Tue, 12 Oct 2021 18:00:35 -0500
Subject: [PATCH 2/2] PR feedback: issue reference and whatsnew

---
 doc/source/whatsnew/v1.4.0.rst             | 2 +-
 pandas/tests/extension/test_categorical.py | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 7828c479e800f..55967c6fdbbfe 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -211,7 +211,7 @@ Other enhancements
 - :meth:`.GroupBy.mean` now supports `Numba <http://numba.pydata.org/>`_ execution with the ``engine`` keyword (:issue:`43731`)
 - :meth:`Timestamp.isoformat`, now handles the ``timespec`` argument from the base :class:``datetime`` class (:issue:`26131`)
 - :meth:`NaT.to_numpy` ``dtype`` argument is now respected, so ``np.timedelta64`` can be returned (:issue:`44460`)
--
+- :meth:`Categorical.value_counts` now accepts an ``observed`` argument; pass ``observed=True`` to omit unobserved categories from the result (:issue:`43498`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py
index dfc8aaa48ebb0..da3f8d11a4a18 100644
--- a/pandas/tests/extension/test_categorical.py
+++ b/pandas/tests/extension/test_categorical.py
@@ -164,6 +164,8 @@ def test_value_counts(self, all_data, dropna):
         return super().test_value_counts(all_data, dropna)
 
     def test_value_counts_observed(self, data):
+        # GH 43498
+        # When observed=True is passed, unobserved categories should be omitted
         data = data.add_categories(["#", "?"])  # Add some unobserved categories
         series = pd.Series(data, dtype=data.categories.dtype)
         result = data.value_counts(observed=True).sort_index()