pandas-dev · topper-123 · Nov 28, 2020 · Nov 28, 2020 · jbrockmendel · Nov 28, 2020
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
@@ -254,6 +254,7 @@ Other enhancements
 - :func:`read_csv` supports memory-mapping for compressed files (:issue:`37621`)
 - Improve error reporting for :meth:`DataFrame.merge` when invalid merge column definitions were given (:issue:`16228`)
 - Improve numerical stability for :meth:`.Rolling.skew`, :meth:`.Rolling.kurt`, :meth:`Expanding.skew` and :meth:`Expanding.kurt` through implementation of Kahan summation (:issue:`6929`)
+- :meth:`Categorical.unique` has a new parameter ``remove_unused_categories``, which, if set to ``False``, keeps the dtype of the original categorical (:issue:`38135`)
 - Improved error reporting for subsetting columns of a :class:`.DataFrameGroupBy` with ``axis=1`` (:issue:`37725`)
 - Implement method ``cross`` for :meth:`DataFrame.merge` and :meth:`DataFrame.join` (:issue:`5401`)
 

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -2035,16 +2035,24 @@ def mode(self, dropna=True):
     # ------------------------------------------------------------------
     # ExtensionArray Interface
 
-    def unique(self):
+    def unique(self, remove_unused_categories: bool = True) -> "Categorical":
         """
         Return the ``Categorical`` which ``categories`` and ``codes`` are
-        unique. Unused categories are NOT returned.
+        unique. By default, unused categories are NOT returned.
 
         - unordered category: values and categories are sorted by appearance
           order.
         - ordered category: values are sorted by appearance order, categories
           keeps existing order.
 
+        Parameters
+        ----------
+        remove_unused_categories : bool, default True
+            If True, unused categories are not returned.
+            If False, all categories are kept and the dtype is unchanged.
+
+            .. versionadded:: 1.2.0
+
         Returns
         -------
         unique values : ``Categorical``
@@ -2075,13 +2083,24 @@ def unique(self):
         ... ).unique()
         ['b', 'a', 'c']
         Categories (3, object): ['a' < 'b' < 'c']
+
+        By default, unused categories are removed, but this can be changed:
+
+        >>> cat = pd.Categorical(list("baab"), categories=list("abc"), ordered=True)
+        >>> cat.unique()
+        ['b', 'a']
+        Categories (2, object): ['a' < 'b']
+        >>> cat.unique(remove_unused_categories=False)
+        ['b', 'a']
+        Categories (3, object): ['a' < 'b' < 'c']
         """
         # unlike np.unique, unique1d does not sort
         unique_codes = unique1d(self.codes)
-        cat = self.copy()
 
-        # keep nan in codes
-        cat._codes = unique_codes
+        cat = self._constructor(unique_codes, dtype=self.dtype, fastpath=True)
+
+        if not remove_unused_categories:
+            return cat
 
         # exclude nan from indexer for categories
         take_codes = unique_codes[unique_codes != -1]

diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py
@@ -6,7 +6,7 @@
 
 from pandas.compat import PYPY
 
-from pandas import Categorical, Index, NaT, Series, date_range
+from pandas import Categorical, CategoricalDtype, Index, NaT, Series, date_range
 import pandas._testing as tm
 from pandas.api.types import is_scalar
 
@@ -242,6 +242,28 @@ def test_unique_ordered(self):
         exp_cat = Categorical(["b", np.nan, "a"], categories=["a", "b"], ordered=True)
         tm.assert_categorical_equal(res, exp_cat)
 
+    @pytest.mark.parametrize(
+        "values, expected",
+        [
+            [list("abc"), list("abc")],
+            [list("bac"), list("bac")],
+            [list("ab"), list("ab")],
+            [list("bc"), list("bc")],
+            [list("aabbcc"), list("abc")],
+            [list("aabb"), list("ab")],
+            [[np.nan, "a", "b"], [np.nan, "a", "b"]],
+            [["a", "b", np.nan], ["a", "b", np.nan]],
+            [["a", "b", "a", "b", np.nan], ["a", "b", np.nan]],
+        ],
+    )
+    def test_unique_keep_unused_categories(self, values, expected, ordered):
+        # GH38135: remove_unused_categories=False must preserve the input dtype
+        dtype = CategoricalDtype(list("abc"), ordered=ordered)
+        result = Categorical(values, dtype=dtype).unique(remove_unused_categories=False)
+        expected = Categorical(expected, dtype=dtype)
+
+        tm.assert_categorical_equal(result, expected)
+
+
     def test_unique_index_series(self):
         c = Categorical([3, 1, 2, 2, 1], categories=[3, 2, 1])
         # Categorical.unique sorts categories by appearance order