From 6de5608c7c2772b8ce85032d5d07d3432fb2fef7 Mon Sep 17 00:00:00 2001
From: tp <contribute@tensortable.com>
Date: Sat, 28 Nov 2020 19:56:21 +0000
Subject: [PATCH 01/14] ENH: Categorical.unique can keep same dtype

---
 doc/source/whatsnew/v1.2.0.rst                |  1 +
 pandas/core/groupby/categorical.py            |  5 ++
 .../arrays/categorical/test_analytics.py      | 79 +++++--------------
 3 files changed, 25 insertions(+), 60 deletions(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 6dd011c588702..7d801fa3c07b0 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -585,6 +585,7 @@ Categorical
 - :meth:`Categorical.fillna` will always return a copy, validate a passed fill value regardless of whether there are any NAs to fill, and disallow an ``NaT`` as a fill value for numeric categories (:issue:`36530`)
 - Bug in :meth:`Categorical.__setitem__` that incorrectly raised when trying to set a tuple value (:issue:`20439`)
 - Bug in :meth:`CategoricalIndex.equals` incorrectly casting non-category entries to ``np.nan`` (:issue:`37667`)
+- Bug in :meth:`Categorical.unique` where dtype was changed, it there were unused categories (:issue:`xxxxx`).
 - Bug in :meth:`CategoricalIndex.where` incorrectly setting non-category entries to ``np.nan`` instead of raising ``TypeError`` (:issue:`37977`)
 - Bug in :meth:`Categorical.to_numpy` and ``np.array(categorical)`` with tz-aware ``datetime64`` categories incorrectly dropping the time zone information instead of casting to object dtype (:issue:`38136`)
 
diff --git a/pandas/core/groupby/categorical.py b/pandas/core/groupby/categorical.py
index 6de8c1d789097..8c740fe0e5a89 100644
--- a/pandas/core/groupby/categorical.py
+++ b/pandas/core/groupby/categorical.py
@@ -75,6 +75,11 @@ def recode_for_groupby(
 
     # sort=False should order groups in as-encountered order (GH-8868)
     cat = c.unique()
+    # exclude nan from indexer for categories
+    take_codes = cat.codes[cat.codes != -1]
+    if cat.ordered:
+        take_codes = np.sort(take_codes)
+    cat = cat.set_categories(cat.categories.take(take_codes))
 
     # But for groupby to work, all categories should be present,
     # including those missing from the data (GH-13179), which .unique()
diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py
index 6899d821f80ad..f4108df52d1a0 100644
--- a/pandas/tests/arrays/categorical/test_analytics.py
+++ b/pandas/tests/arrays/categorical/test_analytics.py
@@ -6,13 +6,7 @@
 
 from pandas.compat import PYPY
 
-from pandas import (
-    Categorical,
-    Index,
-    NaT,
-    Series,
-    date_range,
-)
+from pandas import Categorical, CategoricalDtype, Index, NaT, Series, date_range
 import pandas._testing as tm
 from pandas.api.types import is_scalar
 
@@ -196,84 +190,49 @@ def test_searchsorted(self, ordered):
         with pytest.raises(KeyError, match="cucumber"):
             ser.searchsorted(["bread", "cucumber"])
 
-    def test_unique(self):
+    def test_unique(self, ordered):
+        # GHXXXXX
+        dtype = CategoricalDtype(["a", "b", "c"], ordered=ordered)
+
         # categories are reordered based on value when ordered=False
-        cat = Categorical(["a", "b"])
-        exp = Index(["a", "b"])
+        cat = Categorical(["a", "b", "c"], dtype=dtype)
         res = cat.unique()
-        tm.assert_index_equal(res.categories, exp)
         tm.assert_categorical_equal(res, cat)
 
-        cat = Categorical(["a", "b", "a", "a"], categories=["a", "b", "c"])
+        cat = Categorical(["a", "b", "a", "a"], dtype=dtype)
         res = cat.unique()
-        tm.assert_index_equal(res.categories, exp)
-        tm.assert_categorical_equal(res, Categorical(exp))
+        tm.assert_categorical_equal(res, Categorical(["a", "b"], dtype=dtype))
 
-        cat = Categorical(["c", "a", "b", "a", "a"], categories=["a", "b", "c"])
-        exp = Index(["c", "a", "b"])
+        cat = Categorical(["c", "a", "b", "a", "a"], dtype=dtype)
         res = cat.unique()
-        tm.assert_index_equal(res.categories, exp)
-        exp_cat = Categorical(exp, categories=["c", "a", "b"])
+        exp_cat = Categorical(["c", "a", "b"], dtype=dtype)
         tm.assert_categorical_equal(res, exp_cat)
 
         # nan must be removed
-        cat = Categorical(["b", np.nan, "b", np.nan, "a"], categories=["a", "b", "c"])
-        res = cat.unique()
-        exp = Index(["b", "a"])
-        tm.assert_index_equal(res.categories, exp)
-        exp_cat = Categorical(["b", np.nan, "a"], categories=["b", "a"])
-        tm.assert_categorical_equal(res, exp_cat)
-
-    def test_unique_ordered(self):
-        # keep categories order when ordered=True
-        cat = Categorical(["b", "a", "b"], categories=["a", "b"], ordered=True)
+        cat = Categorical(["b", np.nan, "b", np.nan, "a"], dtype=dtype)
         res = cat.unique()
-        exp_cat = Categorical(["b", "a"], categories=["a", "b"], ordered=True)
+        exp_cat = Categorical(["b", np.nan, "a"], dtype=dtype)
         tm.assert_categorical_equal(res, exp_cat)
 
-        cat = Categorical(
-            ["c", "b", "a", "a"], categories=["a", "b", "c"], ordered=True
-        )
-        res = cat.unique()
-        exp_cat = Categorical(["c", "b", "a"], categories=["a", "b", "c"], ordered=True)
-        tm.assert_categorical_equal(res, exp_cat)
-
-        cat = Categorical(["b", "a", "a"], categories=["a", "b", "c"], ordered=True)
-        res = cat.unique()
-        exp_cat = Categorical(["b", "a"], categories=["a", "b"], ordered=True)
-        tm.assert_categorical_equal(res, exp_cat)
+    def test_unique_index_series(self, ordered):
+        # GHXXXXX
+        dtype = CategoricalDtype([3, 2, 1], ordered=ordered)
 
-        cat = Categorical(
-            ["b", "b", np.nan, "a"], categories=["a", "b", "c"], ordered=True
-        )
-        res = cat.unique()
-        exp_cat = Categorical(["b", np.nan, "a"], categories=["a", "b"], ordered=True)
-        tm.assert_categorical_equal(res, exp_cat)
-
-    def test_unique_index_series(self):
-        c = Categorical([3, 1, 2, 2, 1], categories=[3, 2, 1])
+        c = Categorical([3, 1, 2, 2, 1], dtype=dtype)
         # Categorical.unique sorts categories by appearance order
         # if ordered=False
-        exp = Categorical([3, 1, 2], categories=[3, 1, 2])
+        exp = Categorical([3, 1, 2], dtype=dtype)
         tm.assert_categorical_equal(c.unique(), exp)
 
         tm.assert_index_equal(Index(c).unique(), Index(exp))
         tm.assert_categorical_equal(Series(c).unique(), exp)
 
-        c = Categorical([1, 1, 2, 2], categories=[3, 2, 1])
-        exp = Categorical([1, 2], categories=[1, 2])
+        c = Categorical([1, 1, 2, 2], dtype=dtype)
+        exp = Categorical([1, 2], dtype=dtype)
         tm.assert_categorical_equal(c.unique(), exp)
         tm.assert_index_equal(Index(c).unique(), Index(exp))
         tm.assert_categorical_equal(Series(c).unique(), exp)
 
-        c = Categorical([3, 1, 2, 2, 1], categories=[3, 2, 1], ordered=True)
-        # Categorical.unique keeps categories order if ordered=True
-        exp = Categorical([3, 1, 2], categories=[3, 2, 1], ordered=True)
-        tm.assert_categorical_equal(c.unique(), exp)
-
-        tm.assert_index_equal(Index(c).unique(), Index(exp))
-        tm.assert_categorical_equal(Series(c).unique(), exp)
-
     def test_shift(self):
         # GH 9416
         cat = Categorical(["a", "b", "c", "d", "a"])

From b0aed5c551b2f9c7f2b71d29b4a2f40ac18b451d Mon Sep 17 00:00:00 2001
From: tp <contribute@tensortable.com>
Date: Sat, 28 Nov 2020 20:10:05 +0000
Subject: [PATCH 02/14] fixes

---
 doc/source/whatsnew/v1.2.0.rst                    | 2 +-
 pandas/tests/arrays/categorical/test_analytics.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 7d801fa3c07b0..ac57dcebcf494 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -585,7 +585,7 @@ Categorical
 - :meth:`Categorical.fillna` will always return a copy, validate a passed fill value regardless of whether there are any NAs to fill, and disallow an ``NaT`` as a fill value for numeric categories (:issue:`36530`)
 - Bug in :meth:`Categorical.__setitem__` that incorrectly raised when trying to set a tuple value (:issue:`20439`)
 - Bug in :meth:`CategoricalIndex.equals` incorrectly casting non-category entries to ``np.nan`` (:issue:`37667`)
-- Bug in :meth:`Categorical.unique` where dtype was changed, it there were unused categories (:issue:`xxxxx`).
+- Bug in :meth:`Categorical.unique` where the dtype changes in the unique array if there are unused categories in the original array (:issue:`38140`).
 - Bug in :meth:`CategoricalIndex.where` incorrectly setting non-category entries to ``np.nan`` instead of raising ``TypeError`` (:issue:`37977`)
 - Bug in :meth:`Categorical.to_numpy` and ``np.array(categorical)`` with tz-aware ``datetime64`` categories incorrectly dropping the time zone information instead of casting to object dtype (:issue:`38136`)
 
diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py
index f4108df52d1a0..7fd15aa02b40c 100644
--- a/pandas/tests/arrays/categorical/test_analytics.py
+++ b/pandas/tests/arrays/categorical/test_analytics.py
@@ -191,7 +191,7 @@ def test_searchsorted(self, ordered):
             ser.searchsorted(["bread", "cucumber"])
 
     def test_unique(self, ordered):
-        # GHXXXXX
+        # GH38140
         dtype = CategoricalDtype(["a", "b", "c"], ordered=ordered)
 
         # categories are reordered based on value when ordered=False
@@ -215,7 +215,7 @@ def test_unique(self, ordered):
         tm.assert_categorical_equal(res, exp_cat)
 
     def test_unique_index_series(self, ordered):
-        # GHXXXXX
+        # GH38140
         dtype = CategoricalDtype([3, 2, 1], ordered=ordered)
 
         c = Categorical([3, 1, 2, 2, 1], dtype=dtype)

From 9135f458658c61ada499f7d671a7968e6cd67c31 Mon Sep 17 00:00:00 2001
From: tp <contribute@tensortable.com>
Date: Sat, 28 Nov 2020 20:35:05 +0000
Subject: [PATCH 03/14] fix doc string

---
 pandas/core/arrays/categorical.py | 29 ++++++++---------------------
 1 file changed, 8 insertions(+), 21 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index f2b5ad447a0cf..84c22298a8a9a 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2127,16 +2127,15 @@ def mode(self, dropna=True):
     def unique(self):
         """
         Return the ``Categorical`` which ``categories`` and ``codes`` are
-        unique. Unused categories are NOT returned.
+        unique.
 
-        - unordered category: values and categories are sorted by appearance
-          order.
-        - ordered category: values are sorted by appearance order, categories
-          keeps existing order.
+        .. versionchanged:: 1.2.0
+
+            Previously unused categories were dropped.
 
         Returns
         -------
-        unique values : ``Categorical``
+        Categorical
 
         See Also
         --------
@@ -2146,23 +2145,11 @@ def unique(self):
 
         Examples
         --------
-        An unordered Categorical will return categories in the
-        order of appearance.
-
         >>> pd.Categorical(list("baabc")).unique()
         ['b', 'a', 'c']
-        Categories (3, object): ['b', 'a', 'c']
-
-        >>> pd.Categorical(list("baabc"), categories=list("abc")).unique()
-        ['b', 'a', 'c']
-        Categories (3, object): ['b', 'a', 'c']
-
-        An ordered Categorical preserves the category ordering.
-
-        >>> pd.Categorical(
-        ...     list("baabc"), categories=list("abc"), ordered=True
-        ... ).unique()
-        ['b', 'a', 'c']
+        Categories (3, object): ['a', 'b', 'c']
+        >>> pd.Categorical(list("baab"), categories=list("abc"), ordered=True).unique()
+        ['b', 'a']
         Categories (3, object): ['a' < 'b' < 'c']
         """
         # unlike np.unique, unique1d does not sort

From 8fcf4e1d5070214ff2d2baf7ad042e3d24ae57d8 Mon Sep 17 00:00:00 2001
From: tp <contribute@tensortable.com>
Date: Sat, 28 Nov 2020 21:18:23 +0000
Subject: [PATCH 04/14] fix doc strings

---
 doc/source/whatsnew/v1.2.0.rst     | 2 +-
 pandas/core/arrays/categorical.py  | 2 +-
 pandas/core/groupby/categorical.py | 2 ++
 pandas/core/series.py              | 9 +++------
 4 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index ac57dcebcf494..934f0413f6eb6 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -585,7 +585,7 @@ Categorical
 - :meth:`Categorical.fillna` will always return a copy, validate a passed fill value regardless of whether there are any NAs to fill, and disallow an ``NaT`` as a fill value for numeric categories (:issue:`36530`)
 - Bug in :meth:`Categorical.__setitem__` that incorrectly raised when trying to set a tuple value (:issue:`20439`)
 - Bug in :meth:`CategoricalIndex.equals` incorrectly casting non-category entries to ``np.nan`` (:issue:`37667`)
-- Bug in :meth:`Categorical.unique` where the dtype changes in the unique array if there are unused categories in the original array (:issue:`38140`).
+- Bug in :meth:`Categorical.unique`, where the dtype changed in the unique array if there were unused categories in the original array (:issue:`38140`).
 - Bug in :meth:`CategoricalIndex.where` incorrectly setting non-category entries to ``np.nan`` instead of raising ``TypeError`` (:issue:`37977`)
 - Bug in :meth:`Categorical.to_numpy` and ``np.array(categorical)`` with tz-aware ``datetime64`` categories incorrectly dropping the time zone information instead of casting to object dtype (:issue:`38136`)
 
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 84c22298a8a9a..f6067714c960b 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2131,7 +2131,7 @@ def unique(self):
 
         .. versionchanged:: 1.2.0
 
-            Previously unused categories were dropped.
+            Previously, unused categories were dropped from the new categories.
 
         Returns
         -------
diff --git a/pandas/core/groupby/categorical.py b/pandas/core/groupby/categorical.py
index 8c740fe0e5a89..297681f1e10f5 100644
--- a/pandas/core/groupby/categorical.py
+++ b/pandas/core/groupby/categorical.py
@@ -75,6 +75,8 @@ def recode_for_groupby(
 
     # sort=False should order groups in as-encountered order (GH-8868)
     cat = c.unique()
+
+    # See GH-38140 for block below
     # exclude nan from indexer for categories
     take_codes = cat.codes[cat.codes != -1]
     if cat.ordered:
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 5c605a6b441c6..33e3bfb6ee3aa 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -1993,15 +1993,12 @@ def unique(self) -> ArrayLike:
         ['2016-01-01 00:00:00-05:00']
         Length: 1, dtype: datetime64[ns, US/Eastern]
 
-        An unordered Categorical will return categories in the order of
-        appearance.
+        A Categorical will return its unique values in the order of
+        appearance and with the same dtype.
 
         >>> pd.Series(pd.Categorical(list('baabc'))).unique()
         ['b', 'a', 'c']
-        Categories (3, object): ['b', 'a', 'c']
-
-        An ordered Categorical preserves the category ordering.
-
+        Categories (3, object): ['a', 'b', 'c']
         >>> pd.Series(pd.Categorical(list('baabc'), categories=list('abc'),
         ...                          ordered=True)).unique()
         ['b', 'a', 'c']

From 356267b74886b0407da482f678ecd97fd8f9b4fb Mon Sep 17 00:00:00 2001
From: tp <contribute@tensortable.com>
Date: Sat, 28 Nov 2020 23:49:51 +0000
Subject: [PATCH 05/14] fix categorical tests

---
 pandas/tests/base/test_unique.py              |  2 --
 .../indexes/categorical/test_category.py      | 19 ++++++++++---------
 pandas/tests/test_algos.py                    |  4 ++--
 3 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/pandas/tests/base/test_unique.py b/pandas/tests/base/test_unique.py
index 4aefa4be176fb..26e785a2796b1 100644
--- a/pandas/tests/base/test_unique.py
+++ b/pandas/tests/base/test_unique.py
@@ -67,8 +67,6 @@ def test_unique_null(null_obj, index_or_series_obj):
         if is_datetime64tz_dtype(obj.dtype):
             result = result.normalize()
             expected = expected.normalize()
-        elif isinstance(obj, pd.CategoricalIndex):
-            expected = expected.set_categories(unique_values_not_null)
         tm.assert_index_equal(result, expected)
     else:
         expected = np.array(unique_values, dtype=obj.dtype)
diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py
index d3c9b02b3ba23..bd2382b062aae 100644
--- a/pandas/tests/indexes/categorical/test_category.py
+++ b/pandas/tests/indexes/categorical/test_category.py
@@ -4,7 +4,7 @@
 from pandas._libs import index as libindex
 
 import pandas as pd
-from pandas import Categorical
+from pandas import Categorical, CategoricalDtype
 import pandas._testing as tm
 from pandas.core.indexes.api import (
     CategoricalIndex,
@@ -186,18 +186,19 @@ def test_drop_duplicates(self, data, categories, expected):
             tm.assert_index_equal(result, e)
 
     @pytest.mark.parametrize(
-        "data, categories, expected_data, expected_categories",
+        "data, categories, expected_data",
         [
-            ([1, 1, 1], [1, 2, 3], [1], [1]),
-            ([1, 1, 1], list("abc"), [np.nan], []),
-            ([1, 2, "a"], [1, 2, 3], [1, 2, np.nan], [1, 2]),
-            ([2, "a", "b"], list("abc"), [np.nan, "a", "b"], ["a", "b"]),
+            ([1, 1, 1], [1, 2, 3], [1]),
+            ([1, 1, 1], list("abc"), [np.nan]),
+            ([1, 2, "a"], [1, 2, 3], [1, 2, np.nan]),
+            ([2, "a", "b"], list("abc"), [np.nan, "a", "b"]),
         ],
     )
-    def test_unique(self, data, categories, expected_data, expected_categories):
+    def test_unique(self, data, categories, expected_data, ordered):
+        dtype = CategoricalDtype(categories, ordered=ordered)
 
-        idx = CategoricalIndex(data, categories=categories)
-        expected = CategoricalIndex(expected_data, categories=expected_categories)
+        idx = CategoricalIndex(data, dtype=dtype)
+        expected = CategoricalIndex(expected_data, dtype=dtype)
         tm.assert_index_equal(idx.unique(), expected)
 
     def test_repr_roundtrip(self):
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index 127baae6e9352..c9d034361d8c4 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -602,7 +602,7 @@ def test_categorical(self):
 
         # we are expecting to return in the order
         # of appearance
-        expected = Categorical(list("bac"), categories=list("bac"))
+        expected = Categorical(list("bac"))
 
         # we are expecting to return in the order
         # of the categories
@@ -632,7 +632,7 @@ def test_categorical(self):
         tm.assert_categorical_equal(result, expected)
 
         # CI -> return CI
-        ci = CategoricalIndex(Categorical(list("baabc"), categories=list("bac")))
+        ci = CategoricalIndex(Categorical(list("baabc"), categories=list("abc")))
         expected = CategoricalIndex(expected)
         result = ci.unique()
         tm.assert_index_equal(result, expected)

From 1c8f4f9e83f72c7b0a196f152c8b84884755634b Mon Sep 17 00:00:00 2001
From: tp <contribute@tensortable.com>
Date: Sun, 29 Nov 2020 00:52:52 +0000
Subject: [PATCH 06/14] fix test failure

---
 pandas/tests/extension/base/methods.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py
index 3ea5c34201b5c..589b4e3b71db5 100644
--- a/pandas/tests/extension/base/methods.py
+++ b/pandas/tests/extension/base/methods.py
@@ -40,10 +40,10 @@ def test_value_counts_with_normalize(self, data):
         # GH 33172
         data = data[:10].unique()
         values = np.array(data[~data.isna()])
+        ser = pd.Series(data, dtype=data.dtype)
 
-        result = (
-            pd.Series(data, dtype=data.dtype).value_counts(normalize=True).sort_index()
-        )
+        result = ser.value_counts(normalize=True).sort_index()
+        result = result[result > 0]
 
         expected = pd.Series([1 / len(values)] * len(values), index=result.index)
         self.assert_series_equal(result, expected)

From f31837c9d096cc980167797829dc171e119cb182 Mon Sep 17 00:00:00 2001
From: tp <contribute@tensortable.com>
Date: Sun, 29 Nov 2020 08:28:18 +0000
Subject: [PATCH 07/14] fix value_count test

---
 pandas/tests/extension/base/methods.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py
index 589b4e3b71db5..856bd1ac13706 100644
--- a/pandas/tests/extension/base/methods.py
+++ b/pandas/tests/extension/base/methods.py
@@ -38,12 +38,10 @@ def test_value_counts(self, all_data, dropna):
 
     def test_value_counts_with_normalize(self, data):
         # GH 33172
-        data = data[:10].unique()
+        data = data[:10].unique().remove_unused_categories()
         values = np.array(data[~data.isna()])
-        ser = pd.Series(data, dtype=data.dtype)
 
-        result = ser.value_counts(normalize=True).sort_index()
-        result = result[result > 0]
+        result = pd.Series(data).value_counts(normalize=True).sort_index()
 
         expected = pd.Series([1 / len(values)] * len(values), index=result.index)
         self.assert_series_equal(result, expected)

From e261f3c916cbab5470b80739e12d209bf0e98158 Mon Sep 17 00:00:00 2001
From: tp <contribute@tensortable.com>
Date: Sun, 29 Nov 2020 08:58:56 +0000
Subject: [PATCH 08/14] values_count fix

---
 pandas/tests/extension/base/methods.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py
index 856bd1ac13706..ca9c2acb9fd12 100644
--- a/pandas/tests/extension/base/methods.py
+++ b/pandas/tests/extension/base/methods.py
@@ -38,12 +38,18 @@ def test_value_counts(self, all_data, dropna):
 
     def test_value_counts_with_normalize(self, data):
         # GH 33172
-        data = data[:10].unique().remove_unused_categories()
+        data = data[:10].unique()
         values = np.array(data[~data.isna()])
+        ser = pd.Series(data, dtype=data.dtype)
 
-        result = pd.Series(data).value_counts(normalize=True).sort_index()
+        result = ser.value_counts(normalize=True).sort_index()
+
+        if not isinstance(data, pd.Categorical):
+            expected = pd.Series([1 / len(values)] * len(values), index=result.index)
+        else:
+            expected = pd.Series(0.0, index=result.index)
+            expected[result > 0] = 1 / len(values)
 
-        expected = pd.Series([1 / len(values)] * len(values), index=result.index)
         self.assert_series_equal(result, expected)
 
     def test_count(self, data_missing):

From a9859b6cf229bfef03bdc03d794555e4b16bd996 Mon Sep 17 00:00:00 2001
From: tp <contribute@tensortable.com>
Date: Sat, 12 Dec 2020 10:32:58 +0000
Subject: [PATCH 09/14] update

---
 doc/source/whatsnew/v1.2.0.rst |  1 -
 doc/source/whatsnew/v1.3.0.rst | 31 +++++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 934f0413f6eb6..6dd011c588702 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -585,7 +585,6 @@ Categorical
 - :meth:`Categorical.fillna` will always return a copy, validate a passed fill value regardless of whether there are any NAs to fill, and disallow an ``NaT`` as a fill value for numeric categories (:issue:`36530`)
 - Bug in :meth:`Categorical.__setitem__` that incorrectly raised when trying to set a tuple value (:issue:`20439`)
 - Bug in :meth:`CategoricalIndex.equals` incorrectly casting non-category entries to ``np.nan`` (:issue:`37667`)
-- Bug in :meth:`Categorical.unique`, where the dtype changed in the unique array if there were unused categories in the original array (:issue:`38140`).
 - Bug in :meth:`CategoricalIndex.where` incorrectly setting non-category entries to ``np.nan`` instead of raising ``TypeError`` (:issue:`37977`)
 - Bug in :meth:`Categorical.to_numpy` and ``np.array(categorical)`` with tz-aware ``datetime64`` categories incorrectly dropping the time zone information instead of casting to object dtype (:issue:`38136`)
 
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 2b0b62ab7facf..dbe008015d9b5 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -230,6 +230,37 @@ Notable bug fixes
 
 These are bug fixes that might have notable behavior changes.
 
+``Categorical.unique`` now always maintains same dtype as original
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Previously, when calling :meth:`Categorical.unique`, unused categories in the new array
+would be removed, meaning that the dtype of the new array would be different than the
+original, if some categories are not present in the unique array:
+
+As an example of this, given:
+
+.. ipython:: python
+
+        dtype = pd.CategoricalDtype(['bad', 'neutral', 'good'], ordered=True)
+        original = pd.Categorical(['good','good', 'bad', 'bad'], dtype=dtype)
+        unique = original.unique()
+
+*pandas < 1.2.0*:
+
+.. code-block:: ipython
+
+    In [1]: unique
+    ['good', 'bad']
+    Categories (2, object): ['bad' < 'good']
+    In [2]: original.dtype == unique.dtype
+    False
+
+*pandas >= 1.2.0*
+
+.. ipython:: python
+
+        unique
+        original.dtype == unique.dtype
 
 Preserve dtypes in  :meth:`~pandas.DataFrame.combine_first`
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

From 9e29a11aaa5ef656413761fcbea2c9bdb0c23685 Mon Sep 17 00:00:00 2001
From: tp <contribute@tensortable.com>
Date: Sat, 12 Dec 2020 11:28:14 +0000
Subject: [PATCH 10/14] fixes

---
 doc/source/whatsnew/v1.3.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index dbe008015d9b5..385b0577a86d9 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -242,7 +242,7 @@ As an example of this, given:
 .. ipython:: python
 
         dtype = pd.CategoricalDtype(['bad', 'neutral', 'good'], ordered=True)
-        original = pd.Categorical(['good','good', 'bad', 'bad'], dtype=dtype)
+        original = pd.Categorical(['good', 'good', 'bad', 'bad'], dtype=dtype)
         unique = original.unique()
 
 *pandas < 1.2.0*:

From 5ed054cf1510227da741a1bc01701fea43e678da Mon Sep 17 00:00:00 2001
From: tp <contribute@tensortable.com>
Date: Wed, 16 Dec 2020 06:59:59 +0000
Subject: [PATCH 11/14] Use series in whatsnew example

---
 doc/source/whatsnew/v1.3.0.rst | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 385b0577a86d9..b07730aaf2293 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -233,16 +233,17 @@ These are bug fixes that might have notable behavior changes.
 ``Categorical.unique`` now always maintains same dtype as original
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-Previously, when calling :meth:`Categorical.unique`, unused categories in the new array
+Previously, when calling :meth:`~Categorical.unique` with categorical data, unused categories in the new array
 would be removed, meaning that the dtype of the new array would be different than the
-original, if some categories are not present in the unique array:
+original, if some categories were not present in the unique array (:issue:`18291`).
 
 As an example of this, given:
 
 .. ipython:: python
 
         dtype = pd.CategoricalDtype(['bad', 'neutral', 'good'], ordered=True)
-        original = pd.Categorical(['good', 'good', 'bad', 'bad'], dtype=dtype)
+        cat = pd.Categorical(['good', 'good', 'bad', 'bad'], dtype=dtype)
+        original = pd.Series(cat)
         unique = original.unique()
 
 *pandas < 1.2.0*:

From f68a38b40d72c71ab4c40ec7dd96640e7e5648ac Mon Sep 17 00:00:00 2001
From: tp <contribute@tensortable.com>
Date: Tue, 22 Dec 2020 11:15:21 +0000
Subject: [PATCH 12/14] Update version in docs to v1.3.0

---
 doc/source/whatsnew/v1.3.0.rst    | 4 ++--
 pandas/core/arrays/categorical.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index b07730aaf2293..6631a175ecb72 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -246,7 +246,7 @@ As an example of this, given:
         original = pd.Series(cat)
         unique = original.unique()
 
-*pandas < 1.2.0*:
+*pandas < 1.3.0*:
 
 .. code-block:: ipython
 
@@ -256,7 +256,7 @@ As an example of this, given:
     In [2]: original.dtype == unique.dtype
     False
 
-*pandas >= 1.2.0*
+*pandas >= 1.3.0*
 
 .. ipython:: python
 
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index f6067714c960b..f97a6390e7f8e 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2129,7 +2129,7 @@ def unique(self):
         Return the ``Categorical`` which ``categories`` and ``codes`` are
         unique.
 
-        .. versionchanged:: 1.2.0
+        .. versionchanged:: 1.3.0
 
             Previously, unused categories were dropped from the new categories.
 

From a5e5096a18e1886ac38960441e4d3563e455322c Mon Sep 17 00:00:00 2001
From: tp <contribute@tensortable.com>
Date: Thu, 4 Mar 2021 18:00:46 +0000
Subject: [PATCH 13/14] diff from rebase

---
 pandas/core/arrays/categorical.py | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index f97a6390e7f8e..ba36e4a630e1f 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2152,18 +2152,8 @@ def unique(self):
         ['b', 'a']
         Categories (3, object): ['a' < 'b' < 'c']
         """
-        # unlike np.unique, unique1d does not sort
         unique_codes = unique1d(self.codes)
-        cat = self.copy()
-
-        # keep nan in codes
-        cat._ndarray = unique_codes
-
-        # exclude nan from indexer for categories
-        take_codes = unique_codes[unique_codes != -1]
-        if self.ordered:
-            take_codes = np.sort(take_codes)
-        return cat.set_categories(cat.categories.take(take_codes))
+        return self._from_backing_data(unique_codes)
 
     def _values_for_factorize(self):
         return self._ndarray, -1

From 0616c202f3ccd9407fea32d625689fec02f4b6f9 Mon Sep 17 00:00:00 2001
From: tp <contribute@tensortable.com>
Date: Thu, 4 Mar 2021 18:24:21 +0000
Subject: [PATCH 14/14] isort cleanup

---
 pandas/tests/arrays/categorical/test_analytics.py | 9 ++++++++-
 pandas/tests/indexes/categorical/test_category.py | 5 ++++-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py
index 7fd15aa02b40c..56d474497a166 100644
--- a/pandas/tests/arrays/categorical/test_analytics.py
+++ b/pandas/tests/arrays/categorical/test_analytics.py
@@ -6,7 +6,14 @@
 
 from pandas.compat import PYPY
 
-from pandas import Categorical, CategoricalDtype, Index, NaT, Series, date_range
+from pandas import (
+    Categorical,
+    CategoricalDtype,
+    Index,
+    NaT,
+    Series,
+    date_range,
+)
 import pandas._testing as tm
 from pandas.api.types import is_scalar
 
diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py
index bd2382b062aae..678344f5b6909 100644
--- a/pandas/tests/indexes/categorical/test_category.py
+++ b/pandas/tests/indexes/categorical/test_category.py
@@ -4,7 +4,10 @@
 from pandas._libs import index as libindex
 
 import pandas as pd
-from pandas import Categorical, CategoricalDtype
+from pandas import (
+    Categorical,
+    CategoricalDtype,
+)
 import pandas._testing as tm
 from pandas.core.indexes.api import (
     CategoricalIndex,