diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt index 238a838cf727e..d1bfb21769ab1 100644 --- a/doc/source/whatsnew/v0.16.0.txt +++ b/doc/source/whatsnew/v0.16.0.txt @@ -193,6 +193,8 @@ Bug Fixes SQLAlchemy type (:issue:`9083`). +- Items in ``Categorical.unique()`` (and ``s.unique()`` if ``s`` is of dtype ``category``) now appear in the order in which they are originally found, not in sorted order (:issue:`9331`). This is now consistent with the behavior for other dtypes in pandas. + - Fixed bug on big endian platforms which produced incorrect results in ``StataReader`` (:issue:`8688`). diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 28c9d096e06d3..960cc0dea361b 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -1386,17 +1386,16 @@ def unique(self): """ Return the unique values. - Unused categories are NOT returned. + Unused categories are NOT returned. Unique values are returned in order + of appearance. 
Returns ------- unique values : array """ - unique_codes = np.unique(self.codes) - # for compatibility with normal unique, which has nan last - if unique_codes[0] == -1: - unique_codes[0:-1] = unique_codes[1:] - unique_codes[-1] = -1 + from pandas.core.nanops import unique1d + # unlike np.unique, unique1d does not sort + unique_codes = unique1d(self.codes) return take_1d(self.categories.values, unique_codes) def equals(self, other): diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 7a8d5e0ac0032..de59b84f1fbc7 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -774,12 +774,15 @@ def test_unique(self): exp = np.asarray(["a","b"]) res = cat.unique() self.assert_numpy_array_equal(res, exp) + cat = Categorical(["a","b","a","a"], categories=["a","b","c"]) res = cat.unique() self.assert_numpy_array_equal(res, exp) - cat = Categorical(["a","b","a", np.nan], categories=["a","b","c"]) + + # unique should not sort + cat = Categorical(["b", "b", np.nan, "a"], categories=["a","b","c"]) res = cat.unique() - exp = np.asarray(["a","b", np.nan], dtype=object) + exp = np.asarray(["b", np.nan, "a"], dtype=object) self.assert_numpy_array_equal(res, exp) def test_mode(self):