From acc549d0395362bbdb0d02c71a897d992e042876 Mon Sep 17 00:00:00 2001 From: Jan Schulz Date: Sat, 29 Nov 2014 22:41:52 +0100 Subject: [PATCH] Categorical: let unique only return used categories --- doc/source/whatsnew/v0.15.2.txt | 3 +++ pandas/core/categorical.py | 9 +++++++-- pandas/tests/test_categorical.py | 12 +++++++++--- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.15.2.txt b/doc/source/whatsnew/v0.15.2.txt index d6d36fd8d14ba..377a9dea126e6 100644 --- a/doc/source/whatsnew/v0.15.2.txt +++ b/doc/source/whatsnew/v0.15.2.txt @@ -42,6 +42,9 @@ API changes - Bug in concat of Series with ``category`` dtype which were coercing to ``object``. (:issue:`8641`) +- Bug in unique of Series with ``category`` dtype, which returned all categories regardless + of whether they were "used" or not (see :issue:`8559` for the discussion). + - ``Series.all`` and ``Series.any`` now support the ``level`` and ``skipna`` parameters. ``Series.all``, ``Series.any``, ``Index.all``, and ``Index.any`` no longer support the ``out`` and ``keepdims`` parameters, which existed for compatibility with ndarray. Various index types no longer support the ``all`` and ``any`` aggregation functions and will now raise ``TypeError``. (:issue:`8302`): .. ipython:: python diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index eb0429ad4a0cd..7dfdc88dddbff 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -1326,13 +1326,18 @@ def unique(self): """ Return the unique values. - This includes all categories, even if one or more is unused. + Unused categories are NOT returned. 
Returns ------- unique values : array """ - return np.asarray(self.categories) + unique_codes = np.unique(self.codes) + # for compatibility with normal unique, which has nan last + if unique_codes[0] == -1: + unique_codes[0:-1] = unique_codes[1:] + unique_codes[-1] = -1 + return take_1d(self.categories.values, unique_codes) def equals(self, other): """ diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index dc82abfb40e02..196ad8b7680b9 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -769,11 +769,17 @@ def test_min_max(self): self.assertEqual(_max, 1) def test_unique(self): - cat = Categorical(["a","b","c","d"]) - exp = np.asarray(["a","b","c","d"]) + cat = Categorical(["a","b"]) + exp = np.asarray(["a","b"]) res = cat.unique() self.assert_numpy_array_equal(res, exp) - self.assertEqual(type(res), type(exp)) + cat = Categorical(["a","b","a","a"], categories=["a","b","c"]) + res = cat.unique() + self.assert_numpy_array_equal(res, exp) + cat = Categorical(["a","b","a", np.nan], categories=["a","b","c"]) + res = cat.unique() + exp = np.asarray(["a","b", np.nan], dtype=object) + self.assert_numpy_array_equal(res, exp) def test_mode(self): s = Categorical([1,1,2,4,5,5,5], categories=[5,4,3,2,1], ordered=True)