Skip to content

Commit acc549d

Browse files
committed
Categorical: let unique only return used categories
1 parent ff0756f commit acc549d

File tree

3 files changed

+19
-5
lines changed

3 files changed

+19
-5
lines changed

doc/source/whatsnew/v0.15.2.txt

+3
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ API changes
4242

4343
- Bug in concat of Series with ``category`` dtype which was coercing to ``object``. (:issue:`8641`)
4444

45+
- Bug in unique of Series with ``category`` dtype, which returned all categories regardless of
46+
whether they were "used" or not (see :issue:`8559` for the discussion).
47+
4548
- ``Series.all`` and ``Series.any`` now support the ``level`` and ``skipna`` parameters. ``Series.all``, ``Series.any``, ``Index.all``, and ``Index.any`` no longer support the ``out`` and ``keepdims`` parameters, which existed for compatibility with ndarray. Various index types no longer support the ``all`` and ``any`` aggregation functions and will now raise ``TypeError``. (:issue:`8302`):
4649

4750
.. ipython:: python

pandas/core/categorical.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -1326,13 +1326,18 @@ def unique(self):
13261326
"""
13271327
Return the unique values.
13281328
1329-
This includes all categories, even if one or more is unused.
1329+
Unused categories are NOT returned.
13301330
13311331
Returns
13321332
-------
13331333
unique values : array
13341334
"""
1335-
return np.asarray(self.categories)
1335+
unique_codes = np.unique(self.codes)
1336+
# for compatibility with normal unique, which has nan last
1337+
if unique_codes[0] == -1:
1338+
unique_codes[0:-1] = unique_codes[1:]
1339+
unique_codes[-1] = -1
1340+
return take_1d(self.categories.values, unique_codes)
13361341

13371342
def equals(self, other):
13381343
"""

pandas/tests/test_categorical.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -769,11 +769,17 @@ def test_min_max(self):
769769
self.assertEqual(_max, 1)
770770

771771
def test_unique(self):
772-
cat = Categorical(["a","b","c","d"])
773-
exp = np.asarray(["a","b","c","d"])
772+
cat = Categorical(["a","b"])
773+
exp = np.asarray(["a","b"])
774774
res = cat.unique()
775775
self.assert_numpy_array_equal(res, exp)
776-
self.assertEqual(type(res), type(exp))
776+
cat = Categorical(["a","b","a","a"], categories=["a","b","c"])
777+
res = cat.unique()
778+
self.assert_numpy_array_equal(res, exp)
779+
cat = Categorical(["a","b","a", np.nan], categories=["a","b","c"])
780+
res = cat.unique()
781+
exp = np.asarray(["a","b", np.nan], dtype=object)
782+
self.assert_numpy_array_equal(res, exp)
777783

778784
def test_mode(self):
779785
s = Categorical([1,1,2,4,5,5,5], categories=[5,4,3,2,1], ordered=True)

0 commit comments

Comments
 (0)