Skip to content

Commit e266c3d

Browse files
committed
Merge pull request pandas-dev#9331 from shoyer/categorical-unique-order
BUG: don't sort unique values from categoricals
2 parents c37f8df + b787bf8 commit e266c3d

File tree

3 files changed

+12
-8
lines changed

3 files changed

+12
-8
lines changed

doc/source/whatsnew/v0.16.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,8 @@ Bug Fixes
193193
SQLAlchemy type (:issue:`9083`).
194194

195195

196+
- Items in ``Categorical.unique()`` (and ``s.unique()`` if ``s`` is of dtype ``category``) now appear in the order in which they are originally found, not in sorted order (:issue:`9331`). This is now consistent with the behavior for other dtypes in pandas.
197+
196198

197199
- Fixed bug on big-endian platforms which produced incorrect results in ``StataReader`` (:issue:`8688`).
198200

pandas/core/categorical.py

+5-6
Original file line numberDiff line numberDiff line change
@@ -1385,17 +1385,16 @@ def unique(self):
13851385
"""
13861386
Return the unique values.
13871387
1388-
Unused categories are NOT returned.
1388+
Unused categories are NOT returned. Unique values are returned in order
1389+
of appearance.
13891390
13901391
Returns
13911392
-------
13921393
unique values : array
13931394
"""
1394-
unique_codes = np.unique(self.codes)
1395-
# for compatibility with normal unique, which has nan last
1396-
if unique_codes[0] == -1:
1397-
unique_codes[0:-1] = unique_codes[1:]
1398-
unique_codes[-1] = -1
1395+
from pandas.core.nanops import unique1d
1396+
# unlike np.unique, unique1d does not sort
1397+
unique_codes = unique1d(self.codes)
13991398
return take_1d(self.categories.values, unique_codes)
14001399

14011400
def equals(self, other):

pandas/tests/test_categorical.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -818,12 +818,15 @@ def test_unique(self):
818818
exp = np.asarray(["a","b"])
819819
res = cat.unique()
820820
self.assert_numpy_array_equal(res, exp)
821+
821822
cat = Categorical(["a","b","a","a"], categories=["a","b","c"])
822823
res = cat.unique()
823824
self.assert_numpy_array_equal(res, exp)
824-
cat = Categorical(["a","b","a", np.nan], categories=["a","b","c"])
825+
826+
# unique should not sort
827+
cat = Categorical(["b", "b", np.nan, "a"], categories=["a","b","c"])
825828
res = cat.unique()
826-
exp = np.asarray(["a","b", np.nan], dtype=object)
829+
exp = np.asarray(["b", np.nan, "a"], dtype=object)
827830
self.assert_numpy_array_equal(res, exp)
828831

829832
def test_mode(self):

0 commit comments

Comments
 (0)