Skip to content

Commit 2304b1a

Browse files
committed
DOC/TST: add pd.unique doc-string & fix buggy return of Categorical
closes pandas-dev#9346
1 parent 751119f commit 2304b1a

File tree

3 files changed

+97
-4
lines changed

3 files changed

+97
-4
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1147,6 +1147,7 @@ Conversion
11471147
- Bug in ``DataFrame`` construction with nulls and datetimes in a list-like (:issue:`15869`)
11481148
- Bug in ``DataFrame.fillna()`` with tz-aware datetimes (:issue:`15855`)
11491149
- Bug in ``is_string_dtype``, ``is_timedelta64_ns_dtype``, and ``is_string_like_dtype`` in which an error was raised when ``None`` was passed in (:issue:`15941`)
1150+
- Bug in the return type of ``pd.unique`` on a ``Categorical``, which was returning an ndarray and not a ``Categorical`` (:issue:`15903`)
11501151

11511152
Indexing
11521153
^^^^^^^^

pandas/core/algorithms.py

+51-4
Original file line numberDiff line numberDiff line change
@@ -267,11 +267,58 @@ def match(to_match, values, na_sentinel=-1):
267267
return result
268268

269269

270-
def unique1d(values):
270+
def unique(values):
271271
"""
272-
Hash table-based unique
272+
Hash table-based unique. Uniques are returned in order
273+
of appearance. This does NOT sort.
274+
275+
Parameters
276+
----------
277+
values : 1d array-like
278+
279+
Returns
280+
-------
281+
unique values. An Index is returned for Index input and a Categorical
282+
(or a category-dtype Series) for categorical input, otherwise ndarray
283+
284+
Examples
285+
--------
286+
>>> pd.unique(pd.Series([2, 1, 3, 3]))
287+
array([2, 1, 3])
288+
289+
>>> pd.unique(pd.Series([2] + [1] * 5))
290+
array([2, 1])
291+
292+
>>> pd.unique(pd.Series([pd.Timestamp('20160101'),
293+
... pd.Timestamp('20160101')]))
294+
array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]')
295+
296+
>>> pd.unique(pd.Index([pd.Timestamp('20160101', tz='US/Eastern'),
297+
... pd.Timestamp('20160101', tz='US/Eastern')]))
298+
DatetimeIndex(['2016-01-01 00:00:00-05:00'],
299+
dtype='datetime64[ns, US/Eastern]', freq=None)
300+
301+
>>> pd.unique(list('aabc'))
302+
array(['a', 'b', 'c'], dtype=object)
303+
304+
>>> pd.unique(pd.Series(pd.Categorical(list('aabc'))))
305+
0 a
306+
1 b
307+
2 c
308+
dtype: category
309+
Categories (3, object): [a, b, c]
310+
273311
"""
274312
values = _ensure_arraylike(values)
313+
314+
# categorical is a fast-path
315+
if is_categorical_dtype(values):
316+
317+
if isinstance(values, ABCSeries):
318+
from pandas import Series
319+
return Series(values.values.unique(), name=values.name)
320+
return values.unique()
321+
275322
original = values
276323
htable, _, values, dtype, ndtype = _get_hashtable_algo(values)
277324

@@ -282,7 +329,7 @@ def unique1d(values):
282329
return uniques
283330

284331

285-
unique = unique1d
332+
unique1d = unique
286333

287334

288335
def isin(comps, values):
@@ -651,7 +698,7 @@ def mode(values):
651698
if is_categorical_dtype(values):
652699

653700
if isinstance(values, Series):
654-
return Series(values.values.mode())
701+
return Series(values.values.mode(), name=values.name)
655702
return values.mode()
656703

657704
values, dtype, ndtype = _ensure_data(values)

pandas/tests/test_algos.py

+45
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,51 @@ def test_uint64_overflow(self):
384384
exp = np.array([1, 2, 2**63], dtype=np.uint64)
385385
tm.assert_numpy_array_equal(algos.unique(s), exp)
386386

387+
def test_categorical(self):
388+
c = pd.Categorical(list('aabc'))
389+
result = c.unique()
390+
expected = pd.Categorical(list('abc'))
391+
tm.assert_categorical_equal(result, expected)
392+
393+
result = algos.unique(c)
394+
tm.assert_categorical_equal(result, expected)
395+
396+
result = algos.unique(Series(c, name='foo'))
397+
expected = Series(expected, name='foo')
398+
tm.assert_series_equal(result, expected)
399+
400+
def test_order_of_appearance(self):
401+
# 9346
402+
# light testing of guarantee of order of appearance
403+
# these also are the doc-examples
404+
result = pd.unique(pd.Series([2, 1, 3, 3]))
405+
tm.assert_numpy_array_equal(result, np.array([2, 1, 3]))
406+
407+
result = pd.unique(pd.Series([2] + [1] * 5))
408+
tm.assert_numpy_array_equal(result, np.array([2, 1]))
409+
410+
result = pd.unique(Series([pd.Timestamp('20160101'),
411+
pd.Timestamp('20160101')]))
412+
expected = np.array(['2016-01-01T00:00:00.000000000'],
413+
dtype='datetime64[ns]')
414+
tm.assert_numpy_array_equal(result, expected)
415+
416+
result = pd.unique(pd.Index(
417+
[pd.Timestamp('20160101', tz='US/Eastern'),
418+
pd.Timestamp('20160101', tz='US/Eastern')]))
419+
expected = pd.DatetimeIndex(['2016-01-01 00:00:00'],
420+
dtype='datetime64[ns, US/Eastern]',
421+
freq=None)
422+
tm.assert_index_equal(result, expected)
423+
424+
result = pd.unique(list('aabc'))
425+
expected = np.array(['a', 'b', 'c'], dtype=object)
426+
tm.assert_numpy_array_equal(result, expected)
427+
428+
result = pd.unique(Series(pd.Categorical(list('aabc'))))
429+
expected = Series(pd.Categorical(list('abc')))
430+
tm.assert_series_equal(result, expected)
431+
387432

388433
class TestIsin(tm.TestCase):
389434

0 commit comments

Comments
 (0)