Skip to content

Commit 7267544

Browse files
committed
ENH: ExtensionArray.unique
1 parent 01e99de commit 7267544

File tree

4 files changed

+33
-3
lines changed

4 files changed

+33
-3
lines changed

pandas/core/algorithms.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
ABCSeries, ABCIndex,
1313
ABCIndexClass, ABCCategorical)
1414
from pandas.core.dtypes.common import (
15+
is_array_like,
1516
is_unsigned_integer_dtype, is_signed_integer_dtype,
1617
is_integer_dtype, is_complex_dtype,
1718
is_object_dtype,
@@ -168,8 +169,7 @@ def _ensure_arraylike(values):
168169
"""
169170
ensure that we are arraylike if not already
170171
"""
171-
if not isinstance(values, (np.ndarray, ABCCategorical,
172-
ABCIndexClass, ABCSeries)):
172+
if not is_array_like(values):
173173
inferred = lib.infer_dtype(values)
174174
if inferred in ['mixed', 'string', 'unicode']:
175175
if isinstance(values, tuple):
@@ -356,7 +356,7 @@ def unique(values):
356356
# categorical is a fast-path
357357
# this will coerce Categorical, CategoricalIndex,
358358
# and category dtypes Series to same return of Category
359-
if is_categorical_dtype(values):
359+
if is_extension_array_dtype(values):
360360
values = getattr(values, '.values', values)
361361
return values.unique()
362362

pandas/core/arrays/base.py

+12
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,18 @@ def isna(self):
216216
"""
217217
raise AbstractMethodError(self)
218218

219+
def unique(self):
    """Compute the ExtensionArray of unique values.

    The array is converted to object dtype, de-duplicated with the
    top-level ``pandas.unique``, and the result is passed back to this
    array's own constructor.

    Returns
    -------
    uniques : ExtensionArray
        A new instance of ``type(self)`` built from the unique values.
    """
    # Imported inside the method rather than at module level
    # (presumably to avoid a circular import -- confirm).
    from pandas import unique as _pd_unique

    as_objects = self.astype(object)
    return type(self)(_pd_unique(as_objects))
230+
219231
# ------------------------------------------------------------------------
220232
# Indexing methods
221233
# ------------------------------------------------------------------------

pandas/tests/extension/base/methods.py

+11
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,14 @@ def test_count(self, data_missing):
3030
def test_apply_simple_series(self, data):
    """Applying a function to a Series built from ``data`` yields a Series."""
    series = pd.Series(data)
    assert isinstance(series.apply(id), pd.Series)
33+
34+
@pytest.mark.parametrize('box', [pd.Series, lambda x: x])
@pytest.mark.parametrize('method', [lambda x: x.unique(), pd.unique])
def test_unique(self, data, box, method):
    """Check that de-duplicating a two-element array of equal values works.

    ``box`` optionally wraps the array in a Series; ``method`` is either
    the object's own ``unique`` method or the top-level ``pd.unique``.
    """
    array_type = type(data)
    first = data[0]
    duplicated = box(array_type([first, first]))

    result = method(duplicated)

    # Exactly one value survives, the container type is kept, and the
    # surviving value equals the repeated input value.
    assert len(result) == 1
    assert isinstance(result, array_type)
    assert result[0] == duplicated[0]

pandas/tests/extension/json/array.py

+7
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,13 @@ def take(self, indexer, allow_fill=True, fill_value=None):
8282
def copy(self, deep=False):
    """Return a new array of the same type wrapping a slice-copy of ``data``.

    Parameters
    ----------
    deep : bool, default False
        Accepted for interface compatibility; this implementation does
        not consult it. The container is copied with ``[:]`` and the
        elements themselves are shared with the original.

    Returns
    -------
    A new instance of ``type(self)``.
    """
    array_type = type(self)
    shallow_data = self.data[:]
    return array_type(shallow_data)
8484

85+
def unique(self):
    """Return a new array holding each distinct mapping in ``self.data`` once.

    The parent implementation is not used here because ``np.array`` will
    try to infer a 2-dim object from a sequence of mappings.

    Uniques are returned in order of first appearance. Two mappings are
    duplicates when they hold the same key/value pairs, regardless of
    key insertion order. Values must be hashable.

    Returns
    -------
    uniques : same type as ``self``
        Built from plain ``dict`` copies of the first occurrence of each
        distinct mapping.
    """
    seen = set()
    uniques = []
    for mapping in self.data:
        # frozenset makes the key independent of insertion order, so
        # {'a': 1, 'b': 2} and {'b': 2, 'a': 1} collapse to one entry.
        key = frozenset(mapping.items())
        if key not in seen:
            seen.add(key)
            uniques.append(dict(mapping))
    return type(self)(uniques)
91+
8592
@property
8693
def _na_value(self):
8794
return {}

0 commit comments

Comments
 (0)