Skip to content

Commit efb1a1b

Browse files
committed
API: add "level=" argument to MultiIndex.unique()
closes pandas-dev#17896
1 parent 96a5274 commit efb1a1b

File tree

6 files changed

+94
-16
lines changed

6 files changed

+94
-16
lines changed

doc/source/whatsnew/v0.22.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Other Enhancements
2424

2525
- Better support for :func:`DataFrame.style.to_excel` output with the ``xlsxwriter`` engine. (:issue:`16149`)
2626
- :func:`pandas.tseries.frequencies.to_offset` now accepts leading '+' signs e.g. '+1h'. (:issue:`18171`)
27+
- :func:`MultiIndex.unique` now supports the ``level=`` argument (:issue:`17896`)
2728
-
2829

2930
.. _whatsnew_0220.api_breaking:

pandas/core/indexes/base.py

+26-2
Original file line numberDiff line numberDiff line change
@@ -3757,8 +3757,32 @@ def drop(self, labels, errors='raise'):
37573757
indexer = indexer[~mask]
37583758
return self.delete(indexer)
37593759

3760-
@Appender(base._shared_docs['unique'] % _index_doc_kwargs)
3761-
def unique(self):
3760+
base._shared_docs['index_unique'] = (
3761+
"""
3762+
Return unique values in the index. Uniques are returned in order
3763+
of appearance, this does NOT sort.
3764+
3765+
Parameters
3766+
----------
3767+
level : int or str, optional, default None
3768+
only return values from specified level (for MultiIndex)
3769+
3770+
.. versionadded:: 0.22.0
3771+
3772+
Returns
3773+
-------
3774+
Index without duplicates
3775+
3776+
See Also
3777+
--------
3778+
unique
3779+
Series.unique
3780+
""")
3781+
3782+
@Appender(base._shared_docs['index_unique'] % _index_doc_kwargs)
3783+
def unique(self, level=None):
3784+
if level not in {0, self.name, None}:
3785+
raise ValueError("Level {} not found".format(level))
37623786
result = super(Index, self).unique()
37633787
return self._shallow_copy(result)
37643788

pandas/core/indexes/category.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -361,7 +361,9 @@ def is_monotonic_decreasing(self):
361361
return Index(self.codes).is_monotonic_decreasing
362362

363363
@Appender(base._shared_docs['unique'] % _index_doc_kwargs)
364-
def unique(self):
364+
def unique(self, level=None):
365+
if level not in {0, self.name, None}:
366+
raise ValueError("Level {} not found".format(level))
365367
result = base.IndexOpsMixin.unique(self)
366368
# CategoricalIndex._shallow_copy keeps original categories
367369
# and ordered if not otherwise specified

pandas/core/indexes/multi.py

+18-5
Original file line numberDiff line numberDiff line change
@@ -886,7 +886,7 @@ def _try_mi(k):
886886

887887
raise InvalidIndexError(key)
888888

889-
def _get_level_values(self, level):
889+
def _get_level_values(self, level, unique=False):
890890
"""
891891
Return vector of label values for requested level,
892892
equal to the length of the index
@@ -896,17 +896,21 @@ def _get_level_values(self, level):
896896
Parameters
897897
----------
898898
level : int level
899+
unique : bool, default False
900+
if True, drop duplicated values
899901
900902
Returns
901903
-------
902904
values : Index
903905
"""
904906

905-
unique = self.levels[level]
907+
values = self.levels[level]
906908
labels = self.labels[level]
907-
filled = algos.take_1d(unique._values, labels,
908-
fill_value=unique._na_value)
909-
values = unique._shallow_copy(filled)
909+
if unique:
910+
labels = algos.unique(labels)
911+
filled = algos.take_1d(values._values, labels,
912+
fill_value=values._na_value)
913+
values = values._shallow_copy(filled)
910914
return values
911915

912916
def get_level_values(self, level):
@@ -945,6 +949,15 @@ def get_level_values(self, level):
945949
values = self._get_level_values(level)
946950
return values
947951

952+
@Appender(base._shared_docs['index_unique'] % _index_doc_kwargs)
953+
def unique(self, level=None):
954+
955+
if level is None:
956+
return super(MultiIndex, self).unique()
957+
else:
958+
level = self._get_level_number(level)
959+
return self._get_level_values(level=level, unique=True)
960+
948961
def format(self, space=2, sparsify=None, adjoin=True, names=False,
949962
na_rep=None, formatter=None):
950963
if len(self) == 0:

pandas/tests/indexes/common.py

+22
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,28 @@ def test_duplicates(self, indices):
329329
assert not idx.is_unique
330330
assert idx.has_duplicates
331331

332+
def test_unique(self, indices):
333+
# don't test a MultiIndex here (as it's tested separately)
334+
if isinstance(indices, MultiIndex):
335+
return
336+
337+
# GH 17896
338+
expected = indices.drop_duplicates()
339+
for level in 0, indices.name, None:
340+
result = indices.unique(level=level)
341+
tm.assert_index_equal(result, expected)
342+
343+
for level in 3, 'wrong':
344+
msg = "Level {} not found".format(level)
345+
with tm.assert_raises_regex(ValueError, msg):
346+
indices.unique(level=level)
347+
348+
def test_unique_na(self, indices):
349+
idx = pd.Index([2, np.nan, 2, 1], name='my_index')
350+
expected = pd.Index([2, np.nan, 1], name='my_index')
351+
result = idx.unique()
352+
tm.assert_index_equal(result, expected)
353+
332354
def test_get_unique_index(self, indices):
333355
# MultiIndex tested separately
334356
if not len(indices) or isinstance(indices, MultiIndex):

pandas/tests/indexes/test_multi.py

+24-8
Original file line numberDiff line numberDiff line change
@@ -955,19 +955,21 @@ def test_get_level_values(self):
955955
exp = CategoricalIndex([1, 2, 3, 1, 2, 3])
956956
tm.assert_index_equal(index.get_level_values(1), exp)
957957

958-
def test_get_level_values_na(self):
958+
@pytest.mark.xfail(reason='GH 17924 (returns Int64Index with float data)')
959+
def test_get_level_values_int_with_na(self):
959960
arrays = [['a', 'b', 'b'], [1, np.nan, 2]]
960961
index = pd.MultiIndex.from_arrays(arrays)
961-
values = index.get_level_values(1)
962-
expected = np.array([1, np.nan, 2])
963-
tm.assert_numpy_array_equal(values.values.astype(float), expected)
962+
result = index.get_level_values(1)
963+
expected = Index([1, np.nan, 2])
964+
tm.assert_index_equal(result, expected)
964965

965966
arrays = [['a', 'b', 'b'], [np.nan, np.nan, 2]]
966967
index = pd.MultiIndex.from_arrays(arrays)
967-
values = index.get_level_values(1)
968-
expected = np.array([np.nan, np.nan, 2])
969-
tm.assert_numpy_array_equal(values.values.astype(float), expected)
968+
result = index.get_level_values(1)
969+
expected = Index([np.nan, np.nan, 2])
970+
tm.assert_index_equal(result, expected)
970971

972+
def test_get_level_values_na(self):
971973
arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]]
972974
index = pd.MultiIndex.from_arrays(arrays)
973975
result = index.get_level_values(0)
@@ -982,7 +984,7 @@ def test_get_level_values_na(self):
982984
index = pd.MultiIndex.from_arrays(arrays)
983985
values = index.get_level_values(1)
984986
expected = pd.DatetimeIndex([0, 1, pd.NaT])
985-
tm.assert_numpy_array_equal(values.values, expected.values)
987+
tm.assert_index_equal(values, expected)
986988

987989
arrays = [[], []]
988990
index = pd.MultiIndex.from_arrays(arrays)
@@ -2269,6 +2271,20 @@ def test_unique(self):
22692271
exp = pd.MultiIndex.from_arrays([['a'], ['a']])
22702272
tm.assert_index_equal(res, exp)
22712273

2274+
@pytest.mark.parametrize('level', [0, 'first', 1, 'second'])
2275+
def test_unique_level(self, level):
2276+
# GH #17896 - with level= argument
2277+
result = self.index.unique(level=level)
2278+
expected = self.index.get_level_values(level).unique()
2279+
tm.assert_index_equal(result, expected)
2280+
2281+
# With already unique level
2282+
mi = pd.MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]],
2283+
names=['first', 'second'])
2284+
result = mi.unique(level=level)
2285+
expected = mi.get_level_values(level)
2286+
tm.assert_index_equal(result, expected)
2287+
22722288
def test_unique_datetimelike(self):
22732289
idx1 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-01',
22742290
'2015-01-01', 'NaT', 'NaT'])

0 commit comments

Comments
 (0)