Skip to content

Commit a99f9ac

Browse files
committed
API: add "level=" argument to MultiIndex.unique()
closes pandas-dev#17896
1 parent 5bf7f9a commit a99f9ac

File tree

3 files changed

+53
-5
lines changed

3 files changed

+53
-5
lines changed

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -792,6 +792,7 @@ Other API Changes
792792
- Pandas no longer registers matplotlib converters on import. The converters
793793
will be registered and used when the first plot is drawn (:issue:`17710`)
794794
- Setting on a column with a scalar value and 0-len index now raises a ``ValueError`` (:issue:`16823`)
795+
- :func:`MultiIndex.unique` now supports the ``level=`` argument (:issue:`17896`)
795796

796797

797798
.. _whatsnew_0210.deprecations:

pandas/core/indexes/multi.py

+37-5
Original file line numberDiff line numberDiff line change
@@ -886,7 +886,7 @@ def _try_mi(k):
886886

887887
raise InvalidIndexError(key)
888888

889-
def _get_level_values(self, level):
889+
def _get_level_values(self, level, unique=False):
890890
"""
891891
Return vector of label values for requested level,
892892
equal to the length of the index
@@ -896,17 +896,21 @@ def _get_level_values(self, level):
896896
Parameters
897897
----------
898898
level : int level
899+
unique : bool
900+
if True, drop duplicated values
899901
900902
Returns
901903
-------
902904
values : ndarray
903905
"""
904906

905-
unique = self.levels[level]
907+
values = self.levels[level]
906908
labels = self.labels[level]
907-
filled = algos.take_1d(unique._values, labels,
908-
fill_value=unique._na_value)
909-
values = unique._shallow_copy(filled)
909+
if unique:
910+
labels = algos.unique(labels[labels != -1])
911+
filled = algos.take_1d(values._values, labels,
912+
fill_value=values._na_value)
913+
values = values._shallow_copy(filled)
910914
return values
911915

912916
def get_level_values(self, level):
@@ -945,6 +949,34 @@ def get_level_values(self, level):
945949
values = self._get_level_values(level)
946950
return values
947951

952+
def unique(self, level=None):
    """
    Return unique values in the index.

    Uniques are returned in order of appearance; the result is NOT
    sorted.

    Parameters
    ----------
    level : int or str, optional, default None
        Only return the unique values of this level, given either as
        an integer position or as a level name. Missing values in
        that level are not included in the result.

        .. versionadded:: 0.21.0

    Returns
    -------
    Index without duplicates (a MultiIndex when ``level`` is None,
    otherwise an Index holding the values of the requested level)

    See Also
    --------
    Index.unique
    Series.unique
    MultiIndex.get_level_values
    """
    if level is None:
        return super(MultiIndex, self).unique()

    # ``_get_level_values`` indexes ``self.levels`` and ``self.labels``
    # directly with ``level``, so a level name has to be translated to
    # its integer position before delegating.
    level = self._get_level_number(level)
    return self._get_level_values(level=level, unique=True)
979+
948980
def format(self, space=2, sparsify=None, adjoin=True, names=False,
949981
na_rep=None, formatter=None):
950982
if len(self) == 0:

pandas/tests/indexes/test_multi.py

+15
Original file line numberDiff line numberDiff line change
@@ -2268,6 +2268,21 @@ def test_unique(self):
22682268
exp = pd.MultiIndex.from_arrays([['a'], ['a']])
22692269
tm.assert_index_equal(res, exp)
22702270

2271+
# GH #17896 - with level= argument
2272+
result = self.index.unique(level=0)
2273+
expected = Index(['foo', 'bar', 'baz', 'qux'],
2274+
name='first')
2275+
tm.assert_index_equal(result, expected)
2276+
assert result.name == 'first'
2277+
2278+
# with NaNs
2279+
arrays = [['a', 'b', 'b'], [2, np.nan, 2]]
2280+
index = pd.MultiIndex.from_arrays(arrays)
2281+
values = index.unique(level=1)
2282+
expected = np.array([2], dtype=np.int64)
2283+
tm.assert_numpy_array_equal(values.values, expected)
2284+
2285+
22712286
def test_unique_datetimelike(self):
22722287
idx1 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-01',
22732288
'2015-01-01', 'NaT', 'NaT'])

0 commit comments

Comments
 (0)