Skip to content

Commit 3617e2a

Browse files
committed
API: add "level=" argument to MultiIndex.unique()
closes pandas-dev#17896
1 parent 5bf7f9a commit 3617e2a

File tree

3 files changed

+54
-5
lines changed

3 files changed

+54
-5
lines changed

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -792,6 +792,7 @@ Other API Changes
792792
- Pandas no longer registers matplotlib converters on import. The converters
793793
will be registered and used when the first plot is drawn (:issue:`17710`)
794794
- Setting on a column with a scalar value and 0-len index now raises a ``ValueError`` (:issue:`16823`)
795+
- :func:`MultiIndex.unique` now supports the ``level=`` argument (:issue:`17896`)
795796

796797

797798
.. _whatsnew_0210.deprecations:

pandas/core/indexes/multi.py

+39-5
Original file line numberDiff line numberDiff line change
@@ -886,7 +886,7 @@ def _try_mi(k):
886886

887887
raise InvalidIndexError(key)
888888

889-
def _get_level_values(self, level):
889+
def _get_level_values(self, level, unique=False):
890890
"""
891891
Return vector of label values for requested level,
892892
equal to the length of the index (shorter when ``unique`` is True)
@@ -896,17 +896,23 @@ def _get_level_values(self, level):
896896
Parameters
897897
----------
898898
level : int
899+
unique : bool
900+
if True, drop duplicated values
901+
902+
.. versionadded:: 0.21.0
899903
900904
Returns
901905
-------
902906
values : Index
903907
"""
904908

905-
unique = self.levels[level]
909+
values = self.levels[level]
906910
labels = self.labels[level]
907-
filled = algos.take_1d(unique._values, labels,
908-
fill_value=unique._na_value)
909-
values = unique._shallow_copy(filled)
911+
if unique:
912+
labels = algos.unique(labels[labels != -1])
913+
filled = algos.take_1d(values._values, labels,
914+
fill_value=values._na_value)
915+
values = values._shallow_copy(filled)
910916
return values
911917

912918
def get_level_values(self, level):
@@ -945,6 +951,34 @@ def get_level_values(self, level):
945951
values = self._get_level_values(level)
946952
return values
947953

954+
def unique(self, level=None):
955+
"""
956+
Return unique values in the index. Uniques are returned in order
957+
of appearance; this does NOT sort.
958+
959+
Parameters
960+
----------
961+
level : int, optional, default None
962+
only return values from specific level
963+
964+
.. versionadded:: 0.21.0
965+
966+
Returns
967+
-------
968+
Index without duplicates (MultiIndex as long as level=None)
969+
970+
See Also
971+
--------
972+
unique
973+
Index.unique
974+
Series.unique
975+
"""
976+
977+
if level is None:
978+
return super(MultiIndex, self).unique()
979+
else:
980+
return self._get_level_values(level=level, unique=True)
981+
948982
def format(self, space=2, sparsify=None, adjoin=True, names=False,
949983
na_rep=None, formatter=None):
950984
if len(self) == 0:

pandas/tests/indexes/test_multi.py

+14
Original file line numberDiff line numberDiff line change
@@ -2268,6 +2268,20 @@ def test_unique(self):
22682268
exp = pd.MultiIndex.from_arrays([['a'], ['a']])
22692269
tm.assert_index_equal(res, exp)
22702270

2271+
# GH #17896 - with level= argument
2272+
result = self.index.unique(level=0)
2273+
expected = Index(['foo', 'bar', 'baz', 'qux'],
2274+
name='first')
2275+
tm.assert_index_equal(result, expected)
2276+
assert result.name == 'first'
2277+
2278+
# with NaNs
2279+
arrays = [['a', 'b', 'b'], [2, np.nan, 2]]
2280+
index = pd.MultiIndex.from_arrays(arrays)
2281+
values = index.unique(level=1)
2282+
expected = np.array([2], dtype=np.int64)
2283+
tm.assert_numpy_array_equal(values.values, expected)
2284+
22712285
def test_unique_datetimelike(self):
22722286
idx1 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-01',
22732287
'2015-01-01', 'NaT', 'NaT'])

0 commit comments

Comments
 (0)