Skip to content

Commit 861867c

Browse files
committed
API: add "level=" argument to MultiIndex.unique()
closes pandas-dev#17896
1 parent 63e8527 commit 861867c

File tree

6 files changed

+95
-16
lines changed

6 files changed

+95
-16
lines changed

doc/source/whatsnew/v0.22.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Other Enhancements
2424

2525
- Better support for :func:`DataFrame.style.to_excel` output with the ``xlsxwriter`` engine. (:issue:`16149`)
2626
- :func:`pandas.tseries.frequencies.to_offset` now accepts leading '+' signs e.g. '+1h'. (:issue:`18171`)
27+
- :func:`MultiIndex.unique` now supports the ``level=`` argument (:issue:`17896`)
2728
-
2829

2930
.. _whatsnew_0220.api_breaking:

pandas/core/indexes/base.py

+26-2
Original file line numberDiff line numberDiff line change
@@ -3757,8 +3757,32 @@ def drop(self, labels, errors='raise'):
37573757
indexer = indexer[~mask]
37583758
return self.delete(indexer)
37593759

3760-
@Appender(base._shared_docs['unique'] % _index_doc_kwargs)
3761-
def unique(self):
3760+
base._shared_docs['index_unique'] = (
3761+
"""
3762+
Return unique values in the index. Uniques are returned in order
3763+
of appearance, this does NOT sort.
3764+
3765+
Parameters
3766+
----------
3767+
level : int or str, optional, default None
3768+
only return values from specified level (for MultiIndex)
3769+
3770+
.. versionadded:: 0.22.0
3771+
3772+
Returns
3773+
-------
3774+
Index without duplicates
3775+
3776+
See Also
3777+
--------
3778+
unique
3779+
Series.unique
3780+
""")
3781+
3782+
@Appender(base._shared_docs['index_unique'] % _index_doc_kwargs)
3783+
def unique(self, level=None):
3784+
if level not in {0, self.name, None}:
3785+
raise ValueError("Level {} not found".format(level))
37623786
result = super(Index, self).unique()
37633787
return self._shallow_copy(result)
37643788

pandas/core/indexes/category.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -361,7 +361,9 @@ def is_monotonic_decreasing(self):
361361
return Index(self.codes).is_monotonic_decreasing
362362

363363
@Appender(base._shared_docs['unique'] % _index_doc_kwargs)
364-
def unique(self):
364+
def unique(self, level=None):
365+
if level not in {0, self.name, None}:
366+
raise ValueError("Level {} not found".format(level))
365367
result = base.IndexOpsMixin.unique(self)
366368
# CategoricalIndex._shallow_copy keeps the original categories
367369
# and ordered if not otherwise specified

pandas/core/indexes/multi.py

+18-5
Original file line numberDiff line numberDiff line change
@@ -884,7 +884,7 @@ def _try_mi(k):
884884

885885
raise InvalidIndexError(key)
886886

887-
def _get_level_values(self, level):
887+
def _get_level_values(self, level, unique=False):
888888
"""
889889
Return vector of label values for requested level,
890890
equal to the length of the index
@@ -894,17 +894,21 @@ def _get_level_values(self, level):
894894
Parameters
895895
----------
896896
level : int level
897+
unique : bool, default False
898+
if True, drop duplicated values
897899
898900
Returns
899901
-------
900902
values : ndarray
901903
"""
902904

903-
unique = self.levels[level]
905+
values = self.levels[level]
904906
labels = self.labels[level]
905-
filled = algos.take_1d(unique._values, labels,
906-
fill_value=unique._na_value)
907-
values = unique._shallow_copy(filled)
907+
if unique:
908+
labels = algos.unique(labels)
909+
filled = algos.take_1d(values._values, labels,
910+
fill_value=values._na_value)
911+
values = values._shallow_copy(filled)
908912
return values
909913

910914
def get_level_values(self, level):
@@ -943,6 +947,15 @@ def get_level_values(self, level):
943947
values = self._get_level_values(level)
944948
return values
945949

950+
@Appender(base._shared_docs['index_unique'] % _index_doc_kwargs)
951+
def unique(self, level=None):
952+
953+
if level is None:
954+
return super(MultiIndex, self).unique()
955+
else:
956+
level = self._get_level_number(level)
957+
return self._get_level_values(level=level, unique=True)
958+
946959
def format(self, space=2, sparsify=None, adjoin=True, names=False,
947960
na_rep=None, formatter=None):
948961
if len(self) == 0:

pandas/tests/indexes/common.py

+23
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,29 @@ def test_duplicates(self, indices):
329329
assert not idx.is_unique
330330
assert idx.has_duplicates
331331

332+
def test_unique(self, indices):
333+
# don't test a MultiIndex here (as it's tested separately)
334+
# don't test a CategoricalIndex because categories change (GH 18291)
335+
if isinstance(indices, (MultiIndex, CategoricalIndex)):
336+
return
337+
338+
# GH 17896
339+
expected = indices.drop_duplicates()
340+
for level in 0, indices.name, None:
341+
result = indices.unique(level=level)
342+
tm.assert_index_equal(result, expected)
343+
344+
for level in 3, 'wrong':
345+
msg = "Level {} not found".format(level)
346+
with tm.assert_raises_regex(ValueError, msg):
347+
indices.unique(level=level)
348+
349+
def test_unique_na(self, indices):
350+
idx = pd.Index([2, np.nan, 2, 1], name='my_index')
351+
expected = pd.Index([2, np.nan, 1], name='my_index')
352+
result = idx.unique()
353+
tm.assert_index_equal(result, expected)
354+
332355
def test_get_unique_index(self, indices):
333356
# MultiIndex tested separately
334357
if not len(indices) or isinstance(indices, MultiIndex):

pandas/tests/indexes/test_multi.py

+24-8
Original file line numberDiff line numberDiff line change
@@ -963,19 +963,21 @@ def test_get_level_values(self):
963963
exp = CategoricalIndex([1, 2, 3, 1, 2, 3])
964964
tm.assert_index_equal(index.get_level_values(1), exp)
965965

966-
def test_get_level_values_na(self):
966+
@pytest.mark.xfail(reason='GH 17924 (returns Int64Index with float data)')
967+
def test_get_level_values_int_with_na(self):
967968
arrays = [['a', 'b', 'b'], [1, np.nan, 2]]
968969
index = pd.MultiIndex.from_arrays(arrays)
969-
values = index.get_level_values(1)
970-
expected = np.array([1, np.nan, 2])
971-
tm.assert_numpy_array_equal(values.values.astype(float), expected)
970+
result = index.get_level_values(1)
971+
expected = Index([1, np.nan, 2])
972+
tm.assert_index_equal(result, expected)
972973

973974
arrays = [['a', 'b', 'b'], [np.nan, np.nan, 2]]
974975
index = pd.MultiIndex.from_arrays(arrays)
975-
values = index.get_level_values(1)
976-
expected = np.array([np.nan, np.nan, 2])
977-
tm.assert_numpy_array_equal(values.values.astype(float), expected)
976+
result = index.get_level_values(1)
977+
expected = Index([np.nan, np.nan, 2])
978+
tm.assert_index_equal(result, expected)
978979

980+
def test_get_level_values_na(self):
979981
arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]]
980982
index = pd.MultiIndex.from_arrays(arrays)
981983
result = index.get_level_values(0)
@@ -990,7 +992,7 @@ def test_get_level_values_na(self):
990992
index = pd.MultiIndex.from_arrays(arrays)
991993
values = index.get_level_values(1)
992994
expected = pd.DatetimeIndex([0, 1, pd.NaT])
993-
tm.assert_numpy_array_equal(values.values, expected.values)
995+
tm.assert_index_equal(values, expected)
994996

995997
arrays = [[], []]
996998
index = pd.MultiIndex.from_arrays(arrays)
@@ -2277,6 +2279,20 @@ def test_unique(self):
22772279
exp = pd.MultiIndex.from_arrays([['a'], ['a']])
22782280
tm.assert_index_equal(res, exp)
22792281

2282+
@pytest.mark.parametrize('level', [0, 'first', 1, 'second'])
2283+
def test_unique_level(self, level):
2284+
# GH #17896 - with level= argument
2285+
result = self.index.unique(level=level)
2286+
expected = self.index.get_level_values(level).unique()
2287+
tm.assert_index_equal(result, expected)
2288+
2289+
# With already unique level
2290+
mi = pd.MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]],
2291+
names=['first', 'second'])
2292+
result = mi.unique(level=level)
2293+
expected = mi.get_level_values(level)
2294+
tm.assert_index_equal(result, expected)
2295+
22802296
def test_unique_datetimelike(self):
22812297
idx1 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-01',
22822298
'2015-01-01', 'NaT', 'NaT'])

0 commit comments

Comments
 (0)