Skip to content

Commit a07e2d8

Browse files
committed
API: add "level=" argument to MultiIndex.unique()
closes pandas-dev#17896
1 parent a355ed2 commit a07e2d8

File tree

5 files changed

+84
-15
lines changed

5 files changed

+84
-15
lines changed

doc/source/whatsnew/v0.21.1.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ Conversion
6868
Indexing
6969
^^^^^^^^
7070

71-
-
71+
- :func:`MultiIndex.unique` now supports the ``level=`` argument (:issue:`17896`)
7272
-
7373
-
7474

pandas/core/indexes/base.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -3743,7 +3743,9 @@ def drop(self, labels, errors='raise'):
37433743
return self.delete(indexer)
37443744

37453745
@Appender(base._shared_docs['unique'] % _index_doc_kwargs)
3746-
def unique(self):
3746+
def unique(self, level=None):
3747+
if level not in {0, self.name, None}:
3748+
raise ValueError("Level {} not found".format(level))
37473749
result = super(Index, self).unique()
37483750
return self._shallow_copy(result)
37493751

pandas/core/indexes/multi.py

+38-5
Original file line numberDiff line numberDiff line change
@@ -886,7 +886,7 @@ def _try_mi(k):
886886

887887
raise InvalidIndexError(key)
888888

889-
def _get_level_values(self, level):
889+
def _get_level_values(self, level, unique=False):
890890
"""
891891
Return vector of label values for requested level,
892892
equal to the length of the index
@@ -896,17 +896,21 @@ def _get_level_values(self, level):
896896
Parameters
897897
----------
898898
level : int level
899+
unique : bool, default False
900+
If True, drop duplicate values.
899901
900902
Returns
901903
-------
902904
values : Index
903905
"""
904906

905-
unique = self.levels[level]
907+
values = self.levels[level]
906908
labels = self.labels[level]
907-
filled = algos.take_1d(unique._values, labels,
908-
fill_value=unique._na_value)
909-
values = unique._shallow_copy(filled)
909+
if unique:
910+
labels = algos.unique(labels)
911+
filled = algos.take_1d(values._values, labels,
912+
fill_value=values._na_value)
913+
values = values._shallow_copy(filled)
910914
return values
911915

912916
def get_level_values(self, level):
@@ -945,6 +949,35 @@ def get_level_values(self, level):
945949
values = self._get_level_values(level)
946950
return values
947951

952+
def unique(self, level=None):
953+
"""
954+
Return unique values in the index. Uniques are returned in order
955+
of appearance; this does NOT sort.
956+
957+
Parameters
958+
----------
959+
level : int or str, optional, default None
960+
only return values from specified level
961+
962+
.. versionadded:: 0.21.1
963+
964+
Returns
965+
-------
966+
Index without duplicates (MultiIndex as long as level=None)
967+
968+
See Also
969+
--------
970+
unique
971+
Index.unique
972+
Series.unique
973+
"""
974+
975+
if level is None:
976+
return super(MultiIndex, self).unique()
977+
else:
978+
level = self._get_level_number(level)
979+
return self._get_level_values(level=level, unique=True)
980+
948981
def format(self, space=2, sparsify=None, adjoin=True, names=False,
949982
na_rep=None, formatter=None):
950983
if len(self) == 0:

pandas/tests/indexes/test_base.py

+18
Original file line numberDiff line numberDiff line change
@@ -1484,6 +1484,24 @@ def test_get_level_values(self):
14841484
result = index_with_name.get_level_values('a')
14851485
tm.assert_index_equal(result, index_with_name)
14861486

1487+
def test_unique(self):
1488+
idx = pd.Index([2, 3, 2, 1], name='my_index')
1489+
expected = pd.Index([2, 3, 1], name='my_index')
1490+
for level in 0, 'my_index', None:
1491+
result = idx.unique(level=level)
1492+
tm.assert_index_equal(result, expected)
1493+
1494+
for level in 3, 'wrong', {'also'}:
1495+
msg = "Level {} not found".format(level)
1496+
with tm.assert_raises_regex(ValueError, msg):
1497+
idx.unique(level=level)
1498+
1499+
# with NaNs
1500+
idx = pd.Index([2, np.nan, 2, 1], name='my_index')
1501+
expected = pd.Index([2, np.nan, 1], name='my_index')
1502+
result = idx.unique()
1503+
tm.assert_index_equal(result, expected)
1504+
14871505
def test_slice_keep_name(self):
14881506
idx = Index(['a', 'b'], name='asdf')
14891507
assert idx.name == idx[1:].name

pandas/tests/indexes/test_multi.py

+24-8
Original file line numberDiff line numberDiff line change
@@ -955,19 +955,21 @@ def test_get_level_values(self):
955955
exp = CategoricalIndex([1, 2, 3, 1, 2, 3])
956956
tm.assert_index_equal(index.get_level_values(1), exp)
957957

958-
def test_get_level_values_na(self):
958+
@pytest.mark.xfail(reason='GH 17924 (returns Int64Index with float data)')
959+
def test_get_level_values_int_with_na(self):
959960
arrays = [['a', 'b', 'b'], [1, np.nan, 2]]
960961
index = pd.MultiIndex.from_arrays(arrays)
961-
values = index.get_level_values(1)
962-
expected = np.array([1, np.nan, 2])
963-
tm.assert_numpy_array_equal(values.values.astype(float), expected)
962+
result = index.get_level_values(1)
963+
expected = Index([1, np.nan, 2])
964+
tm.assert_index_equal(result, expected)
964965

965966
arrays = [['a', 'b', 'b'], [np.nan, np.nan, 2]]
966967
index = pd.MultiIndex.from_arrays(arrays)
967-
values = index.get_level_values(1)
968-
expected = np.array([np.nan, np.nan, 2])
969-
tm.assert_numpy_array_equal(values.values.astype(float), expected)
968+
result = index.get_level_values(1)
969+
expected = Index([np.nan, np.nan, 2])
970+
tm.assert_index_equal(result, expected)
970971

972+
def test_get_level_values_na(self):
971973
arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]]
972974
index = pd.MultiIndex.from_arrays(arrays)
973975
result = index.get_level_values(0)
@@ -982,7 +984,7 @@ def test_get_level_values_na(self):
982984
index = pd.MultiIndex.from_arrays(arrays)
983985
values = index.get_level_values(1)
984986
expected = pd.DatetimeIndex([0, 1, pd.NaT])
985-
tm.assert_numpy_array_equal(values.values, expected.values)
987+
tm.assert_index_equal(values, expected)
986988

987989
arrays = [[], []]
988990
index = pd.MultiIndex.from_arrays(arrays)
@@ -2269,6 +2271,20 @@ def test_unique(self):
22692271
exp = pd.MultiIndex.from_arrays([['a'], ['a']])
22702272
tm.assert_index_equal(res, exp)
22712273

2274+
# GH #17896 - with level= argument
2275+
result = self.index.unique(level=0)
2276+
expected = Index(['foo', 'bar', 'baz', 'qux'],
2277+
name='first')
2278+
tm.assert_index_equal(result, expected)
2279+
2280+
@pytest.mark.xfail(reason='GH 17924 (returns Int64Index with float data)')
2281+
def test_unique_with_nans(self):
2282+
# with NaNs
2283+
index = pd.MultiIndex.from_arrays([['a', 'b', 'b'], [2, np.nan, 2]])
2284+
result = index.unique(level=1)
2285+
expected = pd.Index([2, np.nan])
2286+
tm.assert_index_equal(result, expected)
2287+
22722288
def test_unique_datetimelike(self):
22732289
idx1 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-01',
22742290
'2015-01-01', 'NaT', 'NaT'])

0 commit comments

Comments
 (0)