From 9089b69974dd022fe27ff5bf45ef502e64ae3f59 Mon Sep 17 00:00:00 2001
From: Pietro Battiston
Date: Sat, 31 Mar 2018 21:13:51 +0200
Subject: [PATCH] BUG: .unique() on MultiIndex: preserve names

closes #20308
closes #20570
---
Review notes (this section is ignored by `git am`):
- Line structure of the patch restored; hunk contents are otherwise as
  submitted.
- test_unique_level: the new empty-MultiIndex case built `result` and
  `expected` but never compared them; added the missing
  tm.assert_index_equal(). The last hunk header and the diffstat were
  updated for the extra line; the `index` blob ids are left as submitted.

 doc/source/whatsnew/v0.23.0.txt    |  2 ++
 pandas/core/indexes/multi.py       |  5 ++---
 pandas/tests/indexes/test_multi.py | 30 +++++++++++++++++++++++-------
 3 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index ce63cb2473bc4..1f477c4f18811 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -1069,6 +1069,8 @@ MultiIndex
 - Bug in :func:`MultiIndex.__contains__` where non-tuple keys would return ``True`` even if they had been dropped (:issue:`19027`)
 - Bug in :func:`MultiIndex.set_labels` which would cause casting (and potentially clipping) of the new labels if the ``level`` argument is not 0 or a list like [0, 1, ... ] (:issue:`19057`)
 - Bug in :func:`MultiIndex.get_level_values` which would return an invalid index on level of ints with missing values (:issue:`17924`)
+- Bug in :func:`MultiIndex.unique` when called on empty :class:`MultiIndex` (:issue:`20568`)
+- Bug in :func:`MultiIndex.unique` which would not preserve level names (:issue:`20570`)
 - Bug in :func:`MultiIndex.remove_unused_levels` which would fill nan values (:issue:`18417`)
 - Bug in :func:`MultiIndex.from_tuples` which would fail to take zipped tuples in python3 (:issue:`18434`)
 - Bug in :func:`MultiIndex.get_loc` which would fail to automatically cast values between float and int (:issue:`18818`, :issue:`15994`)
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 8226c4bcac494..d4b9545999bc7 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -553,11 +553,10 @@ def __contains__(self, key):
     @Appender(_index_shared_docs['_shallow_copy'])
     def _shallow_copy(self, values=None, **kwargs):
         if values is not None:
-            if 'name' in kwargs:
-                kwargs['names'] = kwargs.pop('name', None)
+            names = kwargs.pop('names', kwargs.pop('name', self.names))
             # discards freq
             kwargs.pop('freq', None)
-            return MultiIndex.from_tuples(values, **kwargs)
+            return MultiIndex.from_tuples(values, names=names, **kwargs)
         return self.view()
 
     @cache_readonly
diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py
index 34abf7052da8c..984f37042d600 100644
--- a/pandas/tests/indexes/test_multi.py
+++ b/pandas/tests/indexes/test_multi.py
@@ -2452,23 +2452,33 @@ def test_get_unique_index(self):
         assert result.unique
         tm.assert_index_equal(result, expected)
 
-    def test_unique(self):
-        mi = pd.MultiIndex.from_arrays([[1, 2, 1, 2], [1, 1, 1, 2]])
+    @pytest.mark.parametrize('names', [None, ['first', 'second']])
+    def test_unique(self, names):
+        mi = pd.MultiIndex.from_arrays([[1, 2, 1, 2], [1, 1, 1, 2]],
+                                       names=names)
 
         res = mi.unique()
-        exp = pd.MultiIndex.from_arrays([[1, 2, 2], [1, 1, 2]])
+        exp = pd.MultiIndex.from_arrays([[1, 2, 2], [1, 1, 2]], names=mi.names)
         tm.assert_index_equal(res, exp)
 
-        mi = pd.MultiIndex.from_arrays([list('aaaa'), list('abab')])
+        mi = pd.MultiIndex.from_arrays([list('aaaa'), list('abab')],
+                                       names=names)
         res = mi.unique()
-        exp = pd.MultiIndex.from_arrays([list('aa'), list('ab')])
+        exp = pd.MultiIndex.from_arrays([list('aa'), list('ab')],
+                                        names=mi.names)
         tm.assert_index_equal(res, exp)
 
-        mi = pd.MultiIndex.from_arrays([list('aaaa'), list('aaaa')])
+        mi = pd.MultiIndex.from_arrays([list('aaaa'), list('aaaa')],
+                                       names=names)
         res = mi.unique()
-        exp = pd.MultiIndex.from_arrays([['a'], ['a']])
+        exp = pd.MultiIndex.from_arrays([['a'], ['a']], names=mi.names)
         tm.assert_index_equal(res, exp)
 
+        # GH #20568 - empty MI
+        mi = pd.MultiIndex.from_arrays([[], []], names=names)
+        res = mi.unique()
+        tm.assert_index_equal(mi, res)
+
     @pytest.mark.parametrize('level', [0, 'first', 1, 'second'])
     def test_unique_level(self, level):
         # GH #17896 - with level= argument
@@ -2483,6 +2493,12 @@ def test_unique_level(self, level):
         expected = mi.get_level_values(level)
         tm.assert_index_equal(result, expected)
 
+        # With empty MI
+        mi = pd.MultiIndex.from_arrays([[], []], names=['first', 'second'])
+        result = mi.unique(level=level)
+        expected = mi.get_level_values(level)
+        tm.assert_index_equal(result, expected)
+
     def test_unique_datetimelike(self):
         idx1 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-01',
                                  '2015-01-01', 'NaT', 'NaT'])