Skip to content

Commit 1ce45d0

Browse files
committed
BUG: fix MultiIndex.remove_unused_levels() when index contains NaNs
closes pandas-dev#18417
1 parent 103ea6f commit 1ce45d0

File tree

3 files changed

+33
-13
lines changed

3 files changed

+33
-13
lines changed

doc/source/whatsnew/v0.22.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ Indexing
119119

120120
- Bug in :func:`Series.truncate` which raises ``TypeError`` with a monotonic ``PeriodIndex`` (:issue:`17717`)
121121
- Bug in :func:`DataFrame.groupby` where tuples were interpreted as lists of keys rather than as keys (:issue:`17979`, :issue:`18249`)
122-
-
122+
- Bug in :func:`MultiIndex.remove_unused_levels` which would fill NaN values (:issue:`18417`)
123123
-
124124

125125
I/O

pandas/core/indexes/multi.py

+18-12
Original file line numberDiff line numberDiff line change
@@ -1365,25 +1365,31 @@ def remove_unused_levels(self):
13651365
new_labels = []
13661366

13671367
changed = False
1368-
for lev, lab in zip(self.levels, self.labels):
1368+
for idx, (lev, lab) in enumerate(zip(self.levels, self.labels)):
1369+
na_idxs = np.where(lab == -1)[0]
1370+
1371+
if len(na_idxs):
1372+
lab = np.delete(lab, na_idxs)
13691373

13701374
uniques = algos.unique(lab)
13711375

13721376
# nothing unused
1373-
if len(uniques) == len(lev):
1374-
new_levels.append(lev)
1375-
new_labels.append(lab)
1376-
continue
1377+
if len(uniques) != len(lev):
1378+
changed = True
1379+
1380+
# labels get mapped from uniques to 0:len(uniques)
1381+
label_mapping = np.zeros(len(lev))
1382+
label_mapping[uniques] = np.arange(len(uniques))
13771383

1378-
changed = True
1384+
lab = label_mapping[lab]
13791385

1380-
# labels get mapped from uniques to 0:len(uniques)
1381-
label_mapping = np.zeros(len(lev))
1382-
label_mapping[uniques] = np.arange(len(uniques))
1383-
lab = label_mapping[lab]
1386+
# new levels are simple
1387+
lev = lev.take(uniques)
13841388

1385-
# new levels are simple
1386-
lev = lev.take(uniques)
1389+
if len(na_idxs):
1390+
lab = np.insert(lab, na_idxs - np.arange(len(na_idxs)), -1)
1391+
else:
1392+
lab = self.labels[idx]
13871393

13881394
new_levels.append(lev)
13891395
new_labels.append(lab)

pandas/tests/indexes/test_multi.py

+14
Original file line numberDiff line numberDiff line change
@@ -2629,6 +2629,20 @@ def test_reconstruct_remove_unused(self):
26292629
tm.assert_index_equal(result2, expected)
26302630
assert result2.is_(result)
26312631

2632+
@pytest.mark.parametrize('level0', [['a', 'd', 'b'],
2633+
['a', 'd', 'b', 'unused']])
2634+
@pytest.mark.parametrize('level1', [['w', 'x', 'y', 'z'],
2635+
['w', 'x', 'y', 'z', 'unused']])
2636+
def test_remove_unused_nan(self, level0, level1):
2637+
# GH 18417
2638+
mi = pd.MultiIndex(levels=[level0, level1],
2639+
labels=[[0, 2, -1, 1, -1], [0, 1, 2, 3, 2]])
2640+
2641+
result = mi.remove_unused_levels()
2642+
tm.assert_index_equal(result, mi)
2643+
for level in 0, 1:
2644+
assert('unused' not in result.levels[level])
2645+
26322646
@pytest.mark.parametrize('first_type,second_type', [
26332647
('int64', 'int64'),
26342648
('datetime64[D]', 'str')])

0 commit comments

Comments
 (0)