Skip to content

Commit fc3f110

Browse files
committed
BUG: fix MultiIndex.remove_unused_levels() when index contains NaNs
closes pandas-dev#18417
1 parent 103ea6f commit fc3f110

File tree

3 files changed

+22
-1
lines changed

3 files changed

+22
-1
lines changed

doc/source/whatsnew/v0.22.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ Indexing
119119

120120
- Bug in :func:`Series.truncate` which raises ``TypeError`` with a monotonic ``PeriodIndex`` (:issue:`17717`)
121121
- Bug in :func:`DataFrame.groupby` where tuples were interpreted as lists of keys rather than as keys (:issue:`17979`, :issue:`18249`)
122-
-
122+
- Bug in :func:`MultiIndex.remove_unused_levels` which would fill nan values (:issue:`18417`)
123123
-
124124

125125
I/O

pandas/core/indexes/multi.py

+11
Original file line numberDiff line numberDiff line change
@@ -1366,6 +1366,11 @@ def remove_unused_levels(self):
13661366

13671367
changed = False
13681368
for lev, lab in zip(self.levels, self.labels):
1369+
null_mask = lab == -1
1370+
if null_mask.any():
1371+
lab = lab[~null_mask]
1372+
else:
1373+
null_mask = None
13691374

13701375
uniques = algos.unique(lab)
13711376

@@ -1380,8 +1385,14 @@ def remove_unused_levels(self):
13801385
# labels get mapped from uniques to 0:len(uniques)
13811386
label_mapping = np.zeros(len(lev))
13821387
label_mapping[uniques] = np.arange(len(uniques))
1388+
13831389
lab = label_mapping[lab]
13841390

1391+
if null_mask is not None:
1392+
full_lab = - np.ones(null_mask.shape, dtype=lab.dtype)
1393+
full_lab[~null_mask] = lab
1394+
lab = full_lab
1395+
13851396
# new levels are simple
13861397
lev = lev.take(uniques)
13871398

pandas/tests/indexes/test_multi.py

+10
Original file line numberDiff line numberDiff line change
@@ -2629,6 +2629,16 @@ def test_reconstruct_remove_unused(self):
26292629
tm.assert_index_equal(result2, expected)
26302630
assert result2.is_(result)
26312631

2632+
@pytest.mark.parametrize('level', [['a', 'd', 'b'],
2633+
['a', 'd', 'b', 'unused']])
2634+
def test_remove_unused_nan(self, level):
2635+
# GH 18417
2636+
df = pd.MultiIndex(levels=[level, ['w', 'x', 'y', 'z']],
2637+
labels=[[0, 2, -1, 1, 1], [0, 1, 2, 3, 2]])
2638+
2639+
result = df.remove_unused_levels()
2640+
tm.assert_index_equal(result, df)
2641+
26322642
@pytest.mark.parametrize('first_type,second_type', [
26332643
('int64', 'int64'),
26342644
('datetime64[D]', 'str')])

0 commit comments

Comments
 (0)