Skip to content

Commit e6a0ef8

Browse files
toobaz authored and jreback committed
REF: smarter NaN handling in remove_unused_levels() (#18438)
1 parent 4e09480 commit e6a0ef8

File tree

1 file changed

+12
-14
lines changed

1 file changed

+12
-14
lines changed

pandas/core/indexes/multi.py

+12 -14
Original file line number | Diff line number | Diff line change
@@ -1365,31 +1365,29 @@ def remove_unused_levels(self):
13651365
new_labels = []
13661366

13671367
changed = False
1368-
for idx, (lev, lab) in enumerate(zip(self.levels, self.labels)):
1369-
na_idxs = np.where(lab == -1)[0]
1370-
1371-
if len(na_idxs):
1372-
lab = np.delete(lab, na_idxs)
1368+
for lev, lab in zip(self.levels, self.labels):
13731369

13741370
uniques = algos.unique(lab)
1371+
na_idx = np.where(uniques == -1)[0]
13751372

13761373
# nothing unused
1377-
if len(uniques) != len(lev):
1374+
if len(uniques) != len(lev) + len(na_idx):
13781375
changed = True
13791376

1377+
if len(na_idx):
1378+
# Just ensure that -1 is in first position:
1379+
uniques[[0, na_idx[0]]] = uniques[[na_idx[0], 0]]
1380+
13801381
# labels get mapped from uniques to 0:len(uniques)
1381-
label_mapping = np.zeros(len(lev))
1382-
label_mapping[uniques] = np.arange(len(uniques))
1382+
# -1 (if present) is mapped to last position
1383+
label_mapping = np.zeros(len(lev) + len(na_idx))
1384+
# ... and reassigned value -1:
1385+
label_mapping[uniques] = np.arange(len(uniques)) - len(na_idx)
13831386

13841387
lab = label_mapping[lab]
13851388

13861389
# new levels are simple
1387-
lev = lev.take(uniques)
1388-
1389-
if len(na_idxs):
1390-
lab = np.insert(lab, na_idxs - np.arange(len(na_idxs)), -1)
1391-
else:
1392-
lab = self.labels[idx]
1390+
lev = lev.take(uniques[len(na_idx):])
13931391

13941392
new_levels.append(lev)
13951393
new_labels.append(lab)

0 commit comments

Comments
 (0)