@@ -243,11 +243,35 @@ def __new__(cls, levels=None, codes=None, sortorder=None, names=None,
243
243
result .sortorder = sortorder
244
244
245
245
if verify_integrity :
246
- result ._verify_integrity ()
246
+ new_codes = result ._verify_integrity ()
247
+ result ._codes = new_codes
248
+
247
249
if _set_identity :
248
250
result ._reset_identity ()
251
+
249
252
return result
250
253
254
+ def _validate_codes (self , level : list , code : list ):
255
+ """
256
+ Reassign code values as -1 if their corresponding levels are NaN.
257
+
258
+ Parameters
259
+ ----------
260
+ code : list
261
+ Code to reassign.
262
+ level : list
263
+ Level to check for missing values (NaN, NaT, None).
264
+
265
+ Returns
266
+ -------
267
+ code : new code where code value = -1 if it corresponds
268
+ to a level with missing values (NaN, NaT, None).
269
+ """
270
+ null_mask = isna (level )
271
+ if np .any (null_mask ):
272
+ code = np .where (null_mask [code ], - 1 , code )
273
+ return code
274
+
251
275
def _verify_integrity (self , codes = None , levels = None ):
252
276
"""
253
277
@@ -263,6 +287,11 @@ def _verify_integrity(self, codes=None, levels=None):
263
287
ValueError
264
288
If length of levels and codes don't match, if the codes for any
265
289
level would exceed level bounds, or there are any duplicate levels.
290
+
291
+ Returns
292
+ -------
293
+ codes : new codes where code value = -1 if it corresponds to a
294
+ NaN level.
266
295
"""
267
296
# NOTE: Currently does not check, among other things, that cached
268
297
# nlevels matches nor that sortorder matches actually sortorder.
@@ -272,22 +301,33 @@ def _verify_integrity(self, codes=None, levels=None):
272
301
if len (levels ) != len (codes ):
273
302
raise ValueError ("Length of levels and codes must match. NOTE:"
274
303
" this index is in an inconsistent state." )
275
- codes_length = len (self . codes [0 ])
304
+ codes_length = len (codes [0 ])
276
305
for i , (level , level_codes ) in enumerate (zip (levels , codes )):
277
306
if len (level_codes ) != codes_length :
278
307
raise ValueError ("Unequal code lengths: %s" %
279
308
([len (code_ ) for code_ in codes ]))
280
309
if len (level_codes ) and level_codes .max () >= len (level ):
281
- raise ValueError ("On level %d, code max (%d) >= length of"
282
- " level (%d). NOTE: this index is in an"
283
- " inconsistent state" % (i , level_codes .max (),
284
- len (level )))
310
+ msg = ("On level {level}, code max ({max_code}) >= length of "
311
+ "level ({level_len}). NOTE: this index is in an "
312
+ "inconsistent state" .format (
313
+ level = i , max_code = level_codes .max (),
314
+ level_len = len (level )))
315
+ raise ValueError (msg )
316
+ if len (level_codes ) and level_codes .min () < - 1 :
317
+ raise ValueError ("On level {level}, code value ({code})"
318
+ " < -1" .format (
319
+ level = i , code = level_codes .min ()))
285
320
if not level .is_unique :
286
321
raise ValueError ("Level values must be unique: {values} on "
287
322
"level {level}" .format (
288
323
values = [value for value in level ],
289
324
level = i ))
290
325
326
+ codes = [self ._validate_codes (level , code )
327
+ for level , code in zip (levels , codes )]
328
+ new_codes = FrozenList (codes )
329
+ return new_codes
330
+
291
331
@classmethod
292
332
def from_arrays (cls , arrays , sortorder = None , names = None ):
293
333
"""
@@ -586,7 +626,8 @@ def _set_levels(self, levels, level=None, copy=False, validate=True,
586
626
new_levels = FrozenList (new_levels )
587
627
588
628
if verify_integrity :
589
- self ._verify_integrity (levels = new_levels )
629
+ new_codes = self ._verify_integrity (levels = new_levels )
630
+ self ._codes = new_codes
590
631
591
632
names = self .names
592
633
self ._levels = new_levels
@@ -676,7 +717,6 @@ def labels(self):
676
717
677
718
def _set_codes (self , codes , level = None , copy = False , validate = True ,
678
719
verify_integrity = False ):
679
-
680
720
if validate and level is None and len (codes ) != self .nlevels :
681
721
raise ValueError ("Length of codes must match number of levels" )
682
722
if validate and level is not None and len (codes ) != len (level ):
@@ -696,9 +736,10 @@ def _set_codes(self, codes, level=None, copy=False, validate=True,
696
736
new_codes = FrozenList (new_codes )
697
737
698
738
if verify_integrity :
699
- self ._verify_integrity (codes = new_codes )
739
+ new_codes = self ._verify_integrity (codes = new_codes )
700
740
701
741
self ._codes = new_codes
742
+
702
743
self ._tuples = None
703
744
self ._reset_cache ()
704
745
@@ -1763,9 +1804,10 @@ def __setstate__(self, state):
1763
1804
1764
1805
self ._set_levels ([Index (x ) for x in levels ], validate = False )
1765
1806
self ._set_codes (codes )
1807
+ new_codes = self ._verify_integrity ()
1808
+ self ._set_codes (new_codes )
1766
1809
self ._set_names (names )
1767
1810
self .sortorder = sortorder
1768
- self ._verify_integrity ()
1769
1811
self ._reset_identity ()
1770
1812
1771
1813
def __getitem__ (self , key ):
0 commit comments