@@ -288,6 +288,10 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
288
288
self ._dtype = dtype
289
289
return
290
290
291
+ # null_mask indicates missing values we want to exclude from inference.
292
+ # It is only set for missing values in list-likes (not arrays/ndframes); otherwise it stays False.
293
+ null_mask = np .array (False )
294
+
291
295
# sanitize input
292
296
if is_categorical_dtype (values ):
293
297
@@ -316,13 +320,14 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
316
320
if not isinstance (values , np .ndarray ):
317
321
values = _convert_to_list_like (values )
318
322
from pandas .core .series import _sanitize_array
319
- # On list with NaNs, int values will be converted to float. Use
320
- # "object" dtype to prevent this. In the end objects will be
321
- # casted to int/... in the category assignment step.
322
- if len (values ) == 0 or isna (values ).any ():
323
+ # By convention, empty lists result in object dtype:
324
+ if len (values ) == 0 :
323
325
sanitize_dtype = 'object'
324
326
else :
325
327
sanitize_dtype = None
328
+ null_mask = isna (values )
329
+ if null_mask .any ():
330
+ values = [values [idx ] for idx in np .where (~ null_mask )[0 ]]
326
331
values = _sanitize_array (values , None , dtype = sanitize_dtype )
327
332
328
333
if dtype .categories is None :
@@ -370,6 +375,12 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
370
375
"mean to use\n 'Categorical.from_codes(codes, "
371
376
"categories)'?" , RuntimeWarning , stacklevel = 2 )
372
377
378
+ if null_mask .any ():
379
+ # Reinsert -1 placeholders for previously removed missing values
380
+ full_codes = - np .ones (null_mask .shape , dtype = codes .dtype )
381
+ full_codes [~ null_mask ] = codes
382
+ codes = full_codes
383
+
373
384
self ._dtype = dtype
374
385
self ._codes = coerce_indexer_dtype (codes , dtype .categories )
375
386
0 commit comments