@@ -207,7 +207,7 @@ def __init__(self, values, categories=None, ordered=False, name=None, fastpath=F
207
207
if fastpath :
208
208
# fast path
209
209
self ._codes = _coerce_indexer_dtype (values , categories )
210
- self .categories = categories
210
+ self ._categories = self . _validate_categories ( categories , fastpath = isinstance ( categories , ABCIndexClass ))
211
211
self ._ordered = ordered
212
212
return
213
213
@@ -274,6 +274,8 @@ def __init__(self, values, categories=None, ordered=False, name=None, fastpath=F
274
274
### FIXME ####
275
275
raise NotImplementedError ("> 1 ndim Categorical are not supported at this time" )
276
276
277
+ categories = self ._validate_categories (categories )
278
+
277
279
else :
278
280
# there were two ways if categories are present
279
281
# - the old one, where each value is an int pointer to the levels array -> not anymore
@@ -282,7 +284,6 @@ def __init__(self, values, categories=None, ordered=False, name=None, fastpath=F
282
284
283
285
# make sure that we always have the same type here, no matter what we get passed in
284
286
categories = self ._validate_categories (categories )
285
-
286
287
codes = _get_codes_for_values (values , categories )
287
288
288
289
# TODO: check for old style usage. These warnings should be removed after 0.18/ in 2016
@@ -295,7 +296,7 @@ def __init__(self, values, categories=None, ordered=False, name=None, fastpath=F
295
296
"'Categorical.from_codes(codes, categories)'?" , RuntimeWarning , stacklevel = 2 )
296
297
297
298
self .set_ordered (ordered or False , inplace = True )
298
- self .categories = categories
299
+ self ._categories = categories
299
300
self ._codes = _coerce_indexer_dtype (codes , categories )
300
301
301
302
def copy (self ):
@@ -421,9 +422,15 @@ def _get_labels(self):
421
422
_categories = None
422
423
423
424
@classmethod
424
- def _validate_categories (cls , categories ):
425
+ def _validate_categories (cls , categories , fastpath = False ):
425
426
"""
426
427
Validates that we have good categories
428
+
429
+ Parameters
430
+ ----------
431
+ fastpath : boolean (default: False)
432
+ Don't perform validation of the categories for uniqueness or nulls
433
+
427
434
"""
428
435
if not isinstance (categories , ABCIndexClass ):
429
436
dtype = None
@@ -439,16 +446,40 @@ def _validate_categories(cls, categories):
439
446
440
447
from pandas import Index
441
448
categories = Index (categories , dtype = dtype )
442
- if not categories .is_unique :
443
- raise ValueError ('Categorical categories must be unique' )
449
+
450
+ if not fastpath :
451
+
452
+ # check properties of the categories
453
+ # we don't allow NaNs in the categories themselves
454
+
455
+ if categories .hasnans :
456
+ # NaNs in cats deprecated in 0.17, remove in 0.18 or 0.19 GH 10748
457
+ msg = ('\n Setting NaNs in `categories` is deprecated and '
458
+ 'will be removed in a future version of pandas.' )
459
+ warn (msg , FutureWarning , stacklevel = 5 )
460
+
461
+ # categories must be unique
462
+
463
+ if not categories .is_unique :
464
+ raise ValueError ('Categorical categories must be unique' )
465
+
444
466
return categories
445
467
446
- def _set_categories (self , categories ):
447
- """ Sets new categories """
448
- categories = self ._validate_categories (categories )
449
- if not self ._categories is None and len (categories ) != len (self ._categories ):
468
+ def _set_categories (self , categories , fastpath = False ):
469
+ """ Sets new categories
470
+
471
+ Parameters
472
+ ----------
473
+ fastpath : boolean (default: False)
474
+ Don't perform validation of the categories for uniqueness or nulls
475
+
476
+ """
477
+
478
+ categories = self ._validate_categories (categories , fastpath = fastpath )
479
+ if not fastpath and not self ._categories is None and len (categories ) != len (self ._categories ):
450
480
raise ValueError ("new categories need to have the same number of items than the old "
451
481
"categories!" )
482
+
452
483
self ._categories = categories
453
484
454
485
def _get_categories (self ):
@@ -581,11 +612,10 @@ def set_categories(self, new_categories, ordered=None, rename=False, inplace=Fal
581
612
if not cat ._categories is None and len (new_categories ) < len (cat ._categories ):
582
613
# remove all _codes which are larger and set to -1/NaN
583
614
self ._codes [self ._codes >= len (new_categories )] = - 1
584
- cat ._categories = new_categories
585
615
else :
586
616
values = cat .__array__ ()
587
617
cat ._codes = _get_codes_for_values (values , new_categories )
588
- cat ._categories = new_categories
618
+ cat ._categories = new_categories
589
619
590
620
if ordered is None :
591
621
ordered = self .ordered
@@ -706,9 +736,8 @@ def add_categories(self, new_categories, inplace=False):
706
736
msg = "new categories must not include old categories: %s" % str (already_included )
707
737
raise ValueError (msg )
708
738
new_categories = list (self ._categories ) + list (new_categories )
709
- new_categories = self ._validate_categories (new_categories )
710
739
cat = self if inplace else self .copy ()
711
- cat ._categories = new_categories
740
+ cat ._categories = self . _validate_categories ( new_categories )
712
741
cat ._codes = _coerce_indexer_dtype (cat ._codes , new_categories )
713
742
if not inplace :
714
743
return cat
@@ -1171,7 +1200,7 @@ def order(self, inplace=False, ascending=True, na_position='last'):
1171
1200
Category.sort
1172
1201
"""
1173
1202
warn ("order is deprecated, use sort_values(...)" ,
1174
- FutureWarning , stacklevel = 2 )
1203
+ FutureWarning , stacklevel = 3 )
1175
1204
return self .sort_values (inplace = inplace , ascending = ascending , na_position = na_position )
1176
1205
1177
1206
def sort (self , inplace = True , ascending = True , na_position = 'last' ):
0 commit comments