@@ -207,7 +207,7 @@ def __init__(self, values, categories=None, ordered=False, name=None, fastpath=F
207
207
if fastpath :
208
208
# fast path
209
209
self ._codes = _coerce_indexer_dtype (values , categories )
210
- self .categories = categories
210
+ self ._categories = self . _validate_categories ( categories , fastpath = isinstance ( categories , ABCIndexClass ))
211
211
self ._ordered = ordered
212
212
return
213
213
@@ -274,6 +274,8 @@ def __init__(self, values, categories=None, ordered=False, name=None, fastpath=F
274
274
### FIXME ####
275
275
raise NotImplementedError ("> 1 ndim Categorical are not supported at this time" )
276
276
277
+ categories = self ._validate_categories (categories )
278
+
277
279
else :
278
280
# there were two ways if categories are present
279
281
# - the old one, where each value is an int pointer to the levels array -> not anymore
@@ -282,7 +284,6 @@ def __init__(self, values, categories=None, ordered=False, name=None, fastpath=F
282
284
283
285
# make sure that we always have the same type here, no matter what we get passed in
284
286
categories = self ._validate_categories (categories )
285
-
286
287
codes = _get_codes_for_values (values , categories )
287
288
288
289
# TODO: check for old style usage. These warnings should be removed after 0.18/ in 2016
@@ -295,7 +296,7 @@ def __init__(self, values, categories=None, ordered=False, name=None, fastpath=F
295
296
"'Categorical.from_codes(codes, categories)'?" , RuntimeWarning , stacklevel = 2 )
296
297
297
298
self .set_ordered (ordered or False , inplace = True )
298
- self .categories = categories
299
+ self ._categories = categories
299
300
self ._codes = _coerce_indexer_dtype (codes , categories )
300
301
301
302
def copy (self ):
@@ -421,9 +422,15 @@ def _get_labels(self):
421
422
_categories = None
422
423
423
424
@classmethod
424
- def _validate_categories (cls , categories ):
425
+ def _validate_categories (cls , categories , fastpath = False ):
425
426
"""
426
427
Validates that we have good categories
428
+
429
+ Parameters
430
+ ----------
431
+ fastpath : boolean (default: False)
432
+ Don't perform validation of the categories for uniqueness or nulls
433
+
427
434
"""
428
435
if not isinstance (categories , ABCIndexClass ):
429
436
dtype = None
@@ -439,22 +446,40 @@ def _validate_categories(cls, categories):
439
446
440
447
from pandas import Index
441
448
categories = Index (categories , dtype = dtype )
442
- if not categories .is_unique :
443
- raise ValueError ('Categorical categories must be unique' )
449
+
450
+ if not fastpath :
451
+
452
+ # check properties of the categories
453
+ # we don't allow NaNs in the categories themselves
454
+
455
+ if categories .hasnans :
456
+ # NaNs in cats deprecated in 0.17, remove in 0.18 or 0.19 GH 10748
457
+ msg = ('\n Setting NaNs in `categories` is deprecated and '
458
+ 'will be removed in a future version of pandas.' )
459
+ warn (msg , FutureWarning , stacklevel = 5 )
460
+
461
+ # categories must be unique
462
+
463
+ if not categories .is_unique :
464
+ raise ValueError ('Categorical categories must be unique' )
465
+
444
466
return categories
445
467
446
- def _set_categories (self , categories , validate = True ):
447
- """ Sets new categories """
448
- if validate :
449
- categories = self ._validate_categories (categories )
450
- if not self ._categories is None and len (categories ) != len (self ._categories ):
451
- raise ValueError ("new categories need to have the same number of items than the old "
452
- "categories!" )
453
- if np .any (isnull (categories )):
454
- # NaNs in cats deprecated in 0.17, remove in 0.18 or 0.19 GH 10748
455
- msg = ('\n Setting NaNs in `categories` is deprecated and '
456
- 'will be removed in a future version of pandas.' )
457
- warn (msg , FutureWarning , stacklevel = 9 )
468
+ def _set_categories (self , categories , fastpath = False ):
469
+ """ Sets new categories
470
+
471
+ Parameters
472
+ ----------
473
+ fastpath : boolean (default: False)
474
+ Don't perform validation of the categories for uniqueness or nulls
475
+
476
+ """
477
+
478
+ categories = self ._validate_categories (categories , fastpath = fastpath )
479
+ if not fastpath and not self ._categories is None and len (categories ) != len (self ._categories ):
480
+ raise ValueError ("new categories need to have the same number of items than the old "
481
+ "categories!" )
482
+
458
483
self ._categories = categories
459
484
460
485
def _get_categories (self ):
@@ -587,11 +612,10 @@ def set_categories(self, new_categories, ordered=None, rename=False, inplace=Fal
587
612
if not cat ._categories is None and len (new_categories ) < len (cat ._categories ):
588
613
# remove all _codes which are larger and set to -1/NaN
589
614
self ._codes [self ._codes >= len (new_categories )] = - 1
590
- cat ._set_categories (new_categories , validate = False )
591
615
else :
592
616
values = cat .__array__ ()
593
617
cat ._codes = _get_codes_for_values (values , new_categories )
594
- cat ._set_categories ( new_categories , validate = False )
618
+ cat ._categories = new_categories
595
619
596
620
if ordered is None :
597
621
ordered = self .ordered
@@ -712,9 +736,8 @@ def add_categories(self, new_categories, inplace=False):
712
736
msg = "new categories must not include old categories: %s" % str (already_included )
713
737
raise ValueError (msg )
714
738
new_categories = list (self ._categories ) + list (new_categories )
715
- new_categories = self ._validate_categories (new_categories )
716
739
cat = self if inplace else self .copy ()
717
- cat ._set_categories ( new_categories , validate = False )
740
+ cat ._categories = self . _validate_categories ( new_categories )
718
741
cat ._codes = _coerce_indexer_dtype (cat ._codes , new_categories )
719
742
if not inplace :
720
743
return cat
@@ -797,7 +820,7 @@ def remove_unused_categories(self, inplace=False):
797
820
from pandas .core .index import _ensure_index
798
821
new_categories = _ensure_index (new_categories )
799
822
cat ._codes = _get_codes_for_values (cat .__array__ (), new_categories )
800
- cat ._set_categories ( new_categories , validate = False )
823
+ cat ._categories = new_categories
801
824
if not inplace :
802
825
return cat
803
826
0 commit comments