@@ -649,7 +649,12 @@ def _from_inferred_categories(
649
649
650
650
@classmethod
651
651
def from_codes (
652
- cls , codes , categories = None , ordered = None , dtype : Dtype | None = None
652
+ cls ,
653
+ codes ,
654
+ categories = None ,
655
+ ordered = None ,
656
+ dtype : Dtype | None = None ,
657
+ validate : bool = True ,
653
658
) -> Self :
654
659
"""
655
660
Make a Categorical type from codes and categories or dtype.
@@ -677,6 +682,12 @@ def from_codes(
677
682
dtype : CategoricalDtype or "category", optional
678
683
If :class:`CategoricalDtype`, cannot be used together with
679
684
`categories` or `ordered`.
685
+ validate : bool, default True
686
+ If True, validate that the codes are valid for the dtype.
687
+ If False, skip validation of the codes. Be careful about skipping
688
+ validation, as invalid codes can lead to severe problems, such as segfaults.
689
+
690
+ .. versionadded:: 2.1.0
680
691
681
692
Returns
682
693
-------
@@ -699,18 +710,9 @@ def from_codes(
699
710
)
700
711
raise ValueError (msg )
701
712
702
- if isinstance (codes , ExtensionArray ) and is_integer_dtype (codes .dtype ):
703
- # Avoid the implicit conversion of Int to object
704
- if isna (codes ).any ():
705
- raise ValueError ("codes cannot contain NA values" )
706
- codes = codes .to_numpy (dtype = np .int64 )
707
- else :
708
- codes = np .asarray (codes )
709
- if len (codes ) and codes .dtype .kind not in "iu" :
710
- raise ValueError ("codes need to be array-like integers" )
711
-
712
- if len (codes ) and (codes .max () >= len (dtype .categories ) or codes .min () < - 1 ):
713
- raise ValueError ("codes need to be between -1 and len(categories)-1" )
713
+ if validate :
714
+ # beware: invalid codes may cause segfaults
715
+ codes = cls ._validate_codes_for_dtype (codes , dtype = dtype )
714
716
715
717
return cls ._simple_new (codes , dtype = dtype )
716
718
@@ -1325,7 +1327,7 @@ def map(
1325
1327
1326
1328
if new_categories .is_unique and not new_categories .hasnans and na_val is np .nan :
1327
1329
new_dtype = CategoricalDtype (new_categories , ordered = self .ordered )
1328
- return self .from_codes (self ._codes .copy (), dtype = new_dtype )
1330
+ return self .from_codes (self ._codes .copy (), dtype = new_dtype , validate = False )
1329
1331
1330
1332
if has_nans :
1331
1333
new_categories = new_categories .insert (len (new_categories ), na_val )
@@ -1378,6 +1380,22 @@ def _validate_scalar(self, fill_value):
1378
1380
) from None
1379
1381
return fill_value
1380
1382
1383
+ @classmethod
1384
+ def _validate_codes_for_dtype (cls , codes , * , dtype : CategoricalDtype ) -> np .ndarray :
1385
+ if isinstance (codes , ExtensionArray ) and is_integer_dtype (codes .dtype ):
1386
+ # Avoid the implicit conversion of Int to object
1387
+ if isna (codes ).any ():
1388
+ raise ValueError ("codes cannot contain NA values" )
1389
+ codes = codes .to_numpy (dtype = np .int64 )
1390
+ else :
1391
+ codes = np .asarray (codes )
1392
+ if len (codes ) and codes .dtype .kind not in "iu" :
1393
+ raise ValueError ("codes need to be array-like integers" )
1394
+
1395
+ if len (codes ) and (codes .max () >= len (dtype .categories ) or codes .min () < - 1 ):
1396
+ raise ValueError ("codes need to be between -1 and len(categories)-1" )
1397
+ return codes
1398
+
1381
1399
# -------------------------------------------------------------
1382
1400
1383
1401
@ravel_compat
@@ -2724,7 +2742,7 @@ def factorize_from_iterable(values) -> tuple[np.ndarray, Index]:
2724
2742
# The Categorical we want to build has the same categories
2725
2743
# as values but its codes are by definition [0, ..., n_categories - 1]
2726
2744
cat_codes = np .arange (len (values .categories ), dtype = values .codes .dtype )
2727
- cat = Categorical .from_codes (cat_codes , dtype = values .dtype )
2745
+ cat = Categorical .from_codes (cat_codes , dtype = values .dtype , validate = False )
2728
2746
2729
2747
categories = CategoricalIndex (cat )
2730
2748
codes = values .codes
0 commit comments