@@ -67,7 +67,6 @@ def _maybe_to_categorical(array):
67
67
return array .values
68
68
return array
69
69
70
-
71
70
_codes_doc = """The category codes of this categorical.
72
71
73
72
Level codes are an array if integer which are the positions of the real
@@ -194,7 +193,7 @@ def __init__(self, values, categories=None, ordered=None, name=None, fastpath=Fa
194
193
195
194
if fastpath :
196
195
# fast path
197
- self ._codes = values
196
+ self ._codes = _coerce_codes_dtype ( values , categories )
198
197
self .name = name
199
198
self .categories = categories
200
199
self .ordered = ordered
@@ -285,9 +284,9 @@ def __init__(self, values, categories=None, ordered=None, name=None, fastpath=Fa
285
284
ordered = True
286
285
287
286
self .ordered = False if ordered is None else ordered
288
- self ._codes = codes
289
287
self .categories = categories
290
288
self .name = name
289
+ self ._codes = _coerce_codes_dtype (codes , categories )
291
290
292
291
def copy (self ):
293
292
""" Copy constructor. """
@@ -607,6 +606,7 @@ def add_categories(self, new_categories, inplace=False):
607
606
new_categories = self ._validate_categories (new_categories )
608
607
cat = self if inplace else self .copy ()
609
608
cat ._categories = new_categories
609
+ cat ._codes = _coerce_codes_dtype (cat ._codes , new_categories )
610
610
if not inplace :
611
611
return cat
612
612
@@ -1105,6 +1105,12 @@ def __unicode__(self):
1105
1105
1106
1106
return result
1107
1107
1108
+ def _maybe_coerce_indexer (self , indexer ):
1109
+ """ return an indexer coerced to the codes dtype """
1110
+ if isinstance (indexer , np .ndarray ) and indexer .dtype .kind == 'i' :
1111
+ indexer = indexer .astype (self ._codes .dtype )
1112
+ return indexer
1113
+
1108
1114
def __getitem__ (self , key ):
1109
1115
""" Return an item. """
1110
1116
if isinstance (key , (int , np .integer )):
@@ -1114,6 +1120,7 @@ def __getitem__(self, key):
1114
1120
else :
1115
1121
return self .categories [i ]
1116
1122
else :
1123
+ key = self ._maybe_coerce_indexer (key )
1117
1124
return Categorical (values = self ._codes [key ], categories = self .categories ,
1118
1125
ordered = self .ordered , fastpath = True )
1119
1126
@@ -1181,6 +1188,8 @@ def __setitem__(self, key, value):
1181
1188
nan_pos = np .where (com .isnull (self .categories ))[0 ]
1182
1189
lindexer [lindexer == - 1 ] = nan_pos
1183
1190
1191
+ key = self ._maybe_coerce_indexer (key )
1192
+ lindexer = self ._maybe_coerce_indexer (lindexer )
1184
1193
self ._codes [key ] = lindexer
1185
1194
1186
1195
#### reduction ops ####
@@ -1395,6 +1404,22 @@ def _delegate_method(self, name, *args, **kwargs):
1395
1404
1396
1405
##### utility routines #####
1397
1406
1407
+ _int8_max = np .iinfo (np .int8 ).max
1408
+ _int16_max = np .iinfo (np .int16 ).max
1409
+ _int32_max = np .iinfo (np .int32 ).max
1410
+
1411
+ def _coerce_codes_dtype (codes , categories ):
1412
+ """ coerce the code input array to an appropriate dtype """
1413
+ codes = np .array (codes ,copy = False )
1414
+ l = len (categories )
1415
+ if l < _int8_max :
1416
+ return codes .astype ('int8' )
1417
+ elif l < _int16_max :
1418
+ return codes .astype ('int16' )
1419
+ elif l < _int32_max :
1420
+ return codes .astype ('int32' )
1421
+ return codes .astype ('int64' )
1422
+
1398
1423
def _get_codes_for_values (values , categories ):
1399
1424
""""
1400
1425
utility routine to turn values into codes given the specified categories
@@ -1407,7 +1432,7 @@ def _get_codes_for_values(values, categories):
1407
1432
(hash_klass , vec_klass ), vals = _get_data_algo (values , _hashtables )
1408
1433
t = hash_klass (len (categories ))
1409
1434
t .map_locations (com ._values_from_object (categories ))
1410
- return com . _ensure_platform_int (t .lookup (values ))
1435
+ return _coerce_codes_dtype (t .lookup (values ), categories )
1411
1436
1412
1437
def _convert_to_list_like (list_like ):
1413
1438
if hasattr (list_like , "dtype" ):
0 commit comments