@@ -234,6 +234,21 @@ class Categorical(PandasObject):
234
234
def __init__ (self , values , categories = None , ordered = None , dtype = None ,
235
235
fastpath = False ):
236
236
237
+ # Ways of specifying the dtype (in priority order)
238
+ # 1. dtype is a CategoricalDtype
239
+ # a.) with known categories, use dtype.categories
240
+ # b.) else with Categorical values, use values.dtype
241
+ # c.) else, infer from values
242
+ # d.) specifying dtype=CategoricalDtype and categories is an error
243
+ # 2. dtype is a string 'category'
244
+ # a.) use categories, ordered
245
+ # b.) use values.dtype
246
+ # c.) infer from values
247
+ # 3. dtype is None
248
+ # a.) use categories, ordered
249
+ # b.) use values.dtype
250
+ # c.) infer from values
251
+
237
252
if dtype is not None :
238
253
if isinstance (dtype , compat .string_types ):
239
254
if dtype == 'category' :
@@ -247,20 +262,24 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
247
262
categories = dtype .categories
248
263
ordered = dtype .ordered
249
264
250
- if ordered is None :
251
- ordered = False
265
+ elif is_categorical (values ):
266
+ dtype = values .dtype ._from_categorical_dtype (values .dtype ,
267
+ categories , ordered )
268
+ else :
269
+ dtype = CategoricalDtype (categories , ordered )
270
+
271
+ # At this point, dtype is always a CategoricalDtype
272
+ # if dtype.categories is None, we are inferring
252
273
253
274
if fastpath :
254
- if dtype is None :
255
- dtype = CategoricalDtype (categories , ordered )
256
275
self ._codes = coerce_indexer_dtype (values , categories )
257
276
self ._dtype = dtype
258
277
return
259
278
260
279
# sanitize input
261
280
if is_categorical_dtype (values ):
262
281
263
- # we are either a Series, CategoricalIndex
282
+ # we are either a Series or a CategoricalIndex
264
283
if isinstance (values , (ABCSeries , ABCCategoricalIndex )):
265
284
values = values ._values
266
285
@@ -271,6 +290,7 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
271
290
values = values .get_values ()
272
291
273
292
elif isinstance (values , (ABCIndexClass , ABCSeries )):
293
+ # we'll do inference later
274
294
pass
275
295
276
296
else :
@@ -288,12 +308,12 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
288
308
# "object" dtype to prevent this. In the end objects will be
289
309
# cast to int/... in the category assignment step.
290
310
if len (values ) == 0 or isna (values ).any ():
291
- dtype = 'object'
311
+ sanitize_dtype = 'object'
292
312
else :
293
- dtype = None
294
- values = _sanitize_array (values , None , dtype = dtype )
313
+ sanitize_dtype = None
314
+ values = _sanitize_array (values , None , dtype = sanitize_dtype )
295
315
296
- if categories is None :
316
+ if dtype . categories is None :
297
317
try :
298
318
codes , categories = factorize (values , sort = True )
299
319
except TypeError :
@@ -310,7 +330,8 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
310
330
raise NotImplementedError ("> 1 ndim Categorical are not "
311
331
"supported at this time" )
312
332
313
- if dtype is None or isinstance (dtype , str ):
333
+ if dtype .categories is None :
334
+ # we're inferring from values
314
335
dtype = CategoricalDtype (categories , ordered )
315
336
316
337
else :
@@ -321,11 +342,6 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
321
342
# - the new one, where each value is also in the categories array
322
343
# (or np.nan)
323
344
324
- # make sure that we always have the same type here, no matter what
325
- # we get passed in
326
- if dtype is None or isinstance (dtype , str ):
327
- dtype = CategoricalDtype (categories , ordered )
328
-
329
345
codes = _get_codes_for_values (values , dtype .categories )
330
346
331
347
# TODO: check for old style usage. These warnings should be removed
0 commit comments