25
25
is_timedelta64_dtype ,
26
26
is_categorical ,
27
27
is_categorical_dtype ,
28
- is_integer_dtype ,
29
28
is_list_like , is_sequence ,
30
29
is_scalar ,
31
30
is_dict_like )
@@ -261,6 +260,7 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
261
260
# c.) infer from values
262
261
263
262
if dtype is not None :
263
+ # The dtype argument takes precedence over values.dtype (if any)
264
264
if isinstance (dtype , compat .string_types ):
265
265
if dtype == 'category' :
266
266
dtype = CategoricalDtype (categories , ordered )
@@ -275,9 +275,12 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
275
275
ordered = dtype .ordered
276
276
277
277
elif is_categorical (values ):
278
+ # If no "dtype" was passed, use the one from "values", but honor
279
+ # the "ordered" and "categories" arguments
278
280
dtype = values .dtype ._from_categorical_dtype (values .dtype ,
279
281
categories , ordered )
280
282
else :
283
+ # If dtype=None and values is not categorical, create a new dtype
281
284
dtype = CategoricalDtype (categories , ordered )
282
285
283
286
# At this point, dtype is always a CategoricalDtype
@@ -294,28 +297,12 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
294
297
295
298
# sanitize input
296
299
if is_categorical_dtype (values ):
300
+ if dtype .categories is None :
301
+ dtype = CategoricalDtype (values .categories , dtype .ordered )
297
302
298
- # we are either a Series or a CategoricalIndex
299
- if isinstance (values , (ABCSeries , ABCCategoricalIndex )):
300
- values = values ._values
301
-
302
- if ordered is None :
303
- ordered = values .ordered
304
- if categories is None :
305
- categories = values .categories
306
- values = values .get_values ()
307
-
308
- elif isinstance (values , (ABCIndexClass , ABCSeries )):
309
- # we'll do inference later
310
- pass
311
-
312
- else :
313
-
314
- # on numpy < 1.6 datetimelike get inferred to all i8 by
315
- # _sanitize_array which is fine, but since factorize does this
316
- # correctly no need here this is an issue because _sanitize_array
317
- # also coerces np.nan to a string under certain versions of numpy
318
- # as well
303
+ elif not isinstance (values , (ABCIndexClass , ABCSeries )):
304
+ # _sanitize_array coerces np.nan to a string under certain versions
305
+ # of numpy
319
306
values = maybe_infer_to_datetimelike (values , convert_dates = True )
320
307
if not isinstance (values , np .ndarray ):
321
308
values = _convert_to_list_like (values )
@@ -335,7 +322,7 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
335
322
codes , categories = factorize (values , sort = True )
336
323
except TypeError :
337
324
codes , categories = factorize (values , sort = False )
338
- if ordered :
325
+ if dtype . ordered :
339
326
# raise, as we don't have a sortable data structure and so
340
327
# the user should give us one by specifying categories
341
328
raise TypeError ("'values' is not ordered, please "
@@ -347,34 +334,18 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
347
334
raise NotImplementedError ("> 1 ndim Categorical are not "
348
335
"supported at this time" )
349
336
350
- if dtype .categories is None :
351
- # we're inferring from values
352
- dtype = CategoricalDtype (categories , ordered )
337
+ # we're inferring from values
338
+ dtype = CategoricalDtype (categories , dtype .ordered )
353
339
354
- else :
355
- # there were two ways if categories are present
356
- # - the old one, where each value is a int pointer to the levels
357
- # array -> not anymore possible, but code outside of pandas could
358
- # call us like that, so make some checks
359
- # - the new one, where each value is also in the categories array
360
- # (or np.nan)
340
+ elif is_categorical_dtype (values ):
341
+ old_codes = (values .cat .codes if isinstance (values , ABCSeries )
342
+ else values .codes )
343
+ codes = _recode_for_categories (old_codes , values .dtype .categories ,
344
+ dtype .categories )
361
345
346
+ else :
362
347
codes = _get_codes_for_values (values , dtype .categories )
363
348
364
- # TODO: check for old style usage. These warnings should be removes
365
- # after 0.18/ in 2016
366
- if (is_integer_dtype (values ) and
367
- not is_integer_dtype (dtype .categories )):
368
- warn ("Values and categories have different dtypes. Did you "
369
- "mean to use\n 'Categorical.from_codes(codes, "
370
- "categories)'?" , RuntimeWarning , stacklevel = 2 )
371
-
372
- if (len (values ) and is_integer_dtype (values ) and
373
- (codes == - 1 ).all ()):
374
- warn ("None of the categories were found in values. Did you "
375
- "mean to use\n 'Categorical.from_codes(codes, "
376
- "categories)'?" , RuntimeWarning , stacklevel = 2 )
377
-
378
349
if null_mask .any ():
379
350
# Reinsert -1 placeholders for previously removed missing values
380
351
full_codes = - np .ones (null_mask .shape , dtype = codes .dtype )
0 commit comments