From be3832d258e19425504fce62535473b445e9390a Mon Sep 17 00:00:00 2001 From: tp Date: Sun, 23 Dec 2018 09:24:43 +0000 Subject: [PATCH 1/2] clean Categorical.__init__ --- pandas/core/arrays/categorical.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 6ccb8dc5d2725..53244886170c8 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -341,9 +341,6 @@ def __init__(self, values, categories=None, ordered=None, dtype=None, elif categories is not None or ordered is not None: raise ValueError("Cannot specify both `dtype` and `categories`" " or `ordered`.") - - categories = dtype.categories - elif is_categorical(values): # If no "dtype" was passed, use the one from "values", but honor # the "ordered" and "categories" arguments @@ -355,19 +352,17 @@ def __init__(self, values, categories=None, ordered=None, dtype=None, if (isinstance(values, (ABCSeries, ABCIndexClass)) and isinstance(values._values, type(self))): values = values._values.codes.copy() - if categories is None: - categories = dtype.categories fastpath = True - else: # If dtype=None and values is not categorical, create a new dtype dtype = CategoricalDtype(categories, ordered) - # At this point, dtype is always a CategoricalDtype + # At this point, dtype is always a CategoricalDtype and you should not + # use categories and ordered separately. 
# if dtype.categories is None, we are inferring if fastpath: - self._codes = coerce_indexer_dtype(values, categories) + self._codes = coerce_indexer_dtype(values, dtype.categories) self._dtype = self._dtype.update_dtype(dtype) return @@ -376,9 +371,8 @@ def __init__(self, values, categories=None, ordered=None, dtype=None, null_mask = np.array(False) # sanitize input - if is_categorical_dtype(values): - if dtype.categories is None: - dtype = CategoricalDtype(values.categories, dtype.ordered) + if is_categorical_dtype(values) and dtype.categories is None: + dtype = CategoricalDtype(values.categories, dtype.ordered) elif not isinstance(values, (ABCIndexClass, ABCSeries)): # sanitize_array coerces np.nan to a string under certain versions From f8034cf5fcf79b8d1351e943f11f200d9c60d226 Mon Sep 17 00:00:00 2001 From: tp Date: Sun, 23 Dec 2018 21:22:19 +0000 Subject: [PATCH 2/2] change according to comments --- pandas/core/arrays/categorical.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 53244886170c8..ead8979fa2a91 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -371,9 +371,9 @@ def __init__(self, values, categories=None, ordered=None, dtype=None, null_mask = np.array(False) # sanitize input - if is_categorical_dtype(values) and dtype.categories is None: - dtype = CategoricalDtype(values.categories, dtype.ordered) - + if is_categorical_dtype(values): + if dtype.categories is None: + dtype = CategoricalDtype(values.categories, dtype.ordered) elif not isinstance(values, (ABCIndexClass, ABCSeries)): # sanitize_array coerces np.nan to a string under certain versions # of numpy