pandas-dev · jreback · Nov 12, 2017 · Oct 23, 2017 · Oct 25, 2017 · Nov 10, 2017
diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt
@@ -60,7 +60,7 @@ Deprecations
 Removal of prior version deprecations/changes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
--
+- Warnings against the obsolete usage ``Categorical(codes, categories)``, which were emitted for instance when the first two arguments to ``Categorical()`` had different dtypes, and recommended the use of ``Categorical.from_codes``, have now been removed (:issue:`8074`)
 -
 -
 

diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
@@ -25,7 +25,6 @@
     is_timedelta64_dtype,
     is_categorical,
     is_categorical_dtype,
-    is_integer_dtype,
     is_list_like, is_sequence,
     is_scalar,
     is_dict_like)
@@ -261,6 +260,7 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
         #    c.) infer from values
 
         if dtype is not None:
+            # The dtype argument takes precedence over values.dtype (if any)
             if isinstance(dtype, compat.string_types):
                 if dtype == 'category':
                     dtype = CategoricalDtype(categories, ordered)
@@ -275,9 +275,12 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
             ordered = dtype.ordered
 
         elif is_categorical(values):
+            # If no "dtype" was passed, use the one from "values", but honor
+            # the "ordered" and "categories" arguments
             dtype = values.dtype._from_categorical_dtype(values.dtype,
                                                          categories, ordered)
         else:
+            # If dtype=None and values is not categorical, create a new dtype
             dtype = CategoricalDtype(categories, ordered)
 
         # At this point, dtype is always a CategoricalDtype
@@ -294,28 +297,12 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
 
         # sanitize input
         if is_categorical_dtype(values):
+            if dtype.categories is None:
+                dtype = CategoricalDtype(values.categories, dtype.ordered)
 
-            # we are either a Series or a CategoricalIndex
-            if isinstance(values, (ABCSeries, ABCCategoricalIndex)):
-                values = values._values
-
-            if ordered is None:
-                ordered = values.ordered
-            if categories is None:
-                categories = values.categories
-            values = values.get_values()
-
-        elif isinstance(values, (ABCIndexClass, ABCSeries)):
-            # we'll do inference later
-            pass
-
-        else:
-
-            # on numpy < 1.6 datetimelike get inferred to all i8 by
-            # _sanitize_array which is fine, but since factorize does this
-            # correctly no need here this is an issue because _sanitize_array
-            # also coerces np.nan to a string under certain versions of numpy
-            # as well
+        elif not isinstance(values, (ABCIndexClass, ABCSeries)):
+            # _sanitize_array coerces np.nan to a string under certain versions
+            # of numpy
             values = maybe_infer_to_datetimelike(values, convert_dates=True)
             if not isinstance(values, np.ndarray):
                 values = _convert_to_list_like(values)
@@ -335,7 +322,7 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
                 codes, categories = factorize(values, sort=True)
             except TypeError:
                 codes, categories = factorize(values, sort=False)
-                if ordered:
+                if dtype.ordered:
                     # raise, as we don't have a sortable data structure and so
                     # the user should give us one by specifying categories
                     raise TypeError("'values' is not ordered, please "
@@ -347,34 +334,18 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
                 raise NotImplementedError("> 1 ndim Categorical are not "
                                           "supported at this time")
 
-            if dtype.categories is None:
-                # we're inferring from values
-                dtype = CategoricalDtype(categories, ordered)
+            # we're inferring from values
+            dtype = CategoricalDtype(categories, dtype.ordered)
 
-        else:
-            # there were two ways if categories are present
-            # - the old one, where each value is a int pointer to the levels
-            #   array -> not anymore possible, but code outside of pandas could
-            #   call us like that, so make some checks
-            # - the new one, where each value is also in the categories array
-            #   (or np.nan)
+        elif is_categorical_dtype(values):
+            old_codes = (values.cat.codes if isinstance(values, ABCSeries)
+                         else values.codes)
+            codes = _recode_for_categories(old_codes, values.dtype.categories,
+                                           dtype.categories)
 
+        else:
             codes = _get_codes_for_values(values, dtype.categories)
 
-            # TODO: check for old style usage. These warnings should be removes
-            # after 0.18/ in 2016
-            if (is_integer_dtype(values) and
-                    not is_integer_dtype(dtype.categories)):
-                warn("Values and categories have different dtypes. Did you "
-                     "mean to use\n'Categorical.from_codes(codes, "
-                     "categories)'?", RuntimeWarning, stacklevel=2)
-
-            if (len(values) and is_integer_dtype(values) and
-                    (codes == -1).all()):
-                warn("None of the categories were found in values. Did you "
-                     "mean to use\n'Categorical.from_codes(codes, "
-                     "categories)'?", RuntimeWarning, stacklevel=2)
-
         if null_mask.any():
             # Reinsert -1 placeholders for previously removed missing values
             full_codes = - np.ones(null_mask.shape, dtype=codes.dtype)

diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
@@ -306,20 +306,18 @@ def f():
         assert len(cat.codes) == 1
         assert cat.codes[0] == 0
 
-        # Catch old style constructor useage: two arrays, codes + categories
-        # We can only catch two cases:
+        # two arrays
         #  - when the first is an integer dtype and the second is not
         #  - when the resulting codes are all -1/NaN
-        with tm.assert_produces_warning(RuntimeWarning):
+        with tm.assert_produces_warning(None):
             c_old = Categorical([0, 1, 2, 0, 1, 2],
                                 categories=["a", "b", "c"])  # noqa
 
-        with tm.assert_produces_warning(RuntimeWarning):
+        with tm.assert_produces_warning(None):
             c_old = Categorical([0, 1, 2, 0, 1, 2],  # noqa
                                 categories=[3, 4, 5])
 
-        # the next one are from the old docs, but unfortunately these don't
-        # trigger :-(
+        # the next one are from the old docs
         with tm.assert_produces_warning(None):
             c_old2 = Categorical([0, 1, 2, 0, 1, 2], [1, 2, 3])  # noqa
             cat = Categorical([1, 2], categories=[1, 2, 3])
-Original file line number
+Diff line change
@@ Expand Up / @@ -60,7 +60,7 @@ Deprecations @@
     Removal of prior version deprecations/changes
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    -
+    - Warnings against the obsolete usage ``Categorical(codes, categories)``, which were emitted for instance when the first two arguments to ``Categorical()`` had different dtypes, and recommended the use of ``Categorical.from_codes``, have now been removed (:issue:`8074`)
     -
     -
@@ Expand Down @@