Deprecate the categories and ordered parameters of Categorical.from_codes

topper-123 · topper-123 · commit 6997fd8d0099 · 2018-12-23T16:24:11.000Z
diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
@@ -1123,6 +1123,7 @@ Deprecations
 - :meth:`Series.compress` is deprecated. Use ``Series[condition]`` instead (:issue:`18262`)
 - The signature of :meth:`Series.to_csv` has been uniformed to that of :meth:`DataFrame.to_csv`: the name of the first argument is now ``path_or_buf``, the order of subsequent arguments has changed, the ``header`` argument now defaults to ``True``. (:issue:`19715`)
 - :meth:`Categorical.from_codes` has deprecated providing float values for the ``codes`` argument. (:issue:`21767`)
+- The ``categories`` and ``ordered`` parameters of :meth:`Categorical.from_codes` are deprecated. Supply a :class:`~pandas.api.types.CategoricalDtype` to the new ``dtype`` parameter instead. (:issue:`24398`)
 - :func:`pandas.read_table` is deprecated. Instead, use :func:`read_csv` passing ``sep='\t'`` if necessary (:issue:`21948`)
 - :meth:`Series.str.cat` has deprecated using arbitrary list-likes *within* list-likes. A list-like container may still contain
   many ``Series``, ``Index`` or 1-dimensional ``np.ndarray``, or alternatively, only scalar values. (:issue:`21950`)
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -639,11 +639,13 @@ def _from_inferred_categories(cls, inferred_categories, inferred_codes,
         return cls(codes, dtype=dtype, fastpath=True)
 
     @classmethod
+    @deprecate_kwarg(old_arg_name='categories', new_arg_name=None)
+    @deprecate_kwarg(old_arg_name='ordered', new_arg_name=None)
     def from_codes(cls, codes, categories=None, ordered=None, dtype=None):
         """
-        Make a Categorical type from codes and categories arrays.
+        Make a Categorical type from codes and a CategoricalDtype.
 
-        This constructor is useful if you already have codes and categories and
+        This constructor is useful if you already have codes and the dtype and
         so do not need the (computation intensive) factorization step, which is
         usually done on the constructor.
 
@@ -657,16 +659,17 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None):
             categories or -1 for NaN
         categories : index-like, optional
             The categories for the categorical. Items need to be unique.
+
+            .. deprecated:: 0.24.0
+                Use ``dtype`` instead.
         ordered : bool, optional
             Whether or not this categorical is treated as an ordered
             categorical. If not given, the resulting categorical will be
             unordered.
 
-            .. versionchanged:: 0.24.0
-
-                The default value has been changed to  ``None``. Previously
-                the default value was ``False``.
-        dtype : CategoricalDtype, optional
+            .. deprecated:: 0.24.0
+                Use ``dtype`` instead.
+        dtype : CategoricalDtype
             An instance of ``CategoricalDtype`` to use for this categorical.
 
             .. versionadded:: 0.24.0
@@ -682,6 +685,8 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None):
             if categories is not None or ordered is not None:
                 raise ValueError("Cannot specify `categories` or `ordered` "
                                  "together with `dtype`.")
+        elif categories is None and dtype is None:
+            raise ValueError("Must specify `categories` or `dtype`.")
         else:
             dtype = CategoricalDtype(categories, ordered)
 
@@ -1245,9 +1250,8 @@ def map(self, mapper):
         """
         new_categories = self.categories.map(mapper)
         try:
-            return self.from_codes(self._codes.copy(),
-                                   categories=new_categories,
-                                   ordered=self.ordered)
+            new_dtype = CategoricalDtype(new_categories, ordered=self.ordered)
+            return self.from_codes(self._codes.copy(), dtype=new_dtype)
         except ValueError:
             return np.take(new_categories, self._codes)
 
diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py
@@ -21,18 +21,13 @@ class TestCategoricalConstructors(object):
     def test_validate_ordered(self):
         # see gh-14058
         exp_msg = "'ordered' must either be 'True' or 'False'"
-        exp_err = TypeError
 
-        # This should be a boolean.
+        # This should be a boolean or None.
         ordered = np.array([0, 1, 2])
 
-        with pytest.raises(exp_err, match=exp_msg):
+        with pytest.raises(TypeError, match=exp_msg):
             Categorical([1, 2, 3], ordered=ordered)
 
-        with pytest.raises(exp_err, match=exp_msg):
-            Categorical.from_codes([0, 0, 1], categories=['a', 'b', 'c'],
-                                   ordered=ordered)
-
     def test_constructor_empty(self):
         # GH 17248
         c = Categorical([])
@@ -421,76 +416,41 @@ def test_constructor_with_categorical_categories(self):
         tm.assert_categorical_equal(result, expected)
 
     def test_from_codes(self):
+        dtype = CategoricalDtype(categories=[1, 2])
+
+        # no dtype or categories
+        msg = "Must specify `categories` or `dtype`."
+        with pytest.raises(ValueError, match=msg):
+            Categorical.from_codes([1, 2])
 
         # too few categories
-        dtype = CategoricalDtype(categories=[1, 2])
         msg = "codes need to be between "
-        with pytest.raises(ValueError, match=msg):
-            Categorical.from_codes([1, 2], categories=dtype.categories)
         with pytest.raises(ValueError, match=msg):
             Categorical.from_codes([1, 2], dtype=dtype)
 
         # no int codes
         msg = "codes need to be array-like integers"
-        with pytest.raises(ValueError, match=msg):
-            Categorical.from_codes(["a"], categories=dtype.categories)
         with pytest.raises(ValueError, match=msg):
             Categorical.from_codes(["a"], dtype=dtype)
 
-        # no unique categories
-        with pytest.raises(ValueError,
-                           match="Categorical categories must be unique"):
-            Categorical.from_codes([0, 1, 2], categories=["a", "a", "b"])
-
-        # NaN categories included
-        with pytest.raises(ValueError,
-                           match="Categorial categories cannot be null"):
-            Categorical.from_codes([0, 1, 2], categories=["a", "b", np.nan])
-
         # too negative
         dtype = CategoricalDtype(categories=["a", "b", "c"])
         msg = r"codes need to be between -1 and len\(categories\)-1"
-        with pytest.raises(ValueError, match=msg):
-            Categorical.from_codes([-2, 1, 2], categories=dtype.categories)
         with pytest.raises(ValueError, match=msg):
             Categorical.from_codes([-2, 1, 2], dtype=dtype)
 
         exp = Categorical(["a", "b", "c"], ordered=False)
-        res = Categorical.from_codes([0, 1, 2], categories=dtype.categories)
-        tm.assert_categorical_equal(exp, res)
-
         res = Categorical.from_codes([0, 1, 2], dtype=dtype)
         tm.assert_categorical_equal(exp, res)
 
         codes = np.random.choice([0, 1], 5, p=[0.9, 0.1])
         dtype = CategoricalDtype(categories=["train", "test"])
-        Categorical.from_codes(codes, categories=dtype.categories)
         Categorical.from_codes(codes, dtype=dtype)
 
-    def test_from_codes_with_categorical_categories(self):
-        # GH17884
-        expected = Categorical(['a', 'b'], categories=['a', 'b', 'c'])
-
-        result = Categorical.from_codes(
-            [0, 1], categories=Categorical(['a', 'b', 'c']))
-        tm.assert_categorical_equal(result, expected)
-
-        result = Categorical.from_codes(
-            [0, 1], categories=CategoricalIndex(['a', 'b', 'c']))
-        tm.assert_categorical_equal(result, expected)
-
-        # non-unique Categorical still raises
-        with pytest.raises(ValueError,
-                           match="Categorical categories must be unique"):
-            Categorical.from_codes([0, 1], Categorical(['a', 'b', 'a']))
-
     def test_from_codes_with_nan_code(self):
         # GH21767
         codes = [1, 2, np.nan]
         dtype = CategoricalDtype(categories=['a', 'b', 'c'])
-        with pytest.raises(ValueError,
-                           match="codes need to be array-like integers"):
-            Categorical.from_codes(codes, categories=dtype.categories)
         with pytest.raises(ValueError,
                            match="codes need to be array-like integers"):
             Categorical.from_codes(codes, dtype=dtype)
@@ -500,36 +460,41 @@ def test_from_codes_with_float(self):
         codes = [1.0, 2.0, 0]  # integer, but in float dtype
         dtype = CategoricalDtype(categories=['a', 'b', 'c'])
 
-        with tm.assert_produces_warning(FutureWarning):
-            cat = Categorical.from_codes(codes, dtype.categories)
-        tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype='i1'))
-
-        with tm.assert_produces_warning(FutureWarning):
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
             cat = Categorical.from_codes(codes, dtype=dtype)
         tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype='i1'))
 
         codes = [1.1, 2.0, 0]  # non-integer
-        with pytest.raises(ValueError,
-                           match="codes need to be array-like integers"):
-            Categorical.from_codes(codes, dtype.categories)
         with pytest.raises(ValueError,
                            match="codes need to be array-like integers"):
             Categorical.from_codes(codes, dtype=dtype)
 
+    def test_from_codes_deprecated(self):
+        with tm.assert_produces_warning(FutureWarning):
+            Categorical.from_codes([0, 1], categories=['a', 'b'])
+
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            Categorical.from_codes([0, 1], categories=['a', 'b'], ordered=True)
+
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            Categorical.from_codes([0, 1], categories=['a', 'b'], ordered=False)
+
     @pytest.mark.parametrize('dtype', [None, 'category'])
     def test_from_inferred_categories(self, dtype):
         cats = ['a', 'b']
         codes = np.array([0, 0, 1, 1], dtype='i8')
         result = Categorical._from_inferred_categories(cats, codes, dtype)
-        expected = Categorical.from_codes(codes, cats)
+        expected = Categorical.from_codes(codes,
+                                          dtype=CategoricalDtype(cats))
         tm.assert_categorical_equal(result, expected)
 
     @pytest.mark.parametrize('dtype', [None, 'category'])
     def test_from_inferred_categories_sorts(self, dtype):
         cats = ['b', 'a']
         codes = np.array([0, 1, 1, 1], dtype='i8')
         result = Categorical._from_inferred_categories(cats, codes, dtype)
-        expected = Categorical.from_codes([1, 0, 0, 0], ['a', 'b'])
+        expected = Categorical.from_codes([1, 0, 0, 0],
+                                          dtype=CategoricalDtype(['a', 'b']))
         tm.assert_categorical_equal(result, expected)
 
     def test_from_inferred_categories_dtype(self):
diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py
@@ -433,8 +433,9 @@ def test_astype(self):
                                        right=[2, 4],
                                        closed='right')
 
+        dtype = CategoricalDtype(categories=ii, ordered=True)
         ci = CategoricalIndex(Categorical.from_codes(
-            [0, 1, -1], categories=ii, ordered=True))
+            [0, 1, -1], dtype=dtype))
 
         result = ci.astype('interval')
         expected = ii.take([0, 1, -1])