DEPR: passing categories or ordered kwargs to Series.astype is deprecated (pandas-dev#17742)

jreback · Krzysztof Chomski · commit bdb1a3dd0f52 · 2017-10-16T16:12:24.000+02:00
closes pandas-dev#17636
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
@@ -606,6 +606,7 @@ Deprecations
 - :func:`DataFrame.as_blocks` is deprecated, as this is exposing the internal implementation (:issue:`17302`)
 - ``pd.TimeGrouper`` is deprecated in favor of :class:`pandas.Grouper` (:issue:`16747`)
 - ``cdate_range`` has been deprecated in favor of :func:`bdate_range`, which has gained ``weekmask`` and ``holidays`` parameters for building custom frequency date ranges. See the :ref:`documentation <timeseries.custom-freq-ranges>` for more details (:issue:`17596`)
+- Passing ``categories`` or ``ordered`` kwargs to :func:`Series.astype` is deprecated in favor of passing a :ref:`CategoricalDtype <whatsnew_0210.enhancements.categorical_dtype>` (:issue:`17636`)
 
 .. _whatsnew_0210.deprecations.argmin_min:
 
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -1,3 +1,4 @@
+import warnings
 import copy
 from warnings import catch_warnings
 import itertools
@@ -547,12 +548,20 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
         # may need to convert to categorical
         # this is only called for non-categoricals
         if self.is_categorical_astype(dtype):
-            if (('categories' in kwargs or 'ordered' in kwargs) and
-                    isinstance(dtype, CategoricalDtype)):
-                raise TypeError("Cannot specify a CategoricalDtype and also "
-                                "`categories` or `ordered`. Use "
-                                "`dtype=CategoricalDtype(categories, ordered)`"
-                                " instead.")
+
+            # GH 17636: passing `categories` / `ordered` kwargs is deprecated
+            if ('categories' in kwargs or 'ordered' in kwargs):
+                if isinstance(dtype, CategoricalDtype):
+                    raise TypeError(
+                        "Cannot specify a CategoricalDtype and also "
+                        "`categories` or `ordered`. Use "
+                        "`dtype=CategoricalDtype(categories, ordered)`"
+                        " instead.")
+                warnings.warn("specifying 'categories' or 'ordered' in "
+                              ".astype() is deprecated; pass a "
+                              "CategoricalDtype instead",
+                              FutureWarning, stacklevel=7)
+
             kwargs = kwargs.copy()
             categories = getattr(dtype, 'categories', None)
             ordered = getattr(dtype, 'ordered', False)
diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py
@@ -8,6 +8,7 @@
 
 import pandas as pd
 from pandas.compat import lrange
+from pandas.api.types import CategoricalDtype
 from pandas import (DataFrame, Series, MultiIndex, Timestamp,
                     date_range, NaT, IntervalIndex)
 
@@ -513,7 +514,7 @@ def test_sort_index_categorical_index(self):
 
         df = (DataFrame({'A': np.arange(6, dtype='int64'),
                          'B': Series(list('aabbca'))
-                         .astype('category', categories=list('cab'))})
+                         .astype(CategoricalDtype(list('cab')))})
               .set_index('B'))
 
         result = df.sort_index()
diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py
@@ -186,6 +186,16 @@ def test_astype_dict_like(self, dtype_class):
         with pytest.raises(KeyError):
             s.astype(dt5)
 
+    def test_astype_categories_deprecation(self):
+
+        # GH 17636: `categories` / `ordered` kwargs to astype are deprecated
+        s = Series(['a', 'b', 'a'])
+        expected = s.astype(CategoricalDtype(['a', 'b'], ordered=True))
+        with tm.assert_produces_warning(FutureWarning,
+                                        check_stacklevel=False):
+            result = s.astype('category', categories=['a', 'b'], ordered=True)
+        tm.assert_series_equal(result, expected)
+
     def test_astype_categoricaldtype(self):
         s = Series(['a', 'b', 'a'])
         result = s.astype(CategoricalDtype(['a', 'b'], ordered=True))
diff --git a/pandas/tests/series/test_rank.py b/pandas/tests/series/test_rank.py
@@ -7,7 +7,8 @@
 from numpy import nan
 import numpy as np
 
-from pandas import (Series, date_range, NaT)
+from pandas import Series, date_range, NaT
+from pandas.api.types import CategoricalDtype
 
 from pandas.compat import product
 from pandas.util.testing import assert_series_equal
@@ -123,50 +124,34 @@ def test_rank_categorical(self):
         exp_desc = Series([6., 5., 4., 3., 2., 1.])
         ordered = Series(
             ['first', 'second', 'third', 'fourth', 'fifth', 'sixth']
-        ).astype(
-            'category',
-            categories=['first', 'second', 'third',
-                        'fourth', 'fifth', 'sixth'],
-            ordered=True
-        )
+        ).astype(CategoricalDtype(categories=['first', 'second', 'third',
+                                              'fourth', 'fifth', 'sixth'],
+                                  ordered=True))
         assert_series_equal(ordered.rank(), exp)
         assert_series_equal(ordered.rank(ascending=False), exp_desc)
 
         # Unordered categoricals should be ranked as objects
-        unordered = Series(
-            ['first', 'second', 'third', 'fourth', 'fifth', 'sixth'],
-        ).astype(
-            'category',
-            categories=['first', 'second', 'third',
-                        'fourth', 'fifth', 'sixth'],
-            ordered=False
-        )
+        unordered = Series(['first', 'second', 'third', 'fourth',
+                            'fifth', 'sixth']).astype(
+            CategoricalDtype(categories=['first', 'second', 'third',
+                                         'fourth', 'fifth', 'sixth'],
+                             ordered=False))
         exp_unordered = Series([2., 4., 6., 3., 1., 5.])
         res = unordered.rank()
         assert_series_equal(res, exp_unordered)
 
         unordered1 = Series(
             [1, 2, 3, 4, 5, 6],
-        ).astype(
-            'category',
-            categories=[1, 2, 3, 4, 5, 6],
-            ordered=False
-        )
+        ).astype(CategoricalDtype([1, 2, 3, 4, 5, 6], False))
         exp_unordered1 = Series([1., 2., 3., 4., 5., 6.])
         res1 = unordered1.rank()
         assert_series_equal(res1, exp_unordered1)
 
         # Test na_option for rank data
         na_ser = Series(
             ['first', 'second', 'third', 'fourth', 'fifth', 'sixth', np.NaN]
-        ).astype(
-            'category',
-            categories=[
-                'first', 'second', 'third', 'fourth',
-                'fifth', 'sixth', 'seventh'
-            ],
-            ordered=True
-        )
+        ).astype(CategoricalDtype(['first', 'second', 'third', 'fourth',
+                                   'fifth', 'sixth', 'seventh'], True))
 
         exp_top = Series([2., 3., 4., 5., 6., 7., 1.])
         exp_bot = Series([1., 2., 3., 4., 5., 6., 7.])
@@ -195,13 +180,8 @@ def test_rank_categorical(self):
         )
 
         # Test with pct=True
-        na_ser = Series(
-            ['first', 'second', 'third', 'fourth', np.NaN],
-        ).astype(
-            'category',
-            categories=['first', 'second', 'third', 'fourth'],
-            ordered=True
-        )
+        na_ser = Series(['first', 'second', 'third', 'fourth', np.NaN]).astype(
+            CategoricalDtype(['first', 'second', 'third', 'fourth'], True))
         exp_top = Series([0.4, 0.6, 0.8, 1., 0.2])
         exp_bot = Series([0.2, 0.4, 0.6, 0.8, 1.])
         exp_keep = Series([0.25, 0.5, 0.75, 1., np.NaN])
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
@@ -84,17 +84,17 @@ def test_getitem_category_type(self):
 
         # get slice
         result = s.iloc[0:2]
-        expected = pd.Series([1, 2]).astype('category', categories=[1, 2, 3])
+        expected = pd.Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
         tm.assert_series_equal(result, expected)
 
         # get list of indexes
         result = s.iloc[[0, 1]]
-        expected = pd.Series([1, 2]).astype('category', categories=[1, 2, 3])
+        expected = pd.Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
         tm.assert_series_equal(result, expected)
 
         # get boolean array
         result = s.iloc[[True, False, False]]
-        expected = pd.Series([1]).astype('category', categories=[1, 2, 3])
+        expected = pd.Series([1]).astype(CategoricalDtype([1, 2, 3]))
         tm.assert_series_equal(result, expected)
 
     def test_setitem(self):
@@ -2076,12 +2076,12 @@ def test_creation_astype(self):
         l = ["a", "b", "c", "a"]
         s = pd.Series(l)
         exp = pd.Series(Categorical(l, ordered=True))
-        res = s.astype('category', ordered=True)
+        res = s.astype(CategoricalDtype(None, ordered=True))
         tm.assert_series_equal(res, exp)
 
         exp = pd.Series(Categorical(
             l, categories=list('abcdef'), ordered=True))
-        res = s.astype('category', categories=list('abcdef'), ordered=True)
+        res = s.astype(CategoricalDtype(list('abcdef'), ordered=True))
         tm.assert_series_equal(res, exp)
 
     def test_construction_series(self):
@@ -4262,11 +4262,11 @@ def test_concat_preserve(self):
         b = Series(list('aabbca'))
 
         df2 = DataFrame({'A': a,
-                         'B': b.astype('category', categories=list('cab'))})
+                         'B': b.astype(CategoricalDtype(list('cab')))})
         res = pd.concat([df2, df2])
-        exp = DataFrame({'A': pd.concat([a, a]),
-                         'B': pd.concat([b, b]).astype(
-            'category', categories=list('cab'))})
+        exp = DataFrame(
+            {'A': pd.concat([a, a]),
+             'B': pd.concat([b, b]).astype(CategoricalDtype(list('cab')))})
         tm.assert_frame_equal(res, exp)
 
     def test_categorical_index_preserver(self):
@@ -4275,13 +4275,13 @@ def test_categorical_index_preserver(self):
         b = Series(list('aabbca'))
 
         df2 = DataFrame({'A': a,
-                         'B': b.astype('category', categories=list('cab'))
+                         'B': b.astype(CategoricalDtype(list('cab')))
                          }).set_index('B')
         result = pd.concat([df2, df2])
-        expected = DataFrame({'A': pd.concat([a, a]),
-                              'B': pd.concat([b, b]).astype(
-                                  'category', categories=list('cab'))
-                              }).set_index('B')
+        expected = DataFrame(
+            {'A': pd.concat([a, a]),
+             'B': pd.concat([b, b]).astype(CategoricalDtype(list('cab')))
+             }).set_index('B')
         tm.assert_frame_equal(result, expected)
 
         # wrong catgories
@@ -4324,7 +4324,7 @@ def test_merge(self):
         cright = right.copy()
         cright['d'] = cright['d'].astype('category')
         result = pd.merge(left, cright, how='left', left_on='b', right_on='c')
-        expected['d'] = expected['d'].astype('category', categories=['null'])
+        expected['d'] = expected['d'].astype(CategoricalDtype(['null']))
         tm.assert_frame_equal(result, expected)
 
         # cat-object