Skip to content

Commit bdb1a3d

Browse files
jrebackKrzysztof Chomski
authored and
Krzysztof Chomski
committed
DEPR: passing categories or ordered kwargs to Series.astype is deprecated (pandas-dev#17742)
closes pandas-dev#17636
1 parent 93e2405 commit bdb1a3d

File tree

6 files changed

+58
-57
lines changed

6 files changed

+58
-57
lines changed

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -606,6 +606,7 @@ Deprecations
606606
- :func:`DataFrame.as_blocks` is deprecated, as this is exposing the internal implementation (:issue:`17302`)
607607
- ``pd.TimeGrouper`` is deprecated in favor of :class:`pandas.Grouper` (:issue:`16747`)
608608
- ``cdate_range`` has been deprecated in favor of :func:`bdate_range`, which has gained ``weekmask`` and ``holidays`` parameters for building custom frequency date ranges. See the :ref:`documentation <timeseries.custom-freq-ranges>` for more details (:issue:`17596`)
609+
- Passing ``categories`` or ``ordered`` kwargs to :func:`Series.astype` is deprecated in favor of passing a :ref:`CategoricalDtype <whatsnew_0210.enhancements.categorical_dtype>` (:issue:`17636`)
609610

610611
.. _whatsnew_0210.deprecations.argmin_min:
611612

pandas/core/internals.py

+15-6
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import warnings
12
import copy
23
from warnings import catch_warnings
34
import itertools
@@ -547,12 +548,20 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
547548
# may need to convert to categorical
548549
# this is only called for non-categoricals
549550
if self.is_categorical_astype(dtype):
550-
if (('categories' in kwargs or 'ordered' in kwargs) and
551-
isinstance(dtype, CategoricalDtype)):
552-
raise TypeError("Cannot specify a CategoricalDtype and also "
553-
"`categories` or `ordered`. Use "
554-
"`dtype=CategoricalDtype(categories, ordered)`"
555-
" instead.")
551+
552+
# GH 17636: passing `categories` or `ordered` kwargs to astype is deprecated
553+
if ('categories' in kwargs or 'ordered' in kwargs):
554+
if isinstance(dtype, CategoricalDtype):
555+
raise TypeError(
556+
"Cannot specify a CategoricalDtype and also "
557+
"`categories` or `ordered`. Use "
558+
"`dtype=CategoricalDtype(categories, ordered)`"
559+
" instead.")
560+
warnings.warn("specifying 'categories' or 'ordered' in "
561+
".astype() is deprecated; pass a "
562+
"CategoricalDtype instead",
563+
FutureWarning, stacklevel=7)
564+
556565
kwargs = kwargs.copy()
557566
categories = getattr(dtype, 'categories', None)
558567
ordered = getattr(dtype, 'ordered', False)

pandas/tests/frame/test_sorting.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import pandas as pd
1010
from pandas.compat import lrange
11+
from pandas.api.types import CategoricalDtype
1112
from pandas import (DataFrame, Series, MultiIndex, Timestamp,
1213
date_range, NaT, IntervalIndex)
1314

@@ -513,7 +514,7 @@ def test_sort_index_categorical_index(self):
513514

514515
df = (DataFrame({'A': np.arange(6, dtype='int64'),
515516
'B': Series(list('aabbca'))
516-
.astype('category', categories=list('cab'))})
517+
.astype(CategoricalDtype(list('cab')))})
517518
.set_index('B'))
518519

519520
result = df.sort_index()

pandas/tests/series/test_dtypes.py

+10
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,16 @@ def test_astype_dict_like(self, dtype_class):
186186
with pytest.raises(KeyError):
187187
s.astype(dt5)
188188

189+
def test_astype_categories_deprecation(self):
190+
191+
# GH 17636: passing `categories` or `ordered` kwargs to astype is deprecated
192+
s = Series(['a', 'b', 'a'])
193+
expected = s.astype(CategoricalDtype(['a', 'b'], ordered=True))
194+
with tm.assert_produces_warning(FutureWarning,
195+
check_stacklevel=False):
196+
result = s.astype('category', categories=['a', 'b'], ordered=True)
197+
tm.assert_series_equal(result, expected)
198+
189199
def test_astype_categoricaldtype(self):
190200
s = Series(['a', 'b', 'a'])
191201
result = s.astype(CategoricalDtype(['a', 'b'], ordered=True))

pandas/tests/series/test_rank.py

+15-35
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
from numpy import nan
88
import numpy as np
99

10-
from pandas import (Series, date_range, NaT)
10+
from pandas import Series, date_range, NaT
11+
from pandas.api.types import CategoricalDtype
1112

1213
from pandas.compat import product
1314
from pandas.util.testing import assert_series_equal
@@ -123,50 +124,34 @@ def test_rank_categorical(self):
123124
exp_desc = Series([6., 5., 4., 3., 2., 1.])
124125
ordered = Series(
125126
['first', 'second', 'third', 'fourth', 'fifth', 'sixth']
126-
).astype(
127-
'category',
128-
categories=['first', 'second', 'third',
129-
'fourth', 'fifth', 'sixth'],
130-
ordered=True
131-
)
127+
).astype(CategoricalDtype(categories=['first', 'second', 'third',
128+
'fourth', 'fifth', 'sixth'],
129+
ordered=True))
132130
assert_series_equal(ordered.rank(), exp)
133131
assert_series_equal(ordered.rank(ascending=False), exp_desc)
134132

135133
# Unordered categoricals should be ranked as objects
136-
unordered = Series(
137-
['first', 'second', 'third', 'fourth', 'fifth', 'sixth'],
138-
).astype(
139-
'category',
140-
categories=['first', 'second', 'third',
141-
'fourth', 'fifth', 'sixth'],
142-
ordered=False
143-
)
134+
unordered = Series(['first', 'second', 'third', 'fourth',
135+
'fifth', 'sixth']).astype(
136+
CategoricalDtype(categories=['first', 'second', 'third',
137+
'fourth', 'fifth', 'sixth'],
138+
ordered=False))
144139
exp_unordered = Series([2., 4., 6., 3., 1., 5.])
145140
res = unordered.rank()
146141
assert_series_equal(res, exp_unordered)
147142

148143
unordered1 = Series(
149144
[1, 2, 3, 4, 5, 6],
150-
).astype(
151-
'category',
152-
categories=[1, 2, 3, 4, 5, 6],
153-
ordered=False
154-
)
145+
).astype(CategoricalDtype([1, 2, 3, 4, 5, 6], False))
155146
exp_unordered1 = Series([1., 2., 3., 4., 5., 6.])
156147
res1 = unordered1.rank()
157148
assert_series_equal(res1, exp_unordered1)
158149

159150
# Test na_option for rank data
160151
na_ser = Series(
161152
['first', 'second', 'third', 'fourth', 'fifth', 'sixth', np.NaN]
162-
).astype(
163-
'category',
164-
categories=[
165-
'first', 'second', 'third', 'fourth',
166-
'fifth', 'sixth', 'seventh'
167-
],
168-
ordered=True
169-
)
153+
).astype(CategoricalDtype(['first', 'second', 'third', 'fourth',
154+
'fifth', 'sixth', 'seventh'], True))
170155

171156
exp_top = Series([2., 3., 4., 5., 6., 7., 1.])
172157
exp_bot = Series([1., 2., 3., 4., 5., 6., 7.])
@@ -195,13 +180,8 @@ def test_rank_categorical(self):
195180
)
196181

197182
# Test with pct=True
198-
na_ser = Series(
199-
['first', 'second', 'third', 'fourth', np.NaN],
200-
).astype(
201-
'category',
202-
categories=['first', 'second', 'third', 'fourth'],
203-
ordered=True
204-
)
183+
na_ser = Series(['first', 'second', 'third', 'fourth', np.NaN]).astype(
184+
CategoricalDtype(['first', 'second', 'third', 'fourth'], True))
205185
exp_top = Series([0.4, 0.6, 0.8, 1., 0.2])
206186
exp_bot = Series([0.2, 0.4, 0.6, 0.8, 1.])
207187
exp_keep = Series([0.25, 0.5, 0.75, 1., np.NaN])

pandas/tests/test_categorical.py

+15-15
Original file line numberDiff line numberDiff line change
@@ -84,17 +84,17 @@ def test_getitem_category_type(self):
8484

8585
# get slice
8686
result = s.iloc[0:2]
87-
expected = pd.Series([1, 2]).astype('category', categories=[1, 2, 3])
87+
expected = pd.Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
8888
tm.assert_series_equal(result, expected)
8989

9090
# get list of indexes
9191
result = s.iloc[[0, 1]]
92-
expected = pd.Series([1, 2]).astype('category', categories=[1, 2, 3])
92+
expected = pd.Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
9393
tm.assert_series_equal(result, expected)
9494

9595
# get boolean array
9696
result = s.iloc[[True, False, False]]
97-
expected = pd.Series([1]).astype('category', categories=[1, 2, 3])
97+
expected = pd.Series([1]).astype(CategoricalDtype([1, 2, 3]))
9898
tm.assert_series_equal(result, expected)
9999

100100
def test_setitem(self):
@@ -2076,12 +2076,12 @@ def test_creation_astype(self):
20762076
l = ["a", "b", "c", "a"]
20772077
s = pd.Series(l)
20782078
exp = pd.Series(Categorical(l, ordered=True))
2079-
res = s.astype('category', ordered=True)
2079+
res = s.astype(CategoricalDtype(None, ordered=True))
20802080
tm.assert_series_equal(res, exp)
20812081

20822082
exp = pd.Series(Categorical(
20832083
l, categories=list('abcdef'), ordered=True))
2084-
res = s.astype('category', categories=list('abcdef'), ordered=True)
2084+
res = s.astype(CategoricalDtype(list('abcdef'), ordered=True))
20852085
tm.assert_series_equal(res, exp)
20862086

20872087
def test_construction_series(self):
@@ -4262,11 +4262,11 @@ def test_concat_preserve(self):
42624262
b = Series(list('aabbca'))
42634263

42644264
df2 = DataFrame({'A': a,
4265-
'B': b.astype('category', categories=list('cab'))})
4265+
'B': b.astype(CategoricalDtype(list('cab')))})
42664266
res = pd.concat([df2, df2])
4267-
exp = DataFrame({'A': pd.concat([a, a]),
4268-
'B': pd.concat([b, b]).astype(
4269-
'category', categories=list('cab'))})
4267+
exp = DataFrame(
4268+
{'A': pd.concat([a, a]),
4269+
'B': pd.concat([b, b]).astype(CategoricalDtype(list('cab')))})
42704270
tm.assert_frame_equal(res, exp)
42714271

42724272
def test_categorical_index_preserver(self):
@@ -4275,13 +4275,13 @@ def test_categorical_index_preserver(self):
42754275
b = Series(list('aabbca'))
42764276

42774277
df2 = DataFrame({'A': a,
4278-
'B': b.astype('category', categories=list('cab'))
4278+
'B': b.astype(CategoricalDtype(list('cab')))
42794279
}).set_index('B')
42804280
result = pd.concat([df2, df2])
4281-
expected = DataFrame({'A': pd.concat([a, a]),
4282-
'B': pd.concat([b, b]).astype(
4283-
'category', categories=list('cab'))
4284-
}).set_index('B')
4281+
expected = DataFrame(
4282+
{'A': pd.concat([a, a]),
4283+
'B': pd.concat([b, b]).astype(CategoricalDtype(list('cab')))
4284+
}).set_index('B')
42854285
tm.assert_frame_equal(result, expected)
42864286

42874287
# wrong categories
@@ -4324,7 +4324,7 @@ def test_merge(self):
43244324
cright = right.copy()
43254325
cright['d'] = cright['d'].astype('category')
43264326
result = pd.merge(left, cright, how='left', left_on='b', right_on='c')
4327-
expected['d'] = expected['d'].astype('category', categories=['null'])
4327+
expected['d'] = expected['d'].astype(CategoricalDtype(['null']))
43284328
tm.assert_frame_equal(result, expected)
43294329

43304330
# cat-object

0 commit comments

Comments
 (0)