Skip to content

Commit 096c7a5

Browse files
committed
deprecate categories and ordered parameters
1 parent 6e42d80 commit 096c7a5

File tree

15 files changed

+103
-127
lines changed

15 files changed

+103
-127
lines changed

doc/source/whatsnew/v0.24.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1123,6 +1123,7 @@ Deprecations
11231123
- :meth:`Series.compress` is deprecated. Use ``Series[condition]`` instead (:issue:`18262`)
11241124
- The signature of :meth:`Series.to_csv` has been made uniform with that of :meth:`DataFrame.to_csv`: the name of the first argument is now ``path_or_buf``, the order of subsequent arguments has changed, and the ``header`` argument now defaults to ``True``. (:issue:`19715`)
11251125
- :meth:`Categorical.from_codes` has deprecated providing float values for the ``codes`` argument. (:issue:`21767`)
1126+
- :meth:`Categorical.from_codes` has deprecated the ``categories`` and ``ordered`` parameters. Supply a :class:`~pandas.api.types.CategoricalDtype` to the new ``dtype`` parameter instead. (:issue:`24398`)
11261127
- :func:`pandas.read_table` is deprecated. Instead, use :func:`read_csv` passing ``sep='\t'`` if necessary (:issue:`21948`)
11271128
- :meth:`Series.str.cat` has deprecated using arbitrary list-likes *within* list-likes. A list-like container may still contain
11281129
many ``Series``, ``Index`` or 1-dimensional ``np.ndarray``, or alternatively, only scalar values. (:issue:`21950`)

pandas/core/arrays/categorical.py

+16-10
Original file line numberDiff line numberDiff line change
@@ -641,9 +641,9 @@ def _from_inferred_categories(cls, inferred_categories, inferred_codes,
641641
@classmethod
642642
def from_codes(cls, codes, categories=None, ordered=None, dtype=None):
643643
"""
644-
Make a Categorical type from codes and categories arrays.
644+
Make a Categorical type from codes and CategoricalDtype.
645645
646-
This constructor is useful if you already have codes and categories and
646+
This constructor is useful if you already have codes and the dtype and
647647
so do not need the (computation intensive) factorization step, which is
648648
usually done on the constructor.
649649
@@ -657,16 +657,17 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None):
657657
categories or -1 for NaN
658658
categories : index-like, optional
659659
The categories for the categorical. Items need to be unique.
660+
661+
.. deprecated:: 0.24.0
662+
Use ``dtype`` instead.
660663
ordered : bool, optional
661664
Whether or not this categorical is treated as an ordered
662665
categorical. If not given, the resulting categorical will be
663666
unordered.
664667
665-
.. versionchanged:: 0.24.0
666-
667-
The default value has been changed to ``None``. Previously
668-
the default value was ``False``.
669-
dtype : CategoricalDtype, optional
668+
.. deprecated:: 0.24.0
669+
Use ``dtype`` instead.
670+
dtype : CategoricalDtype
670671
An instance of ``CategoricalDtype`` to use for this categorical.
671672
672673
.. versionadded:: 0.24.0
@@ -682,7 +683,13 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None):
682683
if categories is not None or ordered is not None:
683684
raise ValueError("Cannot specify `categories` or `ordered` "
684685
"together with `dtype`.")
686+
elif categories is None and dtype is None:
687+
raise ValueError("Must specify `categories` or `dtype`.")
685688
else:
689+
msg = ("the 'categories' keyword is deprecated and will be removed "
690+
"in a future version. Please take steps to stop the use of "
691+
"'categories'")
692+
warn(msg, FutureWarning, stacklevel=2)
686693
dtype = CategoricalDtype(categories, ordered)
687694

688695
codes = np.asarray(codes) # #21767
@@ -1245,9 +1252,8 @@ def map(self, mapper):
12451252
"""
12461253
new_categories = self.categories.map(mapper)
12471254
try:
1248-
return self.from_codes(self._codes.copy(),
1249-
categories=new_categories,
1250-
ordered=self.ordered)
1255+
new_dtype = CategoricalDtype(new_categories, ordered=self.ordered)
1256+
return self.from_codes(self._codes.copy(), dtype=new_dtype)
12511257
except ValueError:
12521258
return np.take(new_categories, self._codes)
12531259

pandas/core/groupby/grouper.py

+7-8
Original file line numberDiff line numberDiff line change
@@ -290,23 +290,22 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
290290
elif is_categorical_dtype(self.grouper):
291291

292292
from pandas.core.groupby.categorical import recode_for_groupby
293+
from pandas.api.types import CategoricalDtype
293294
self.grouper, self.all_grouper = recode_for_groupby(
294295
self.grouper, self.sort, observed)
295-
categories = self.grouper.categories
296+
dtype = CategoricalDtype(self.grouper.categories,
297+
ordered=self.grouper.ordered)
296298

297299
# we make a CategoricalIndex out of the cat grouper
298300
# preserving the categories / ordered attributes
299301
self._labels = self.grouper.codes
300302
if observed:
301303
codes = algorithms.unique1d(self.grouper.codes)
302304
else:
303-
codes = np.arange(len(categories))
305+
codes = np.arange(len(dtype.categories))
304306

305307
self._group_index = CategoricalIndex(
306-
Categorical.from_codes(
307-
codes=codes,
308-
categories=categories,
309-
ordered=self.grouper.ordered))
308+
Categorical.from_codes(codes=codes, dtype=dtype))
310309

311310
# we are done
312311
if isinstance(self.grouper, Grouping):
@@ -395,8 +394,8 @@ def _make_labels(self):
395394

396395
@cache_readonly
397396
def groups(self):
398-
return self.index.groupby(Categorical.from_codes(self.labels,
399-
self.group_index))
397+
return self.index.groupby(
398+
Categorical(self.labels, self.group_index, fastpath=True))
400399

401400

402401
def _get_grouper(obj, key=None, axis=0, level=None, sort=True,

pandas/core/indexes/multi.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -2025,13 +2025,14 @@ def _get_codes_for_sorting(self):
20252025
"""
20262026
from pandas.core.arrays import Categorical
20272027

2028-
def cats(level_codes):
2029-
return np.arange(np.array(level_codes).max() + 1 if
2028+
def as_dtype(level_codes):
2029+
from pandas.api.types import CategoricalDtype
2030+
cats = np.arange(np.array(level_codes).max() + 1 if
20302031
len(level_codes) else 0,
20312032
dtype=level_codes.dtype)
2033+
return CategoricalDtype(cats, ordered=True)
20322034

2033-
return [Categorical.from_codes(level_codes, cats(level_codes),
2034-
ordered=True)
2035+
return [Categorical.from_codes(level_codes, dtype=as_dtype(level_codes))
20352036
for level_codes in self.codes]
20362037

20372038
def sortlevel(self, level=0, ascending=True, sort_remaining=True):

pandas/io/packers.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
Categorical, CategoricalIndex, DataFrame, DatetimeIndex, Float64Index,
6060
Index, Int64Index, Interval, IntervalIndex, MultiIndex, NaT, Panel, Period,
6161
PeriodIndex, RangeIndex, Series, TimedeltaIndex, Timestamp)
62+
from pandas.api.types import CategoricalDtype as CDT
6263
from pandas.core import internals
6364
from pandas.core.arrays import IntervalArray, PeriodArray
6465
from pandas.core.arrays.sparse import BlockIndex, IntIndex
@@ -620,9 +621,8 @@ def decode(obj):
620621
name=obj[u'name'])
621622
elif typ == u'category':
622623
from_codes = globals()[obj[u'klass']].from_codes
623-
return from_codes(codes=obj[u'codes'],
624-
categories=obj[u'categories'],
625-
ordered=obj[u'ordered'])
624+
dtype = CDT(obj[u'categories'], ordered=obj[u'ordered'])
625+
return from_codes(codes=obj[u'codes'], dtype=dtype)
626626

627627
elif typ == u'interval':
628628
return Interval(obj[u'left'], obj[u'right'], obj[u'closed'])

pandas/io/pytables.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
DataFrame, DatetimeIndex, Index, Int64Index, MultiIndex, Panel,
3131
PeriodIndex, Series, SparseDataFrame, SparseSeries, TimedeltaIndex, compat,
3232
concat, isna, to_datetime)
33+
from pandas.api.types import CategoricalDtype
3334
from pandas.core import config
3435
from pandas.core.algorithms import match, unique
3536
from pandas.core.arrays.categorical import (
@@ -2206,10 +2207,8 @@ def convert(self, values, nan_rep, encoding, errors):
22062207
categories = categories[~mask]
22072208
codes[codes != -1] -= mask.astype(int).cumsum().values
22082209

2209-
self.data = Categorical.from_codes(codes,
2210-
categories=categories,
2211-
ordered=self.ordered)
2212-
2210+
dtype = CategoricalDtype(categories, ordered=self.ordered)
2211+
self.data = Categorical.from_codes(codes, dtype=dtype)
22132212
else:
22142213

22152214
try:

pandas/tests/arrays/categorical/test_constructors.py

+24-58
Original file line numberDiff line numberDiff line change
@@ -21,18 +21,13 @@ class TestCategoricalConstructors(object):
2121
def test_validate_ordered(self):
2222
# see gh-14058
2323
exp_msg = "'ordered' must either be 'True' or 'False'"
24-
exp_err = TypeError
2524

26-
# This should be a boolean.
25+
# This should be a boolean or None.
2726
ordered = np.array([0, 1, 2])
2827

29-
with pytest.raises(exp_err, match=exp_msg):
28+
with pytest.raises(TypeError, match=exp_msg):
3029
Categorical([1, 2, 3], ordered=ordered)
3130

32-
with pytest.raises(exp_err, match=exp_msg):
33-
Categorical.from_codes([0, 0, 1], categories=['a', 'b', 'c'],
34-
ordered=ordered)
35-
3631
def test_constructor_empty(self):
3732
# GH 17248
3833
c = Categorical([])
@@ -421,76 +416,41 @@ def test_constructor_with_categorical_categories(self):
421416
tm.assert_categorical_equal(result, expected)
422417

423418
def test_from_codes(self):
419+
dtype = CategoricalDtype(categories=[1, 2])
420+
421+
# no dtype or categories
422+
msg = "Must specify `categories` or `dtype`."
423+
with pytest.raises(ValueError, match=msg):
424+
Categorical.from_codes([1, 2])
424425

425426
# too few categories
426-
dtype = CategoricalDtype(categories=[1, 2])
427427
msg = "codes need to be between "
428-
with pytest.raises(ValueError, match=msg):
429-
Categorical.from_codes([1, 2], categories=dtype.categories)
430428
with pytest.raises(ValueError, match=msg):
431429
Categorical.from_codes([1, 2], dtype=dtype)
432430

433431
# no int codes
434432
msg = "codes need to be array-like integers"
435-
with pytest.raises(ValueError, match=msg):
436-
Categorical.from_codes(["a"], categories=dtype.categories)
437433
with pytest.raises(ValueError, match=msg):
438434
Categorical.from_codes(["a"], dtype=dtype)
439435

440-
# no unique categories
441-
with pytest.raises(ValueError,
442-
match="Categorical categories must be unique"):
443-
Categorical.from_codes([0, 1, 2], categories=["a", "a", "b"])
444-
445-
# NaN categories included
446-
with pytest.raises(ValueError,
447-
match="Categorial categories cannot be null"):
448-
Categorical.from_codes([0, 1, 2], categories=["a", "b", np.nan])
449-
450436
# too negative
451437
dtype = CategoricalDtype(categories=["a", "b", "c"])
452438
msg = r"codes need to be between -1 and len\(categories\)-1"
453-
with pytest.raises(ValueError, match=msg):
454-
Categorical.from_codes([-2, 1, 2], categories=dtype.categories)
455439
with pytest.raises(ValueError, match=msg):
456440
Categorical.from_codes([-2, 1, 2], dtype=dtype)
457441

458442
exp = Categorical(["a", "b", "c"], ordered=False)
459-
res = Categorical.from_codes([0, 1, 2], categories=dtype.categories)
460-
tm.assert_categorical_equal(exp, res)
461-
462443
res = Categorical.from_codes([0, 1, 2], dtype=dtype)
463444
tm.assert_categorical_equal(exp, res)
464445

465446
codes = np.random.choice([0, 1], 5, p=[0.9, 0.1])
466447
dtype = CategoricalDtype(categories=["train", "test"])
467-
Categorical.from_codes(codes, categories=dtype.categories)
468448
Categorical.from_codes(codes, dtype=dtype)
469449

470-
def test_from_codes_with_categorical_categories(self):
471-
# GH17884
472-
expected = Categorical(['a', 'b'], categories=['a', 'b', 'c'])
473-
474-
result = Categorical.from_codes(
475-
[0, 1], categories=Categorical(['a', 'b', 'c']))
476-
tm.assert_categorical_equal(result, expected)
477-
478-
result = Categorical.from_codes(
479-
[0, 1], categories=CategoricalIndex(['a', 'b', 'c']))
480-
tm.assert_categorical_equal(result, expected)
481-
482-
# non-unique Categorical still raises
483-
with pytest.raises(ValueError,
484-
match="Categorical categories must be unique"):
485-
Categorical.from_codes([0, 1], Categorical(['a', 'b', 'a']))
486-
487450
def test_from_codes_with_nan_code(self):
488451
# GH21767
489452
codes = [1, 2, np.nan]
490453
dtype = CategoricalDtype(categories=['a', 'b', 'c'])
491-
with pytest.raises(ValueError,
492-
match="codes need to be array-like integers"):
493-
Categorical.from_codes(codes, categories=dtype.categories)
494454
with pytest.raises(ValueError,
495455
match="codes need to be array-like integers"):
496456
Categorical.from_codes(codes, dtype=dtype)
@@ -500,36 +460,42 @@ def test_from_codes_with_float(self):
500460
codes = [1.0, 2.0, 0] # integer, but in float dtype
501461
dtype = CategoricalDtype(categories=['a', 'b', 'c'])
502462

503-
with tm.assert_produces_warning(FutureWarning):
504-
cat = Categorical.from_codes(codes, dtype.categories)
505-
tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype='i1'))
506-
507-
with tm.assert_produces_warning(FutureWarning):
463+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
508464
cat = Categorical.from_codes(codes, dtype=dtype)
509465
tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype='i1'))
510466

511467
codes = [1.1, 2.0, 0] # non-integer
512-
with pytest.raises(ValueError,
513-
match="codes need to be array-like integers"):
514-
Categorical.from_codes(codes, dtype.categories)
515468
with pytest.raises(ValueError,
516469
match="codes need to be array-like integers"):
517470
Categorical.from_codes(codes, dtype=dtype)
518471

472+
def test_from_codes_deprecated(self):
473+
cats = ['a', 'b']
474+
with tm.assert_produces_warning(FutureWarning):
475+
Categorical.from_codes([0, 1], categories=cats)
476+
477+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
478+
Categorical.from_codes([0, 1], categories=cats, ordered=True)
479+
480+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
481+
Categorical.from_codes([0, 1], categories=cats, ordered=False)
482+
519483
@pytest.mark.parametrize('dtype', [None, 'category'])
520484
def test_from_inferred_categories(self, dtype):
521485
cats = ['a', 'b']
522486
codes = np.array([0, 0, 1, 1], dtype='i8')
523487
result = Categorical._from_inferred_categories(cats, codes, dtype)
524-
expected = Categorical.from_codes(codes, cats)
488+
expected = Categorical.from_codes(codes,
489+
dtype=CategoricalDtype(cats))
525490
tm.assert_categorical_equal(result, expected)
526491

527492
@pytest.mark.parametrize('dtype', [None, 'category'])
528493
def test_from_inferred_categories_sorts(self, dtype):
529494
cats = ['b', 'a']
530495
codes = np.array([0, 1, 1, 1], dtype='i8')
531496
result = Categorical._from_inferred_categories(cats, codes, dtype)
532-
expected = Categorical.from_codes([1, 0, 0, 0], ['a', 'b'])
497+
expected = Categorical.from_codes([1, 0, 0, 0],
498+
dtype=CategoricalDtype(['a', 'b']))
533499
tm.assert_categorical_equal(result, expected)
534500

535501
def test_from_inferred_categories_dtype(self):
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,28 @@
11
# -*- coding: utf-8 -*-
22

33
from pandas import Categorical
4+
from pandas.api.types import CategoricalDtype
45
import pandas.util.testing as tm
56

67

78
class TestCategoricalSubclassing(object):
89

910
def test_constructor(self):
10-
sc = tm.SubclassedCategorical(['a', 'b', 'c'])
11-
assert isinstance(sc, tm.SubclassedCategorical)
12-
tm.assert_categorical_equal(sc, Categorical(['a', 'b', 'c']))
11+
subclassed = tm.SubclassedCategorical(['a', 'b', 'c'])
12+
assert isinstance(subclassed, tm.SubclassedCategorical)
13+
tm.assert_categorical_equal(subclassed, Categorical(['a', 'b', 'c']))
1314

1415
def test_from_codes(self):
15-
sc = tm.SubclassedCategorical.from_codes([1, 0, 2], ['a', 'b', 'c'])
16-
assert isinstance(sc, tm.SubclassedCategorical)
17-
exp = Categorical.from_codes([1, 0, 2], ['a', 'b', 'c'])
18-
tm.assert_categorical_equal(sc, exp)
16+
dtype = CategoricalDtype(['a', 'b', 'c'])
17+
subclassed = tm.SubclassedCategorical.from_codes([1, 0, 2], dtype=dtype)
18+
assert isinstance(subclassed, tm.SubclassedCategorical)
19+
20+
expected = Categorical.from_codes([1, 0, 2], dtype=dtype)
21+
tm.assert_categorical_equal(subclassed, expected)
1922

2023
def test_map(self):
21-
sc = tm.SubclassedCategorical(['a', 'b', 'c'])
22-
res = sc.map(lambda x: x.upper())
23-
assert isinstance(res, tm.SubclassedCategorical)
24-
exp = Categorical(['A', 'B', 'C'])
25-
tm.assert_categorical_equal(res, exp)
24+
subclassed = tm.SubclassedCategorical(['a', 'b', 'c'])
25+
result = subclassed.map(lambda x: x.upper())
26+
assert isinstance(result, tm.SubclassedCategorical)
27+
expected = Categorical(['A', 'B', 'C'])
28+
tm.assert_categorical_equal(result, expected)

pandas/tests/arrays/test_period.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from pandas.core.dtypes.dtypes import PeriodDtype
99

1010
import pandas as pd
11+
from pandas.api.types import CategoricalDtype as CDT
1112
from pandas.core.arrays import PeriodArray, period_array
1213
import pandas.util.testing as tm
1314

@@ -111,8 +112,8 @@ def test_astype_copies():
111112
def test_astype_categorical():
112113
arr = period_array(['2000', '2001', '2001', None], freq='D')
113114
result = arr.astype('category')
114-
categories = pd.PeriodIndex(['2000', '2001'], freq='D')
115-
expected = pd.Categorical.from_codes([0, 1, 1, -1], categories=categories)
115+
dtype = CDT(categories=pd.PeriodIndex(['2000', '2001'], freq='D'))
116+
expected = pd.Categorical.from_codes([0, 1, 1, -1], dtype=dtype)
116117
tm.assert_categorical_equal(result, expected)
117118

118119

pandas/tests/indexes/test_category.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -433,8 +433,9 @@ def test_astype(self):
433433
right=[2, 4],
434434
closed='right')
435435

436+
dtype = CategoricalDtype(categories=ii, ordered=True)
436437
ci = CategoricalIndex(Categorical.from_codes(
437-
[0, 1, -1], categories=ii, ordered=True))
438+
[0, 1, -1], dtype=dtype))
438439

439440
result = ci.astype('interval')
440441
expected = ii.take([0, 1, -1])

0 commit comments

Comments
 (0)