Skip to content

Commit e2543df

Browse files
committed
Add dtype to Categorical.from_codes
1 parent 6111f64 commit e2543df

File tree

4 files changed

+64
-45
lines changed

4 files changed

+64
-45
lines changed

doc/source/whatsnew/v0.24.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,7 @@ Other Enhancements
360360
- :meth:`pandas.api.types.is_list_like` has gained a keyword ``allow_sets`` which is ``True`` by default; if ``False``,
361361
all instances of ``set`` will not be considered "list-like" anymore (:issue:`23061`)
362362
- :meth:`Index.to_frame` now supports overriding column name(s) (:issue:`22580`).
363+
- :meth:`Categorical.from_codes` can now take a ``dtype`` parameter (:issue:`24398`).
363364
- New attribute :attr:`__git_version__` will return git commit sha of current build (:issue:`21295`).
364365
- Compatibility with Matplotlib 3.0 (:issue:`22790`).
365366
- Added :meth:`Interval.overlaps`, :meth:`IntervalArray.overlaps`, and :meth:`IntervalIndex.overlaps` for determining overlaps between interval-like objects (:issue:`21998`)

pandas/core/arrays/categorical.py

+24-21
Original file line numberDiff line numberDiff line change
@@ -639,7 +639,7 @@ def _from_inferred_categories(cls, inferred_categories, inferred_codes,
639639
return cls(codes, dtype=dtype, fastpath=True)
640640

641641
@classmethod
642-
def from_codes(cls, codes, categories, ordered=False):
642+
def from_codes(cls, codes, categories=None, ordered=None, dtype=None):
643643
"""
644644
Make a Categorical type from codes and categories arrays.
645645
@@ -657,11 +657,27 @@ def from_codes(cls, codes, categories, ordered=False):
657657
categories or -1 for NaN
658658
categories : index-like
659659
The categories for the categorical. Items need to be unique.
660-
ordered : boolean, (default False)
660+
ordered : boolean, optional
661661
Whether or not this categorical is treated as an ordered
662662
categorical. If not given, the resulting categorical will be
663663
unordered.
664+
665+
.. versionchanged:: 0.24.0
666+
667+
The default value has been changed to ``None``. Previously
668+
the default value was ``False``.
669+
dtype : CategoricalDtype, optional
670+
An instance of ``CategoricalDtype`` to use for this categorical.
671+
672+
.. versionadded:: 0.24.0
664673
"""
674+
if dtype is not None:
675+
if categories is not None or ordered is not None:
676+
raise ValueError("Cannot specify both `dtype` and `categories`"
677+
" or `ordered`.")
678+
else:
679+
dtype = CategoricalDtype(categories, ordered)
680+
665681
codes = np.asarray(codes) # #21767
666682
if not is_integer_dtype(codes):
667683
msg = "codes need to be array-like integers"
@@ -675,20 +691,12 @@ def from_codes(cls, codes, categories, ordered=False):
675691
if msg:
676692
raise ValueError(msg)
677693

678-
try:
679-
codes = coerce_indexer_dtype(codes, categories)
680-
except (ValueError, TypeError):
681-
raise ValueError(
682-
"codes need to be convertible to an arrays of integers")
683-
684-
categories = CategoricalDtype.validate_categories(categories)
685-
686-
if len(codes) and (codes.max() >= len(categories) or codes.min() < -1):
694+
if len(codes) and (
695+
codes.max() >= len(dtype.categories) or codes.min() < -1):
687696
raise ValueError("codes need to be between -1 and "
688697
"len(categories)-1")
689698

690-
return cls(codes, categories=categories, ordered=ordered,
691-
fastpath=True)
699+
return cls(codes, dtype=dtype, fastpath=True)
692700

693701
_codes = None
694702

@@ -1283,8 +1291,7 @@ def shift(self, periods):
12831291
else:
12841292
codes[periods:] = -1
12851293

1286-
return self.from_codes(codes, categories=self.categories,
1287-
ordered=self.ordered)
1294+
return self.from_codes(codes, dtype=self.dtype)
12881295

12891296
def __array__(self, dtype=None):
12901297
"""
@@ -1902,9 +1909,7 @@ def take_nd(self, indexer, allow_fill=None, fill_value=None):
19021909

19031910
codes = take(self._codes, indexer, allow_fill=allow_fill,
19041911
fill_value=fill_value)
1905-
result = type(self).from_codes(codes,
1906-
categories=dtype.categories,
1907-
ordered=dtype.ordered)
1912+
result = type(self).from_codes(codes, dtype=dtype)
19081913
return result
19091914

19101915
take = take_nd
@@ -2093,9 +2098,7 @@ def __setitem__(self, key, value):
20932098
new_codes = _recode_for_categories(
20942099
value.codes, value.categories, self.categories
20952100
)
2096-
value = Categorical.from_codes(new_codes,
2097-
categories=self.categories,
2098-
ordered=self.ordered)
2101+
value = Categorical.from_codes(new_codes, dtype=self.dtype)
20992102

21002103
rvalue = value if is_list_like(value) else [value]
21012104

pandas/core/indexes/category.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -154,8 +154,7 @@ def _create_from_codes(self, codes, dtype=None, name=None):
154154
dtype = self.dtype
155155
if name is None:
156156
name = self.name
157-
cat = Categorical.from_codes(codes, categories=dtype.categories,
158-
ordered=dtype.ordered)
157+
cat = Categorical.from_codes(codes, dtype=dtype)
159158
return CategoricalIndex(cat, name=name)
160159

161160
@classmethod

pandas/tests/arrays/categorical/test_constructors.py

+38-22
Original file line numberDiff line numberDiff line change
@@ -417,33 +417,44 @@ def test_constructor_with_categorical_categories(self):
417417
def test_from_codes(self):
418418

419419
# too few categories
420+
dtype = CategoricalDtype(categories=[1, 2])
420421
with pytest.raises(ValueError):
421-
Categorical.from_codes([1, 2], [1, 2])
422+
Categorical.from_codes([1, 2], categories=dtype.categories)
423+
with pytest.raises(ValueError):
424+
Categorical.from_codes([1, 2], dtype=dtype)
422425

423426
# no int codes
424427
with pytest.raises(ValueError):
425-
Categorical.from_codes(["a"], [1, 2])
428+
Categorical.from_codes(["a"], categories=dtype.categories)
429+
with pytest.raises(ValueError):
430+
Categorical.from_codes(["a"], dtype=dtype)
426431

427432
# no unique categories
428433
with pytest.raises(ValueError):
429-
Categorical.from_codes([0, 1, 2], ["a", "a", "b"])
434+
Categorical.from_codes([0, 1, 2], categories=["a", "a", "b"])
430435

431436
# NaN categories included
432437
with pytest.raises(ValueError):
433-
Categorical.from_codes([0, 1, 2], ["a", "b", np.nan])
438+
Categorical.from_codes([0, 1, 2], categories=["a", "b", np.nan])
434439

435440
# too negative
441+
dtype = CategoricalDtype(categories=["a", "b", "c"])
442+
with pytest.raises(ValueError):
443+
Categorical.from_codes([-2, 1, 2], categories=dtype.categories)
436444
with pytest.raises(ValueError):
437-
Categorical.from_codes([-2, 1, 2], ["a", "b", "c"])
445+
Categorical.from_codes([-2, 1, 2], dtype=dtype)
438446

439447
exp = Categorical(["a", "b", "c"], ordered=False)
440-
res = Categorical.from_codes([0, 1, 2], ["a", "b", "c"])
448+
res = Categorical.from_codes([0, 1, 2], categories=dtype.categories)
449+
tm.assert_categorical_equal(exp, res)
450+
451+
res = Categorical.from_codes([0, 1, 2], dtype=dtype)
441452
tm.assert_categorical_equal(exp, res)
442453

443-
# Not available in earlier numpy versions
444-
if hasattr(np.random, "choice"):
445-
codes = np.random.choice([0, 1], 5, p=[0.9, 0.1])
446-
Categorical.from_codes(codes, categories=["train", "test"])
454+
codes = np.random.choice([0, 1], 5, p=[0.9, 0.1])
455+
dtype = CategoricalDtype(categories=["train", "test"])
456+
Categorical.from_codes(codes, categories=dtype.categories)
457+
Categorical.from_codes(codes, dtype=dtype)
447458

448459
def test_from_codes_with_categorical_categories(self):
449460
# GH17884
@@ -464,22 +475,30 @@ def test_from_codes_with_categorical_categories(self):
464475
def test_from_codes_with_nan_code(self):
465476
# GH21767
466477
codes = [1, 2, np.nan]
467-
categories = ['a', 'b', 'c']
478+
dtype = CategoricalDtype(categories=['a', 'b', 'c'])
468479
with pytest.raises(ValueError):
469-
Categorical.from_codes(codes, categories)
480+
Categorical.from_codes(codes, categories=dtype.categories)
481+
with pytest.raises(ValueError):
482+
Categorical.from_codes(codes, dtype=dtype)
470483

471484
def test_from_codes_with_float(self):
472485
# GH21767
473486
codes = [1.0, 2.0, 0] # integer, but in float dtype
474-
categories = ['a', 'b', 'c']
487+
dtype = CategoricalDtype(categories=['a', 'b', 'c'])
475488

476489
with tm.assert_produces_warning(FutureWarning):
477-
cat = Categorical.from_codes(codes, categories)
490+
cat = Categorical.from_codes(codes, dtype.categories)
491+
tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype='i1'))
492+
493+
with tm.assert_produces_warning(FutureWarning):
494+
cat = Categorical.from_codes(codes, dtype=dtype)
478495
tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype='i1'))
479496

480497
codes = [1.1, 2.0, 0] # non-integer
481498
with pytest.raises(ValueError):
482-
Categorical.from_codes(codes, categories)
499+
Categorical.from_codes(codes, dtype.categories)
500+
with pytest.raises(ValueError):
501+
Categorical.from_codes(codes, dtype=dtype)
483502

484503
@pytest.mark.parametrize('dtype', [None, 'category'])
485504
def test_from_inferred_categories(self, dtype):
@@ -515,14 +534,11 @@ def test_from_inferred_categories_coerces(self):
515534
expected = Categorical([1, 1, 2, np.nan])
516535
tm.assert_categorical_equal(result, expected)
517536

518-
def test_construction_with_ordered(self):
537+
@pytest.mark.parametrize('ordered', [None, True, False])
538+
def test_construction_with_ordered(self, ordered):
519539
# GH 9347, 9190
520-
cat = Categorical([0, 1, 2])
521-
assert not cat.ordered
522-
cat = Categorical([0, 1, 2], ordered=False)
523-
assert not cat.ordered
524-
cat = Categorical([0, 1, 2], ordered=True)
525-
assert cat.ordered
540+
cat = Categorical([0, 1, 2], ordered=ordered)
541+
assert cat.ordered == bool(ordered)
526542

527543
@pytest.mark.xfail(reason="Imaginary values not supported in Categorical")
528544
def test_constructor_imaginary(self):

0 commit comments

Comments
 (0)