Skip to content

Commit 1cb8b25

Browse files
jorisvandenbossche authored and alanbato committed
BUG: Categorical(Index) passed as categories (pandas-dev#17888)
1 parent 8fc9062 commit 1cb8b25

File tree

4 files changed

+48
-8
lines changed

4 files changed

+48
-8
lines changed

doc/source/whatsnew/v0.21.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -1024,6 +1024,7 @@ Categorical
10241024
- Bug in the categorical constructor with empty values and categories causing the ``.categories`` to be an empty ``Float64Index`` rather than an empty ``Index`` with object dtype (:issue:`17248`)
10251025
- Bug in categorical operations with :ref:`Series.cat <categorical.cat>` not preserving the original Series' name (:issue:`17509`)
10261026
- Bug in :func:`DataFrame.merge` failing for categorical columns with boolean/int data types (:issue:`17187`)
1027+
- Bug in constructing a ``Categorical``/``CategoricalDtype`` when the specified ``categories`` are themselves a ``Categorical`` or ``CategoricalIndex`` (:issue:`17884`)
10271028

10281029
.. _whatsnew_0210.pypy:
10291030

pandas/core/dtypes/dtypes.py

+10-7
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
import re
44
import numpy as np
55
from pandas import compat
6-
from pandas.core.dtypes.generic import ABCIndexClass
6+
from pandas.core.dtypes.generic import ABCIndexClass, ABCCategoricalIndex
77

88

99
class ExtensionDtype(object):
@@ -170,16 +170,16 @@ def _from_categorical_dtype(cls, dtype, categories=None, ordered=None):
170170
return cls(categories, ordered)
171171

172172
def _finalize(self, categories, ordered, fastpath=False):
173-
from pandas.core.indexes.base import Index
174173

175174
if ordered is None:
176175
ordered = False
176+
else:
177+
self._validate_ordered(ordered)
177178

178179
if categories is not None:
179-
categories = Index(categories, tupleize_cols=False)
180-
# validation
181-
self._validate_categories(categories, fastpath=fastpath)
182-
self._validate_ordered(ordered)
180+
categories = self._validate_categories(categories,
181+
fastpath=fastpath)
182+
183183
self._categories = categories
184184
self._ordered = ordered
185185

@@ -316,7 +316,7 @@ def _validate_categories(categories, fastpath=False):
316316
from pandas import Index
317317

318318
if not isinstance(categories, ABCIndexClass):
319-
categories = Index(categories)
319+
categories = Index(categories, tupleize_cols=False)
320320

321321
if not fastpath:
322322

@@ -326,6 +326,9 @@ def _validate_categories(categories, fastpath=False):
326326
if not categories.is_unique:
327327
raise ValueError('Categorical categories must be unique')
328328

329+
if isinstance(categories, ABCCategoricalIndex):
330+
categories = categories.categories
331+
329332
return categories
330333

331334
@property

pandas/tests/dtypes/test_dtypes.py

+9-1
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,8 @@
66

77
import numpy as np
88
import pandas as pd
9-
from pandas import Series, Categorical, IntervalIndex, date_range
9+
from pandas import (
10+
Series, Categorical, CategoricalIndex, IntervalIndex, date_range)
1011

1112
from pandas.core.dtypes.dtypes import (
1213
DatetimeTZDtype, PeriodDtype,
@@ -657,3 +658,10 @@ def test_str_vs_repr(self):
657658
# Py2 will have unicode prefixes
658659
pat = r"CategoricalDtype\(categories=\[.*\], ordered=False\)"
659660
assert re.match(pat, repr(c1))
661+
662+
def test_categorical_categories(self):
663+
# GH17884
664+
c1 = CategoricalDtype(Categorical(['a', 'b']))
665+
tm.assert_index_equal(c1.categories, pd.Index(['a', 'b']))
666+
c1 = CategoricalDtype(CategoricalIndex(['a', 'b']))
667+
tm.assert_index_equal(c1.categories, pd.Index(['a', 'b']))

pandas/tests/test_categorical.py

+28
Original file line number | Diff line number | Diff line change
@@ -519,6 +519,18 @@ def test_contructor_from_categorical_string(self):
519519
result = Categorical(values, categories=['a', 'b', 'c'], ordered=True)
520520
tm.assert_categorical_equal(result, expected)
521521

522+
def test_constructor_with_categorical_categories(self):
523+
# GH17884
524+
expected = Categorical(['a', 'b'], categories=['a', 'b', 'c'])
525+
526+
result = Categorical(
527+
['a', 'b'], categories=Categorical(['a', 'b', 'c']))
528+
tm.assert_categorical_equal(result, expected)
529+
530+
result = Categorical(
531+
['a', 'b'], categories=CategoricalIndex(['a', 'b', 'c']))
532+
tm.assert_categorical_equal(result, expected)
533+
522534
def test_from_codes(self):
523535

524536
# too few categories
@@ -560,6 +572,22 @@ def f():
560572
codes = np.random.choice([0, 1], 5, p=[0.9, 0.1])
561573
pd.Categorical.from_codes(codes, categories=["train", "test"])
562574

575+
def test_from_codes_with_categorical_categories(self):
576+
# GH17884
577+
expected = Categorical(['a', 'b'], categories=['a', 'b', 'c'])
578+
579+
result = Categorical.from_codes(
580+
[0, 1], categories=Categorical(['a', 'b', 'c']))
581+
tm.assert_categorical_equal(result, expected)
582+
583+
result = Categorical.from_codes(
584+
[0, 1], categories=CategoricalIndex(['a', 'b', 'c']))
585+
tm.assert_categorical_equal(result, expected)
586+
587+
# non-unique Categorical still raises
588+
with pytest.raises(ValueError):
589+
Categorical.from_codes([0, 1], Categorical(['a', 'b', 'a']))
590+
563591
@pytest.mark.parametrize('dtype', [None, 'category'])
564592
def test_from_inferred_categories(self, dtype):
565593
cats = ['a', 'b']

0 commit comments

Comments
 (0)