-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: Categorical(Index) passed as categories #17888
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
93bb429
7bc64bd
938e4e8
ab23d41
7fa8578
b020498
ba532ee
1c56abe
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,7 +3,7 @@ | |
import re | ||
import numpy as np | ||
from pandas import compat | ||
from pandas.core.dtypes.generic import ABCIndexClass | ||
from pandas.core.dtypes.generic import ABCIndexClass, ABCCategoricalIndex | ||
|
||
|
||
class ExtensionDtype(object): | ||
|
@@ -170,16 +170,16 @@ def _from_categorical_dtype(cls, dtype, categories=None, ordered=None): | |
return cls(categories, ordered) | ||
|
||
def _finalize(self, categories, ordered, fastpath=False): | ||
from pandas.core.indexes.base import Index | ||
|
||
if ordered is None: | ||
ordered = False | ||
else: | ||
self._validate_ordered(ordered) | ||
|
||
if categories is not None: | ||
categories = Index(categories, tupleize_cols=False) | ||
# validation | ||
self._validate_categories(categories, fastpath=fastpath) | ||
self._validate_ordered(ordered) | ||
categories = self._validate_categories(categories, | ||
fastpath=fastpath) | ||
|
||
self._categories = categories | ||
self._ordered = ordered | ||
|
||
|
@@ -316,7 +316,10 @@ def _validate_categories(categories, fastpath=False): | |
from pandas import Index | ||
|
||
if not isinstance(categories, ABCIndexClass): | ||
categories = Index(categories) | ||
categories = Index(categories, tupleize_cols=False) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should be an elif and instead should use is_categorical There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Isn't the point of the ABC ones to avoid imports from itself ? (as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jreback Can you answer this one? No problem to change it if you think it is better, but it seems to me using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hmm, yeah I guess you have to do this. but you can also check There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. updated to use |
||
if isinstance(categories, ABCCategoricalIndex): | ||
categories = categories.categories | ||
|
||
if not fastpath: | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -519,6 +519,18 @@ def test_contructor_from_categorical_string(self): | |
result = Categorical(values, categories=['a', 'b', 'c'], ordered=True) | ||
tm.assert_categorical_equal(result, expected) | ||
|
||
def test_constructor_with_categorical_categories(self): | ||
# GH17884 | ||
expected = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c']) | ||
|
||
result = pd.Categorical( | ||
['a', 'b'], categories=pd.Categorical(['a', 'b', 'c'])) | ||
tm.assert_categorical_equal(result, expected) | ||
|
||
result = pd.Categorical( | ||
['a', 'b'], categories=pd.CategoricalIndex(['a', 'b', 'c'])) | ||
tm.assert_categorical_equal(result, expected) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, thanks! (updated) |
||
|
||
def test_from_codes(self): | ||
|
||
# too few categories | ||
|
@@ -560,6 +572,18 @@ def f(): | |
codes = np.random.choice([0, 1], 5, p=[0.9, 0.1]) | ||
pd.Categorical.from_codes(codes, categories=["train", "test"]) | ||
|
||
def test_from_codes_with_categorical_categories(self): | ||
# GH17884 | ||
expected = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c']) | ||
|
||
result = pd.Categorical.from_codes( | ||
[0, 1], categories=pd.Categorical(['a', 'b', 'c'])) | ||
tm.assert_categorical_equal(result, expected) | ||
|
||
result = pd.Categorical.from_codes( | ||
[0, 1], categories=pd.CategoricalIndex(['a', 'b', 'c'])) | ||
tm.assert_categorical_equal(result, expected) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same regarding |
||
|
||
@pytest.mark.parametrize('dtype', [None, 'category']) | ||
def test_from_inferred_categories(self, dtype): | ||
cats = ['a', 'b'] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would move this to the CDT issue, IOW the sub-section (as its actually not an independent bug) and better there.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The bug also exists for
Categorical
andCateforicalIndex
and was already present in current released 0.20.3. So not directly related to the CDT rework (although the bug is present in CDT as well). But not that important, can also move if you want.