-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
GH24241 make Categorical.map transform nans #24275
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
d765dc3
628bfac
2011996
e5b5415
82859d9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1229,11 +1229,26 @@ def map(self, mapper): | |
Index(['first', 'second', nan], dtype='object') | ||
""" | ||
new_categories = self.categories.map(mapper) | ||
|
||
try: | ||
return self.from_codes(self._codes.copy(), | ||
categories=new_categories, | ||
ordered=self.ordered) | ||
if isinstance(mapper, (dict, ABCSeries)): | ||
new_value = mapper[np.nan] | ||
else: | ||
new_value = mapper(np.nan) | ||
except (AttributeError, KeyError, TypeError, ValueError): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not really comfortable with this. If the user's mapper is something like
    def f(x):
        if isnan(x):
            raise TypeError
        ...
then that TypeError would be swallowed by pandas. |
||
new_value = np.nan | ||
|
||
try: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why are you using try/except here?
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How / why can this fail?
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. AttributeError: if mapper calls a method of the element (e.g. a method that a NaN value does not have). If you mean the try/except below that, that was already there. |
||
ret = self.from_codes(self._codes.copy(), | ||
categories=new_categories, | ||
ordered=self.ordered) | ||
if new_value not in ret.categories and any(self._codes == -1): | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
ret.add_categories(new_value, inplace=True) | ||
ret = ret.fillna(new_value) | ||
return ret | ||
except ValueError: | ||
new_categories = new_categories.insert(len(new_categories), | ||
new_value) | ||
return np.take(new_categories, self._codes) | ||
|
||
__eq__ = _cat_compare_op('__eq__') | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -311,6 +311,37 @@ def test_map_with_categorical_series(self): | |
exp = pd.Index(["odd", "even", "odd", np.nan]) | ||
tm.assert_index_equal(a.map(c), exp) | ||
|
||
@pytest.mark.parametrize('data, f', [[[1, 1, np.nan], pd.isna], | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. write this as
|
||
[[1, 2, np.nan], pd.isna], | ||
[[1, 1, np.nan], {1: False, | ||
np.nan: True}], | ||
[[1, 2, np.nan], {1: False, | ||
2: False, | ||
np.nan: True}]]) | ||
def test_map_fill_nan(self, data, f): | ||
values = pd.Categorical(data) | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
result = values.map(f) | ||
if data[1] == 1: | ||
expected = pd.Categorical([False, False, True]) | ||
tm.assert_categorical_equal(result, expected) | ||
else: | ||
expected = pd.Index([False, False, True]) | ||
tm.assert_index_equal(result, expected) | ||
|
||
@pytest.mark.parametrize('data, f', [[[1, 1, np.nan], {1: False}], | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same |
||
[[1, 2, np.nan], {1: False, | ||
2: False}], | ||
[[1, 1, np.nan], pd.Series([False, | ||
False])], | ||
[[1, 2, np.nan], pd.Series([False, | ||
False, | ||
False])]]) | ||
def test_map_dont_fill_nan(self, data, f): | ||
values = pd.Categorical(data) | ||
result = values.map(f) | ||
expected = pd.Index([False, False, np.nan]) | ||
tm.assert_index_equal(result, expected) | ||
|
||
@pytest.mark.parametrize('klass', [list, tuple, np.array, pd.Series]) | ||
def test_where(self, klass): | ||
i = self.create_index() | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is this a special case? Use `is_dict_like` instead.