Skip to content

Commit d765dc3

Browse files
committed
BUG-24241 make Categorical.map transform nans
1 parent b7ee829 commit d765dc3

File tree

3 files changed

+50
-3
lines changed

3 files changed

+50
-3
lines changed

doc/source/whatsnew/v0.24.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1273,6 +1273,7 @@ Categorical
12731273
- Bug when resampling with :meth:`DataFrame.resample()` and aggregating on categorical data, where the categorical dtype was getting lost. (:issue:`23227`)
12741274
- Bug in many methods of the ``.str``-accessor, which always failed on calling the ``CategoricalIndex.str`` constructor (:issue:`23555`, :issue:`23556`)
12751275
- Bug in :meth:`Series.where` losing the categorical dtype for categorical data (:issue:`24077`)
1276+
- Bug in :meth:`Categorical.map` where the given mapper would not be applied to ``NaN`` values (:issue:`24241`)
12761277

12771278
Datetimelike
12781279
^^^^^^^^^^^^

pandas/core/arrays/categorical.py

+18-3
Original file line numberDiff line numberDiff line change
@@ -1229,11 +1229,26 @@ def map(self, mapper):
12291229
Index(['first', 'second', nan], dtype='object')
12301230
"""
12311231
new_categories = self.categories.map(mapper)
1232+
12321233
try:
1233-
return self.from_codes(self._codes.copy(),
1234-
categories=new_categories,
1235-
ordered=self.ordered)
1234+
if isinstance(mapper, (dict, ABCSeries)):
1235+
new_value = mapper[np.nan]
1236+
else:
1237+
new_value = mapper(np.nan)
1238+
except (AttributeError, KeyError, TypeError, ValueError):
1239+
new_value = np.nan
1240+
1241+
try:
1242+
ret = self.from_codes(self._codes.copy(),
1243+
categories=new_categories,
1244+
ordered=self.ordered)
1245+
if new_value not in ret.categories and any(self._codes == -1):
1246+
ret.add_categories(new_value, inplace=True)
1247+
ret = ret.fillna(new_value)
1248+
return ret
12361249
except ValueError:
1250+
new_categories = new_categories.insert(len(new_categories),
1251+
new_value)
12371252
return np.take(new_categories, self._codes)
12381253

12391254
__eq__ = _cat_compare_op('__eq__')

pandas/tests/indexes/test_category.py

+31
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,37 @@ def test_map_with_categorical_series(self):
311311
exp = pd.Index(["odd", "even", "odd", np.nan])
312312
tm.assert_index_equal(a.map(c), exp)
313313

314+
@pytest.mark.parametrize('data, f', [[[1, 1, np.nan], pd.isna],
315+
[[1, 2, np.nan], pd.isna],
316+
[[1, 1, np.nan], {1: False,
317+
np.nan: True}],
318+
[[1, 2, np.nan], {1: False,
319+
2: False,
320+
np.nan: True}]])
321+
def test_map_fill_nan(self, data, f):
322+
values = pd.Categorical(data)
323+
result = values.map(f)
324+
if data[1] == 1:
325+
expected = pd.Categorical([False, False, True])
326+
tm.assert_categorical_equal(result, expected)
327+
else:
328+
expected = pd.Index([False, False, True])
329+
tm.assert_index_equal(result, expected)
330+
331+
@pytest.mark.parametrize('data, f', [[[1, 1, np.nan], {1: False}],
332+
[[1, 2, np.nan], {1: False,
333+
2: False}],
334+
[[1, 1, np.nan], pd.Series([False,
335+
False])],
336+
[[1, 2, np.nan], pd.Series([False,
337+
False,
338+
False])]])
339+
def test_map_dont_fill_nan(self, data, f):
340+
values = pd.Categorical(data)
341+
result = values.map(f)
342+
expected = pd.Index([False, False, np.nan])
343+
tm.assert_index_equal(result, expected)
344+
314345
@pytest.mark.parametrize('klass', [list, tuple, np.array, pd.Series])
315346
def test_where(self, klass):
316347
i = self.create_index()

0 commit comments

Comments
 (0)