Skip to content

Commit 590238d

Browse files
committed
BUG-24241 update documentation instead
1 parent 628bfac commit 590238d

File tree

4 files changed

+17
-43
lines changed

4 files changed

+17
-43
lines changed

doc/source/categorical.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -1145,7 +1145,8 @@ dtype in apply
11451145

11461146
Pandas currently does not preserve the dtype in apply functions: If you apply along rows you get
11471147
a `Series` of ``object`` `dtype` (same as getting a row -> getting one element will return a
1148-
basic type) and applying along columns will also convert to object.
1148+
basic type) and applying along columns will also convert to object. ``NaN`` values are unaffected.
1149+
You can use ``fillna`` to handle missing values before applying a function.
11491150

11501151
.. ipython:: python
11511152

doc/source/whatsnew/v0.24.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -1273,7 +1273,7 @@ Categorical
12731273
- Bug when resampling :meth:`DataFrame.resample()` and aggregating on categorical data, the categorical dtype was getting lost. (:issue:`23227`)
12741274
- Bug in many methods of the ``.str``-accessor, which always failed on calling the ``CategoricalIndex.str`` constructor (:issue:`23555`, :issue:`23556`)
12751275
- Bug in :meth:`Series.where` losing the categorical dtype for categorical data (:issue:`24077`)
1276-
- Bug in :meth:`Categorical.apply` where the given function would not be applied to ``NaN`` values (:issue:`24241`)
1276+
- Bug in :meth:`Categorical.map` where ``NaN`` values could be handled unpredictably. ``NaN`` values are now left unchanged by the mapping (:issue:`24241`)
12771277

12781278
Datetimelike
12791279
^^^^^^^^^^^^

pandas/core/arrays/categorical.py

+7-19
Original file line numberDiff line numberDiff line change
@@ -1166,7 +1166,7 @@ def map(self, mapper):
11661166
Maps the categories to new categories. If the mapping correspondence is
11671167
one-to-one the result is a :class:`~pandas.Categorical` which has the
11681168
same order property as the original, otherwise a :class:`~pandas.Index`
1169-
is returned.
1169+
is returned. NaN values are unaffected.
11701170
11711171
If a `dict` or :class:`~pandas.Series` is used any unmapped category is
11721172
mapped to `NaN`. Note that if this happens an :class:`~pandas.Index`
@@ -1229,26 +1229,14 @@ def map(self, mapper):
12291229
Index(['first', 'second', nan], dtype='object')
12301230
"""
12311231
new_categories = self.categories.map(mapper)
1232-
12331232
try:
1234-
if is_dict_like(mapper):
1235-
new_value = mapper[np.nan]
1236-
else:
1237-
new_value = mapper(np.nan)
1238-
except (AttributeError, KeyError, TypeError, ValueError):
1239-
new_value = np.nan
1240-
1241-
try:
1242-
ret = self.from_codes(self._codes.copy(),
1243-
categories=new_categories,
1244-
ordered=self.ordered)
1245-
if new_value not in ret.categories and any(self._codes == -1):
1246-
ret.add_categories(new_value, inplace=True)
1247-
ret = ret.fillna(new_value)
1248-
return ret
1233+
return self.from_codes(self._codes.copy(),
1234+
categories=new_categories,
1235+
ordered=self.ordered)
12491236
except ValueError:
1250-
new_categories = new_categories.insert(len(new_categories),
1251-
new_value)
1237+
if any(self._codes == -1):
1238+
new_categories = new_categories.insert(len(new_categories),
1239+
np.nan)
12521240
return np.take(new_categories, self._codes)
12531241

12541242
__eq__ = _cat_compare_op('__eq__')

pandas/tests/indexes/test_category.py

+7-22
Original file line numberDiff line numberDiff line change
@@ -319,35 +319,20 @@ def test_map_with_categorical_series(self):
319319
(
320320
([1, 1, np.nan], pd.isna),
321321
([1, 2, np.nan], pd.isna),
322-
([1, 1, np.nan], {1: False, np.nan: True}),
323-
([1, 2, np.nan], {1: False, 2: False, np.nan: True})
324-
))
325-
def test_map_fill_nan(self, data, f): # GH 24241
326-
values = pd.Categorical(data)
327-
result = values.map(f)
328-
if data[1] == 1:
329-
expected = pd.Categorical([False, False, True])
330-
tm.assert_categorical_equal(result, expected)
331-
else:
332-
expected = pd.Index([False, False, True])
333-
tm.assert_index_equal(result, expected)
334-
335-
@pytest.mark.parametrize(
336-
(
337-
'data',
338-
'f'
339-
),
340-
(
341322
([1, 1, np.nan], {1: False}),
342323
([1, 2, np.nan], {1: False, 2: False}),
343324
([1, 1, np.nan], pd.Series([False, False])),
344325
([1, 2, np.nan], pd.Series([False, False, False]))
345326
))
346-
def test_map_dont_fill_nan(self, data, f): # GH 24241
327+
def test_map_with_nan(self, data, f): # GH 24241
347328
values = pd.Categorical(data)
348329
result = values.map(f)
349-
expected = pd.Index([False, False, np.nan])
350-
tm.assert_index_equal(result, expected)
330+
if data[1] == 1:
331+
expected = pd.Categorical([False, False, np.nan])
332+
tm.assert_categorical_equal(result, expected)
333+
else:
334+
expected = pd.Index([False, False, np.nan])
335+
tm.assert_index_equal(result, expected)
351336

352337
@pytest.mark.parametrize('klass', [list, tuple, np.array, pd.Series])
353338
def test_where(self, klass):

0 commit comments

Comments
 (0)