Skip to content

Commit 58de332

Browse files
authored
BUG: fix treatment of NaNs when .apply() function is used on categorical columns. (#59966)
* remove action=ignore for .apply() on cat dtype
* add PR reference in comments
* fix pytest linting
* refac failing test_series_apply.py
* Trigger CI
* changes post review
* rephrase change log
1 parent 4ad6c7a commit 58de332

File tree

4 files changed

+8
-17
lines changed

4 files changed

+8
-17
lines changed

doc/source/whatsnew/v3.0.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -544,7 +544,7 @@ Bug fixes
544544

545545
Categorical
546546
^^^^^^^^^^^
547-
-
547+
- Bug in :func:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`)
548548
-
549549

550550
Datetimelike

pandas/core/apply.py

+2-12
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,7 @@
3838
is_numeric_dtype,
3939
is_sequence,
4040
)
41-
from pandas.core.dtypes.dtypes import (
42-
CategoricalDtype,
43-
ExtensionDtype,
44-
)
41+
from pandas.core.dtypes.dtypes import ExtensionDtype
4542
from pandas.core.dtypes.generic import (
4643
ABCDataFrame,
4744
ABCNDFrame,
@@ -1465,14 +1462,7 @@ def curried(x):
14651462

14661463
else:
14671464
curried = func
1468-
1469-
# row-wise access
1470-
# apply doesn't have a `na_action` keyword and for backward compat reasons
1471-
# we need to give `na_action="ignore"` for categorical data.
1472-
# TODO: remove the `na_action="ignore"` when that default has been changed in
1473-
# Categorical (GH51645).
1474-
action = "ignore" if isinstance(obj.dtype, CategoricalDtype) else None
1475-
mapped = obj._map_values(mapper=curried, na_action=action)
1465+
mapped = obj._map_values(mapper=curried)
14761466

14771467
if len(mapped) and isinstance(mapped[0], ABCSeries):
14781468
# GH#43986 Need to do list(mapped) in order to get treated as nested

pandas/tests/apply/test_frame_apply.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -741,8 +741,9 @@ def test_apply_category_equalness(val):
741741

742742
result = df.a.apply(lambda x: x == val)
743743
expected = Series(
744-
[np.nan if pd.isnull(x) else x == val for x in df_values], name="a"
744+
[False if pd.isnull(x) else x == val for x in df_values], name="a"
745745
)
746+
# False since behavior of NaN for categorical dtype has been changed (GH 59966)
746747
tm.assert_series_equal(result, expected)
747748

748749

pandas/tests/apply/test_series_apply.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -236,10 +236,10 @@ def test_apply_categorical_with_nan_values(series, by_row):
236236
with pytest.raises(AttributeError, match=msg):
237237
s.apply(lambda x: x.split("-")[0], by_row=by_row)
238238
return
239-
240-
result = s.apply(lambda x: x.split("-")[0], by_row=by_row)
239+
# NaN for cat dtype fixed in (GH 59966)
240+
result = s.apply(lambda x: x.split("-")[0] if pd.notna(x) else False, by_row=by_row)
241241
result = result.astype(object)
242-
expected = Series(["1", "1", np.nan], dtype="category")
242+
expected = Series(["1", "1", False], dtype="category")
243243
expected = expected.astype(object)
244244
tm.assert_series_equal(result, expected)
245245

0 commit comments

Comments
 (0)