diff --git a/doc/source/whatsnew/v1.2.4.rst b/doc/source/whatsnew/v1.2.4.rst index 26d768f830830..59fae178b3214 100644 --- a/doc/source/whatsnew/v1.2.4.rst +++ b/doc/source/whatsnew/v1.2.4.rst @@ -19,6 +19,7 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.to_json` raising ``AttributeError`` when run on PyPy (:issue:`39837`) - Fixed regression in :meth:`DataFrame.where` not returning a copy in the case of an all True condition (:issue:`39595`) - Fixed regression in :meth:`DataFrame.replace` raising ``IndexError`` when ``regex`` was a multi-key dictionary (:issue:`39338`) +- Fixed regression in :meth:`Series.replace` and :meth:`DataFrame.replace` not replacing missing values for :class:`CategoricalDtype` (:issue:`40472`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 08646c4d25a50..3b92e0ead799e 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2350,7 +2350,7 @@ def replace(self, to_replace, value, inplace: bool = False): # other cases, like if both to_replace and value are list-like or if # to_replace is a dict, are handled separately in NDFrame for replace_value, new_value in replace_dict.items(): - if new_value == replace_value: + if new_value == replace_value or (isna(replace_value) and isna(new_value)): continue if replace_value in cat.categories: if isna(new_value): @@ -2365,6 +2365,18 @@ def replace(self, to_replace, value, inplace: bool = False): else: categories[index] = new_value cat.rename_categories(categories, inplace=True) + + # GH-40472: make sure missing values can also be replaced + elif isna(replace_value) and (cat._codes == -1).any(): + if new_value in cat.categories: + categories = cat.categories.tolist() + value_index = categories.index(new_value) + else: + cat.add_categories(new_value, inplace=True) + value_index = len(cat.categories) - 1 + + cat._codes[cat._codes == -1] = value_index + if not inplace: return cat diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index d8f93f047e74b..aa5815cd4b7da 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1652,6 +1652,19 @@ def test_replace_dict_category_type(self, input_category_df, expected_category_d tm.assert_frame_equal(result, expected) + # Replace with an existing category and one which will add a new category + @pytest.mark.parametrize("new_value", ["c", "b"]) + def test_replace_categorical_missing_vals( + self, frame_or_series, unique_nulls_fixture, new_value + ): + # GH-40472 + obj = frame_or_series([unique_nulls_fixture, "b"], dtype="category") + + result = obj.replace({unique_nulls_fixture: new_value}) + expected = frame_or_series([new_value, "b"], dtype="category") + + tm.assert_equal(result, expected) + def test_replace_with_compiled_regex(self): # https://github.com/pandas-dev/pandas/issues/35680 df = DataFrame(["a", "b", "c"])