From 2d9c4dbcaabff01142e1313fe9a155c44b4a0eaf Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Sat, 27 Mar 2021 12:17:21 -0400 Subject: [PATCH 1/3] REGR: missing vals not replaceable in categorical --- pandas/core/arrays/categorical.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 08646c4d25a50..607dee5c9695d 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2365,6 +2365,17 @@ def replace(self, to_replace, value, inplace: bool = False): else: categories[index] = new_value cat.rename_categories(categories, inplace=True) + elif isna(replace_value) and (cat._codes == -1).any(): + if new_value in cat.categories: + categories = cat.categories.tolist() + value_index = categories.index(new_value) + cat._codes[cat._codes == -1] = value_index + else: + cat.add_categories(new_value, inplace=True) + new_value = len(cat.categories) - 1 + + cat._codes[cat._codes == -1] = new_value + if not inplace: return cat From b0cb759390527a0afbddafe34459c7433fdd3117 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Sat, 27 Mar 2021 12:54:30 -0400 Subject: [PATCH 2/3] Add whatsnew --- doc/source/whatsnew/v1.2.4.rst | 1 + pandas/core/arrays/categorical.py | 9 +++++---- pandas/tests/frame/methods/test_replace.py | 11 +++++++++++ 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.2.4.rst b/doc/source/whatsnew/v1.2.4.rst index 26d768f830830..59fae178b3214 100644 --- a/doc/source/whatsnew/v1.2.4.rst +++ b/doc/source/whatsnew/v1.2.4.rst @@ -19,6 +19,7 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.to_json` raising ``AttributeError`` when run on PyPy (:issue:`39837`) - Fixed regression in :meth:`DataFrame.where` not returning a copy in the case of an all True condition (:issue:`39595`) - Fixed regression in :meth:`DataFrame.replace` raising ``IndexError`` when ``regex`` was a multi-key dictionary (:issue:`39338`) +- Fixed regression in :meth:`Series.replace` and :meth:`DataFrame.replace` not replacing missing values for :class:`CategoricalDtype` (:issue:`40472`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 607dee5c9695d..3b92e0ead799e 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2350,7 +2350,7 @@ def replace(self, to_replace, value, inplace: bool = False): # other cases, like if both to_replace and value are list-like or if # to_replace is a dict, are handled separately in NDFrame for replace_value, new_value in replace_dict.items(): - if new_value == replace_value: + if new_value == replace_value or (isna(replace_value) and isna(new_value)): continue if replace_value in cat.categories: if isna(new_value): @@ -2365,16 +2365,17 @@ def replace(self, to_replace, value, inplace: bool = False): else: categories[index] = new_value cat.rename_categories(categories, inplace=True) + + # GH-40472: make sure missing values can also be replaced elif isna(replace_value) and (cat._codes == -1).any(): if new_value in cat.categories: categories = cat.categories.tolist() value_index = categories.index(new_value) - cat._codes[cat._codes == -1] = value_index else: cat.add_categories(new_value, inplace=True) - new_value = len(cat.categories) - 1 + value_index = len(cat.categories) - 1 - cat._codes[cat._codes == -1] = new_value + cat._codes[cat._codes == -1] = value_index if not inplace: return cat diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index d8f93f047e74b..a5840fa7c0a5d 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1652,6 +1652,17 @@ def test_replace_dict_category_type(self, input_category_df, expected_category_d tm.assert_frame_equal(result, expected) + # Replace with an existing category and one which will add a new category + @pytest.mark.parametrize("new_value", ["c", "b"]) + def test_replace_categorical_missing_vals(self, frame_or_series, unique_nulls_fixture, new_value): + # GH-40472 + obj = frame_or_series([unique_nulls_fixture, "b"], dtype="category") + + result = obj.replace({unique_nulls_fixture: new_value}) + expected = frame_or_series([new_value, "b"], dtype="category") + + tm.assert_equal(result, expected) + def test_replace_with_compiled_regex(self): # https://github.com/pandas-dev/pandas/issues/35680 df = DataFrame(["a", "b", "c"]) From 445356db867dea7ab826132f8cdddc23200393c0 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Sat, 27 Mar 2021 13:07:36 -0400 Subject: [PATCH 3/3] precommit fixup --- pandas/tests/frame/methods/test_replace.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index a5840fa7c0a5d..aa5815cd4b7da 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1654,7 +1654,9 @@ def test_replace_dict_category_type(self, input_category_df, expected_category_d # Replace with an existing category and one which will add a new category @pytest.mark.parametrize("new_value", ["c", "b"]) - def test_replace_categorical_missing_vals(self, frame_or_series, unique_nulls_fixture, new_value): + def test_replace_categorical_missing_vals( + self, frame_or_series, unique_nulls_fixture, new_value + ): # GH-40472 obj = frame_or_series([unique_nulls_fixture, "b"], dtype="category")