From 4d3944d557402b97ca477e471e19b93d79073690 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> Date: Mon, 6 Apr 2020 17:01:56 -0500 Subject: [PATCH] Backport PR #33292 on branch 1.0.x (REGR: Fix bug when replacing categorical value with self) --- doc/source/whatsnew/v1.0.4.rst | 1 + pandas/core/arrays/categorical.py | 2 ++ pandas/tests/arrays/categorical/test_algos.py | 2 ++ 3 files changed, 5 insertions(+) diff --git a/doc/source/whatsnew/v1.0.4.rst b/doc/source/whatsnew/v1.0.4.rst index 8eb6b32669d60..1ce3ba5f088e7 100644 --- a/doc/source/whatsnew/v1.0.4.rst +++ b/doc/source/whatsnew/v1.0.4.rst @@ -17,6 +17,7 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - Bug in :meth:`GroupBy.first` and :meth:`GroupBy.last` where None is not preserved in object dtype (:issue:`32800`) - Bug in DataFrame reductions using ``numeric_only=True`` and ExtensionArrays (:issue:`33256`). +- Bug where :meth:`Categorical.replace` would replace with ``NaN`` whenever the value to replace and the replacement value were equal (:issue:`33288`) - .. 
_whatsnew_104.bug_fixes: diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index d8a2fbdd58382..756896279054c 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2459,6 +2459,8 @@ def replace(self, to_replace, value, inplace: bool = False): # other cases, like if both to_replace and value are list-like or if # to_replace is a dict, are handled separately in NDFrame for replace_value, new_value in replace_dict.items(): + if new_value == replace_value: + continue if replace_value in cat.categories: if isna(new_value): cat.remove_categories(replace_value, inplace=True) diff --git a/pandas/tests/arrays/categorical/test_algos.py b/pandas/tests/arrays/categorical/test_algos.py index 835aa87a7c21b..50ad27ce3c2c5 100644 --- a/pandas/tests/arrays/categorical/test_algos.py +++ b/pandas/tests/arrays/categorical/test_algos.py @@ -64,6 +64,8 @@ def test_isin_cats(): [ ("b", "c", ["a", "c"], "Categorical.categories are different"), ("c", "d", ["a", "b"], None), + # https://github.com/pandas-dev/pandas/issues/33288 + ("a", "a", ["a", "b"], None), ("b", None, ["a", None], "Categorical.categories length are different"), ], )