From eeef9fc0b8949b44949642450a8efdcbcc814da9 Mon Sep 17 00:00:00 2001
From: Luke Manley <lukemanley@gmail.com>
Date: Sun, 30 Oct 2022 21:12:38 -0400
Subject: [PATCH 1/3] refactor Categorical._replace

---
 doc/source/whatsnew/v2.0.0.rst                |  3 ++
 pandas/core/arrays/categorical.py             | 46 ++++++-------------
 pandas/core/internals/blocks.py               | 16 ++++---
 .../tests/arrays/categorical/test_replace.py  | 10 ++++
 4 files changed, 37 insertions(+), 38 deletions(-)

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 5614b7a2c0846..19296587011da 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -298,6 +298,7 @@ Performance improvements
 - Performance improvement for :class:`DatetimeIndex` constructor passing a list (:issue:`48609`)
 - Performance improvement in :func:`merge` and :meth:`DataFrame.join` when joining on a sorted :class:`MultiIndex` (:issue:`48504`)
 - Performance improvement in :meth:`DataFrame.loc` and :meth:`Series.loc` for tuple-based indexing of a :class:`MultiIndex` (:issue:`48384`)
+- Performance improvement for :meth:`Series.replace` with categorical dtype (:issue:`#####`)
 - Performance improvement for :meth:`MultiIndex.unique` (:issue:`48335`)
 - Performance improvement for :func:`concat` with extension array backed indexes (:issue:`49128`, :issue:`49178`)
 - Reduce memory usage of :meth:`DataFrame.to_pickle`/:meth:`Series.to_pickle` when using BZ2 or LZMA (:issue:`49068`)
@@ -319,6 +320,8 @@ Bug fixes
 Categorical
 ^^^^^^^^^^^
 - Bug in :meth:`Categorical.set_categories` losing dtype information (:issue:`48812`)
+- Bug in :meth:`Series.replace` with categorical dtype when ``to_replace`` values overlap with new values (:issue:`#####`)
+- Bug in :meth:`Series.replace` with categorical dtype losing nullable dtypes of underlying categories (:issue:`#####`)
 - Bug in :meth:`DataFrame.groupby` and :meth:`Series.groupby` would reorder categories when used as a grouper (:issue:`48749`)
 
 Datetimelike
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index becca2b668290..d3805dea40ae7 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2286,42 +2286,24 @@ def isin(self, values) -> npt.NDArray[np.bool_]:
         return algorithms.isin(self.codes, code_values)
 
     def _replace(self, *, to_replace, value, inplace: bool = False):
+        from pandas import (
+            Index,
+            Series,
+        )
+
         inplace = validate_bool_kwarg(inplace, "inplace")
         cat = self if inplace else self.copy()
 
-        # other cases, like if both to_replace and value are list-like or if
-        # to_replace is a dict, are handled separately in NDFrame
-        if not is_list_like(to_replace):
-            to_replace = [to_replace]
-
-        categories = cat.categories.tolist()
-        removals = set()
-        for replace_value in to_replace:
-            if value == replace_value:
-                continue
-            if replace_value not in cat.categories:
-                continue
-            if isna(value):
-                removals.add(replace_value)
-                continue
-
-            index = categories.index(replace_value)
-
-            if value in cat.categories:
-                value_index = categories.index(value)
-                cat._codes[cat._codes == index] = value_index
-                removals.add(replace_value)
-            else:
-                categories[index] = value
-                cat._set_categories(categories)
+        ser = Series(cat.categories, copy=True)
+        ser.replace(to_replace=to_replace, value=value, inplace=True)
 
-        if len(removals):
-            new_categories = [c for c in categories if c not in removals]
-            new_dtype = CategoricalDtype(new_categories, ordered=self.dtype.ordered)
-            codes = recode_for_categories(
-                cat.codes, cat.categories, new_dtype.categories
-            )
-            NDArrayBacked.__init__(cat, codes, new_dtype)
+        all_values = Index(ser)
+        new_categories = Index(ser.dropna().drop_duplicates(keep="first"))
+        new_codes = recode_for_categories(
+            cat._codes, all_values, new_categories, copy=False
+        )
+        new_dtype = CategoricalDtype(new_categories, ordered=self.dtype.ordered)
+        NDArrayBacked.__init__(cat, new_codes, new_dtype)
 
         if not inplace:
             return cat
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index f944c74ac37fd..fec485c9a7993 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -575,13 +575,10 @@ def replace(
         values = self.values
 
         if isinstance(values, Categorical):
-            # TODO: avoid special-casing
+            # GH (TODO)
             blk = self if inplace else self.copy()
-            # error: Item "ExtensionArray" of "Union[ndarray[Any, Any],
-            # ExtensionArray]" has no attribute "_replace"
-            blk.values._replace(  # type: ignore[union-attr]
-                to_replace=to_replace, value=value, inplace=True
-            )
+            values = cast(Categorical, blk.values)
+            values._replace(to_replace=to_replace, value=value, inplace=True)
             return [blk]
 
         if not self._can_hold_element(to_replace):
@@ -688,6 +685,13 @@ def replace_list(
         """
         values = self.values
 
+        if isinstance(values, Categorical):
+            # GH (TODO)
+            blk = self if inplace else self.copy()
+            values = cast(Categorical, blk.values)
+            values._replace(to_replace=src_list, value=dest_list, inplace=True)
+            return [blk]
+
         # Exclude anything that we know we won't contain
         pairs = [
             (x, y) for x, y in zip(src_list, dest_list) if self._can_hold_element(x)
diff --git a/pandas/tests/arrays/categorical/test_replace.py b/pandas/tests/arrays/categorical/test_replace.py
index a3ba420c84a17..84db9d89b31cc 100644
--- a/pandas/tests/arrays/categorical/test_replace.py
+++ b/pandas/tests/arrays/categorical/test_replace.py
@@ -21,6 +21,8 @@
         ((5, 6), 2, [1, 2, 3], False),
         ([1], [2], [2, 2, 3], False),
         ([1, 4], [5, 2], [5, 2, 3], False),
+        # GH # (TODO)
+        ([1, 2, 3], [2, 3, 4], [2, 3, 4], False),
         # check_categorical sorts categories, which crashes on mixed dtypes
         (3, "4", [1, 2, "4"], False),
         ([1, 2, "3"], "5", ["5", "5", 3], True),
@@ -65,3 +67,11 @@ def test_replace_categorical(to_replace, value, result, expected_error_msg):
 
     pd.Series(cat).replace(to_replace, value, inplace=True)
     tm.assert_categorical_equal(cat, expected)
+
+
+def test_replace_categorical_ea_dtype():
+    # GH#
+    cat = Categorical(pd.array(["a", "b"], dtype="string"))
+    result = pd.Series(cat).replace(["a", "b"], ["c", pd.NA])._values
+    expected = Categorical(pd.array(["c", pd.NA], dtype="string"))
+    tm.assert_categorical_equal(result, expected)

From 05c2025bec021cf7ad3c3843bd1397c8e290ed25 Mon Sep 17 00:00:00 2001
From: Luke Manley <lukemanley@gmail.com>
Date: Sun, 30 Oct 2022 21:26:25 -0400
Subject: [PATCH 2/3] replace placeholder issue references with GH49404

---
 doc/source/whatsnew/v2.0.0.rst                  | 6 +++---
 pandas/core/internals/blocks.py                 | 4 ++--
 pandas/tests/arrays/categorical/test_replace.py | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 19296587011da..8156196d33b70 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -298,7 +298,7 @@ Performance improvements
 - Performance improvement for :class:`DatetimeIndex` constructor passing a list (:issue:`48609`)
 - Performance improvement in :func:`merge` and :meth:`DataFrame.join` when joining on a sorted :class:`MultiIndex` (:issue:`48504`)
 - Performance improvement in :meth:`DataFrame.loc` and :meth:`Series.loc` for tuple-based indexing of a :class:`MultiIndex` (:issue:`48384`)
-- Performance improvement for :meth:`Series.replace` with categorical dtype (:issue:`#####`)
+- Performance improvement for :meth:`Series.replace` with categorical dtype (:issue:`49404`)
 - Performance improvement for :meth:`MultiIndex.unique` (:issue:`48335`)
 - Performance improvement for :func:`concat` with extension array backed indexes (:issue:`49128`, :issue:`49178`)
 - Reduce memory usage of :meth:`DataFrame.to_pickle`/:meth:`Series.to_pickle` when using BZ2 or LZMA (:issue:`49068`)
@@ -320,8 +320,8 @@ Bug fixes
 Categorical
 ^^^^^^^^^^^
 - Bug in :meth:`Categorical.set_categories` losing dtype information (:issue:`48812`)
-- Bug in :meth:`Series.replace` with categorical dtype when ``to_replace`` values overlap with new values (:issue:`#####`)
-- Bug in :meth:`Series.replace` with categorical dtype losing nullable dtypes of underlying categories (:issue:`#####`)
+- Bug in :meth:`Series.replace` with categorical dtype when ``to_replace`` values overlap with new values (:issue:`49404`)
+- Bug in :meth:`Series.replace` with categorical dtype losing nullable dtypes of underlying categories (:issue:`49404`)
 - Bug in :meth:`DataFrame.groupby` and :meth:`Series.groupby` would reorder categories when used as a grouper (:issue:`48749`)
 
 Datetimelike
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index fec485c9a7993..bc9b00fe07582 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -575,7 +575,7 @@ def replace(
         values = self.values
 
         if isinstance(values, Categorical):
-            # GH (TODO)
+            # GH49404
             blk = self if inplace else self.copy()
             values = cast(Categorical, blk.values)
             values._replace(to_replace=to_replace, value=value, inplace=True)
@@ -686,7 +686,7 @@ def replace_list(
         values = self.values
 
         if isinstance(values, Categorical):
-            # GH (TODO)
+            # GH49404
             blk = self if inplace else self.copy()
             values = cast(Categorical, blk.values)
             values._replace(to_replace=src_list, value=dest_list, inplace=True)
diff --git a/pandas/tests/arrays/categorical/test_replace.py b/pandas/tests/arrays/categorical/test_replace.py
index 84db9d89b31cc..62a7bf0673a16 100644
--- a/pandas/tests/arrays/categorical/test_replace.py
+++ b/pandas/tests/arrays/categorical/test_replace.py
@@ -21,7 +21,7 @@
         ((5, 6), 2, [1, 2, 3], False),
         ([1], [2], [2, 2, 3], False),
         ([1, 4], [5, 2], [5, 2, 3], False),
-        # GH # (TODO)
+        # GH49404
         ([1, 2, 3], [2, 3, 4], [2, 3, 4], False),
         # check_categorical sorts categories, which crashes on mixed dtypes
         (3, "4", [1, 2, "4"], False),
@@ -70,7 +70,7 @@ def test_replace_categorical(to_replace, value, result, expected_error_msg):
 
 
 def test_replace_categorical_ea_dtype():
-    # GH#
+    # GH49404
     cat = Categorical(pd.array(["a", "b"], dtype="string"))
     result = pd.Series(cat).replace(["a", "b"], ["c", pd.NA])._values
     expected = Categorical(pd.array(["c", pd.NA], dtype="string"))

From b84fdfeae6cad512f4e8ad39dd2971f5bb1f956d Mon Sep 17 00:00:00 2001
From: Luke Manley <lukemanley@gmail.com>
Date: Mon, 31 Oct 2022 19:56:39 -0400
Subject: [PATCH 3/3] cleanup: avoid inplace Series.replace, restore TODO comments

---
 pandas/core/arrays/categorical.py | 2 +-
 pandas/core/internals/blocks.py   | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index d3805dea40ae7..5f769e5fd8467 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2295,7 +2295,7 @@ def _replace(self, *, to_replace, value, inplace: bool = False):
         cat = self if inplace else self.copy()
 
         ser = Series(cat.categories, copy=True)
-        ser.replace(to_replace=to_replace, value=value, inplace=True)
+        ser = ser.replace(to_replace=to_replace, value=value)
 
         all_values = Index(ser)
         new_categories = Index(ser.dropna().drop_duplicates(keep="first"))
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index e0d548fd98c0e..06fc70c0af2dd 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -574,6 +574,7 @@ def replace(
         values = self.values
 
         if isinstance(values, Categorical):
+            # TODO: avoid special-casing
             # GH49404
             blk = self if inplace else self.copy()
             values = cast(Categorical, blk.values)
@@ -688,6 +689,7 @@ def replace_list(
         values = self.values
 
         if isinstance(values, Categorical):
+            # TODO: avoid special-casing
             # GH49404
             blk = self if inplace else self.copy()
             values = cast(Categorical, blk.values)