From 8c8b36fcb79998754cdf13bc9abddcfd711f1944 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 3 Sep 2020 08:53:04 -0700
Subject: [PATCH 1/4] BUG: df.replace with numeric values and string to_replace

---
 doc/source/whatsnew/v1.2.0.rst              |   2 +-
 pandas/core/array_algos/replace.py          |  94 ++++++++++++++++++
 pandas/core/internals/blocks.py             |  28 +++++-
 pandas/core/internals/managers.py           | 104 +-------------------
 pandas/tests/frame/methods/test_replace.py  |  15 ++-
 pandas/tests/series/methods/test_replace.py |   5 +-
 6 files changed, 136 insertions(+), 112 deletions(-)
 create mode 100644 pandas/core/array_algos/replace.py

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 407e8ba029ada..526e1a4fb0f89 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -281,7 +281,7 @@ ExtensionArray
 
 Other
 ^^^^^
--
+- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly raising ``TypeError`` with numeric values and string ``to_replace`` (:issue:`34789`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/core/array_algos/replace.py b/pandas/core/array_algos/replace.py
new file mode 100644
index 0000000000000..341472e764e7f
--- /dev/null
+++ b/pandas/core/array_algos/replace.py
@@ -0,0 +1,94 @@
+"""
+Methods used by Block.replace and related methods.
+"""
+import operator
+import re
+from typing import Optional, Pattern, Union
+
+import numpy as np
+
+from pandas._typing import ArrayLike, Scalar
+
+from pandas.core.dtypes.common import (
+    is_datetimelike_v_numeric,
+    is_numeric_v_string_like,
+    is_scalar,
+)
+
+
+def compare_or_regex_search(
+    a: ArrayLike,
+    b: Union[Scalar, Pattern],
+    regex: bool = False,
+    mask: Optional[ArrayLike] = None,
+) -> Union[ArrayLike, bool]:
+    """
+    Compare two array_like inputs of the same shape or two scalar values
+
+    Calls operator.eq or re.search, depending on regex argument. If regex is
+    True, perform an element-wise regex matching.
+
+    Parameters
+    ----------
+    a : array_like
+    b : scalar or regex pattern
+    regex : bool, default False
+    mask : array_like or None (default)
+
+    Returns
+    -------
+    mask : array_like of bool
+    """
+
+    def _check_comparison_types(
+        result: Union[ArrayLike, bool], a: ArrayLike, b: Union[Scalar, Pattern]
+    ):
+        """
+        Raise a TypeError if ``result`` is a scalar while ``a`` is an ndarray,
+        i.e. ``a`` and ``b`` could not be compared element-wise; else return None.
+        """
+        if is_scalar(result) and isinstance(a, np.ndarray):
+            type_names = [type(a).__name__, type(b).__name__]
+
+            if isinstance(a, np.ndarray):
+                type_names[0] = f"ndarray(dtype={a.dtype})"
+
+            raise TypeError(
+                f"Cannot compare types {repr(type_names[0])} and {repr(type_names[1])}"
+            )
+
+    if not regex:
+        op = lambda x: operator.eq(x, b)
+    else:
+        op = np.vectorize(
+            lambda x: bool(re.search(b, x))
+            if isinstance(x, str) and isinstance(b, (str, Pattern))
+            else False
+        )
+
+    # GH#32621 use mask to avoid comparing to NAs
+    if mask is None and isinstance(a, np.ndarray) and not isinstance(b, np.ndarray):
+        mask = np.reshape(~(isna(a)), a.shape)
+    if isinstance(a, np.ndarray):
+        a = a[mask]
+
+    if is_numeric_v_string_like(a, b):
+        # GH#29553 avoid deprecation warnings from numpy
+        return np.zeros(a.shape, dtype=bool)
+
+    elif is_datetimelike_v_numeric(a, b) or is_numeric_v_string_like(a, b):
+        # GH#29553 avoid deprecation warnings from numpy
+        _check_comparison_types(False, a, b)
+        return False
+
+    result = op(a)
+
+    if isinstance(result, np.ndarray) and mask is not None:
+        # The shape of the mask can differ from that of the result
+        # since we may compare only a subset of a's or b's elements
+        tmp = np.zeros(mask.shape, dtype=np.bool_)
+        tmp[mask] = result
+        result = tmp
+
+    _check_comparison_types(result, a, b)
+    return result
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index ad388ef3f53b0..66448cac63106 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -11,7 +11,7 @@
 from pandas._libs.internals import BlockPlacement
 from pandas._libs.tslibs import conversion
 from pandas._libs.tslibs.timezones import tz_compare
-from pandas._typing import ArrayLike
+from pandas._typing import ArrayLike, Scalar
 from pandas.util._validators import validate_bool_kwarg
 
 from pandas.core.dtypes.cast import (
@@ -59,8 +59,10 @@
 from pandas.core.dtypes.missing import _isna_compat, is_valid_nat_for_dtype, isna
 
 import pandas.core.algorithms as algos
+from pandas.core.array_algos.replace import compare_or_regex_search
 from pandas.core.array_algos.transforms import shift
 from pandas.core.arrays import (
+    BooleanArray,
     Categorical,
     DatetimeArray,
     ExtensionArray,
@@ -792,7 +794,6 @@ def _replace_list(
         self,
         src_list: List[Any],
         dest_list: List[Any],
-        masks: List[np.ndarray],
         inplace: bool = False,
         regex: bool = False,
     ) -> List["Block"]:
@@ -801,11 +802,28 @@ def _replace_list(
         """
         src_len = len(src_list) - 1
 
+        def comp(s: Scalar, mask: np.ndarray, regex: bool = False) -> np.ndarray:
+            """
+            Generate a bool array by performing an equality check, or by
+            performing an element-wise regular expression match.
+            """
+            if isna(s):
+                return ~mask
+
+            s = com.maybe_box_datetimelike(s)
+            return compare_or_regex_search(self.values, s, regex, mask)
+
+        # Calculate the mask once, prior to the call of comp
+        # in order to avoid repeating the same computations
+        mask = ~isna(self.values)
+
+        masks = [comp(s, mask, regex) for s in src_list]
+
         rb = [self if inplace else self.copy()]
         for i, (src, dest) in enumerate(zip(src_list, dest_list)):
             new_rb: List["Block"] = []
             for blk in rb:
-                m = masks[i][blk.mgr_locs.indexer]
+                m = masks[i]
                 convert = i == src_len  # only convert once at the end
                 result = blk._replace_coerce(
                     mask=m,
@@ -2906,7 +2924,9 @@ def _extract_bool_array(mask: ArrayLike) -> np.ndarray:
     """
     If we have a SparseArray or BooleanArray, convert it to ndarray[bool].
     """
-    if isinstance(mask, ExtensionArray):
+    if isinstance(mask, BooleanArray):
+        mask = mask.to_numpy(dtype=bool, na_value=False)
+    elif isinstance(mask, ExtensionArray):
         # We could have BooleanArray, Sparse[bool], ...
         mask = np.asarray(mask, dtype=np.bool_)
 
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 389252e7ef0f2..fe53acdb1880d 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1,14 +1,11 @@
 from collections import defaultdict
 import itertools
-import operator
-import re
 from typing import (
     Any,
     DefaultDict,
     Dict,
     List,
     Optional,
-    Pattern,
     Sequence,
     Tuple,
     TypeVar,
@@ -19,7 +16,7 @@
 import numpy as np
 
 from pandas._libs import internals as libinternals, lib
-from pandas._typing import ArrayLike, DtypeObj, Label, Scalar
+from pandas._typing import ArrayLike, DtypeObj, Label
 from pandas.util._validators import validate_bool_kwarg
 
 from pandas.core.dtypes.cast import (
@@ -29,12 +26,9 @@
 )
 from pandas.core.dtypes.common import (
     DT64NS_DTYPE,
-    is_datetimelike_v_numeric,
     is_dtype_equal,
     is_extension_array_dtype,
     is_list_like,
-    is_numeric_v_string_like,
-    is_scalar,
 )
 from pandas.core.dtypes.concat import concat_compat
 from pandas.core.dtypes.dtypes import ExtensionDtype
@@ -44,7 +38,6 @@
 import pandas.core.algorithms as algos
 from pandas.core.arrays.sparse import SparseDtype
 from pandas.core.base import PandasObject
-import pandas.core.common as com
 from pandas.core.construction import extract_array
 from pandas.core.indexers import maybe_convert_indices
 from pandas.core.indexes.api import Index, ensure_index
@@ -610,31 +603,10 @@ def replace_list(
         """ do a list replace """
         inplace = validate_bool_kwarg(inplace, "inplace")
 
-        # figure out our mask apriori to avoid repeated replacements
-        values = self.as_array()
-
-        def comp(s: Scalar, mask: np.ndarray, regex: bool = False):
-            """
-            Generate a bool array by perform an equality check, or perform
-            an element-wise regular expression matching
-            """
-            if isna(s):
-                return ~mask
-
-            s = com.maybe_box_datetimelike(s)
-            return _compare_or_regex_search(values, s, regex, mask)
-
-        # Calculate the mask once, prior to the call of comp
-        # in order to avoid repeating the same computations
-        mask = ~isna(values)
-
-        masks = [comp(s, mask, regex) for s in src_list]
-
         bm = self.apply(
             "_replace_list",
             src_list=src_list,
             dest_list=dest_list,
-            masks=masks,
             inplace=inplace,
             regex=regex,
         )
@@ -1882,80 +1854,6 @@ def _merge_blocks(
     return blocks
 
 
-def _compare_or_regex_search(
-    a: ArrayLike,
-    b: Union[Scalar, Pattern],
-    regex: bool = False,
-    mask: Optional[ArrayLike] = None,
-) -> Union[ArrayLike, bool]:
-    """
-    Compare two array_like inputs of the same shape or two scalar values
-
-    Calls operator.eq or re.search, depending on regex argument. If regex is
-    True, perform an element-wise regex matching.
-
-    Parameters
-    ----------
-    a : array_like
-    b : scalar or regex pattern
-    regex : bool, default False
-    mask : array_like or None (default)
-
-    Returns
-    -------
-    mask : array_like of bool
-    """
-
-    def _check_comparison_types(
-        result: Union[ArrayLike, bool], a: ArrayLike, b: Union[Scalar, Pattern]
-    ):
-        """
-        Raises an error if the two arrays (a,b) cannot be compared.
-        Otherwise, returns the comparison result as expected.
-        """
-        if is_scalar(result) and isinstance(a, np.ndarray):
-            type_names = [type(a).__name__, type(b).__name__]
-
-            if isinstance(a, np.ndarray):
-                type_names[0] = f"ndarray(dtype={a.dtype})"
-
-            raise TypeError(
-                f"Cannot compare types {repr(type_names[0])} and {repr(type_names[1])}"
-            )
-
-    if not regex:
-        op = lambda x: operator.eq(x, b)
-    else:
-        op = np.vectorize(
-            lambda x: bool(re.search(b, x))
-            if isinstance(x, str) and isinstance(b, (str, Pattern))
-            else False
-        )
-
-    # GH#32621 use mask to avoid comparing to NAs
-    if mask is None and isinstance(a, np.ndarray) and not isinstance(b, np.ndarray):
-        mask = np.reshape(~(isna(a)), a.shape)
-    if isinstance(a, np.ndarray):
-        a = a[mask]
-
-    if is_datetimelike_v_numeric(a, b) or is_numeric_v_string_like(a, b):
-        # GH#29553 avoid deprecation warnings from numpy
-        _check_comparison_types(False, a, b)
-        return False
-
-    result = op(a)
-
-    if isinstance(result, np.ndarray) and mask is not None:
-        # The shape of the mask can differ to that of the result
-        # since we may compare only a subset of a's or b's elements
-        tmp = np.zeros(mask.shape, dtype=np.bool_)
-        tmp[mask] = result
-        result = tmp
-
-    _check_comparison_types(result, a, b)
-    return result
-
-
 def _fast_count_smallints(arr: np.ndarray) -> np.ndarray:
     """Faster version of set(arr) for sequences of small numbers."""
     counts = np.bincount(arr.astype(np.int_))
diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py
index 83dfd42ae2a6e..ea2488dfc0877 100644
--- a/pandas/tests/frame/methods/test_replace.py
+++ b/pandas/tests/frame/methods/test_replace.py
@@ -1131,8 +1131,19 @@ def test_replace_bool_with_bool(self):
 
     def test_replace_with_dict_with_bool_keys(self):
         df = DataFrame({0: [True, False], 1: [False, True]})
-        with pytest.raises(TypeError, match="Cannot compare types .+"):
-            df.replace({"asdf": "asdb", True: "yes"})
+        result = df.replace({"asdf": "asdb", True: "yes"})
+        expected = DataFrame({0: ["yes", False], 1: [False, "yes"]})
+        tm.assert_frame_equal(result, expected)
+
+    def test_replace_dict_strings_vs_ints(self):
+        # GH#34789
+        df = pd.DataFrame({"Y0": [1, 2], "Y1": [3, 4]})
+        result = df.replace({"replace_string": "test"})
+
+        tm.assert_frame_equal(result, df)
+
+        result = df["Y0"].replace({"replace_string": "test"})
+        tm.assert_series_equal(result, df["Y0"])
 
     def test_replace_truthy(self):
         df = DataFrame({"a": [True, True]})
diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py
index f78a28c66e946..02c2c0dca1873 100644
--- a/pandas/tests/series/methods/test_replace.py
+++ b/pandas/tests/series/methods/test_replace.py
@@ -218,8 +218,9 @@ def test_replace_bool_with_bool(self):
 
     def test_replace_with_dict_with_bool_keys(self):
         s = pd.Series([True, False, True])
-        with pytest.raises(TypeError, match="Cannot compare types .+"):
-            s.replace({"asdf": "asdb", True: "yes"})
+        result = s.replace({"asdf": "asdb", True: "yes"})
+        expected = pd.Series(["yes", False, "yes"])
+        tm.assert_series_equal(result, expected)
 
     def test_replace2(self):
         N = 100

From 839b757218bc998be901149460f79d797c0e9838 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 3 Sep 2020 10:21:27 -0700
Subject: [PATCH 2/4] missing import

---
 pandas/core/array_algos/replace.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/core/array_algos/replace.py b/pandas/core/array_algos/replace.py
index 341472e764e7f..32351a42df092 100644
--- a/pandas/core/array_algos/replace.py
+++ b/pandas/core/array_algos/replace.py
@@ -14,6 +14,7 @@
     is_numeric_v_string_like,
     is_scalar,
 )
+from pandas.core.dtypes.missing import isna
 
 
 def compare_or_regex_search(

From b683707bfb88af8daae24e77c444d915b1db3bb3 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 3 Sep 2020 10:51:58 -0700
Subject: [PATCH 3/4] avoid specific BooleanArray special casing

---
 pandas/core/array_algos/replace.py | 2 +-
 pandas/core/internals/blocks.py    | 9 ++++-----
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/pandas/core/array_algos/replace.py b/pandas/core/array_algos/replace.py
index 32351a42df092..6ac3cc1f9f2fe 100644
--- a/pandas/core/array_algos/replace.py
+++ b/pandas/core/array_algos/replace.py
@@ -77,7 +77,7 @@ def _check_comparison_types(
         # GH#29553 avoid deprecation warnings from numpy
         return np.zeros(a.shape, dtype=bool)
 
-    elif is_datetimelike_v_numeric(a, b) or is_numeric_v_string_like(a, b):
+    elif is_datetimelike_v_numeric(a, b):
         # GH#29553 avoid deprecation warnings from numpy
         _check_comparison_types(False, a, b)
         return False
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 66448cac63106..30ea2766e5133 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -62,7 +62,6 @@
 from pandas.core.array_algos.replace import compare_or_regex_search
 from pandas.core.array_algos.transforms import shift
 from pandas.core.arrays import (
-    BooleanArray,
     Categorical,
     DatetimeArray,
     ExtensionArray,
@@ -2924,11 +2923,11 @@ def _extract_bool_array(mask: ArrayLike) -> np.ndarray:
     """
     If we have a SparseArray or BooleanArray, convert it to ndarray[bool].
     """
-    if isinstance(mask, BooleanArray):
-        mask = mask.to_numpy(dtype=bool, na_value=False)
-    elif isinstance(mask, ExtensionArray):
+    if isinstance(mask, ExtensionArray):
         # We could have BooleanArray, Sparse[bool], ...
-        mask = np.asarray(mask, dtype=np.bool_)
+        #  Except for BooleanArray, this is equivalent to just
+        #  np.asarray(mask, dtype=bool)
+        mask = mask.to_numpy(dtype=bool, na_value=False)
 
     assert isinstance(mask, np.ndarray), type(mask)
     assert mask.dtype == bool, mask.dtype

From 30be372ccdac418933d851e3f253412a06868418 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Fri, 4 Sep 2020 09:55:20 -0700
Subject: [PATCH 4/4] update docs

---
 doc/source/user_guide/missing_data.rst | 26 --------------------------
 pandas/core/generic.py                 | 14 --------------
 2 files changed, 40 deletions(-)

diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst
index 2e68a0598bb71..28206192dd161 100644
--- a/doc/source/user_guide/missing_data.rst
+++ b/doc/source/user_guide/missing_data.rst
@@ -689,32 +689,6 @@ You can also operate on the DataFrame in place:
 
    df.replace(1.5, np.nan, inplace=True)
 
-.. warning::
-
-   When replacing multiple ``bool`` or ``datetime64`` objects, the first
-   argument to ``replace`` (``to_replace``) must match the type of the value
-   being replaced. For example,
-
-   .. code-block:: python
-
-      >>> s = pd.Series([True, False, True])
-      >>> s.replace({'a string': 'new value', True: False})  # raises
-      TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str'
-
-   will raise a ``TypeError`` because one of the ``dict`` keys is not of the
-   correct type for replacement.
-
-   However, when replacing a *single* object such as,
-
-   .. ipython:: python
-
-      s = pd.Series([True, False, True])
-      s.replace('a string', 'another string')
-
-   the original ``NDFrame`` object will be returned untouched. We're working on
-   unifying this API, but for backwards compatibility reasons we cannot break
-   the latter behavior. See :issue:`6354` for more details.
-
 Missing data casting rules and indexing
 ---------------------------------------
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 6c8780a0fc186..7b8072279ce69 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -6559,20 +6559,6 @@ def replace(
         1   new  new
         2  bait  xyz
 
-        Note that when replacing multiple ``bool`` or ``datetime64`` objects,
-        the data types in the `to_replace` parameter must match the data
-        type of the value being replaced:
-
-        >>> df = pd.DataFrame({{'A': [True, False, True],
-        ...                    'B': [False, True, False]}})
-        >>> df.replace({{'a string': 'new value', True: False}})  # raises
-        Traceback (most recent call last):
-            ...
-        TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str'
-
-        This raises a ``TypeError`` because one of the ``dict`` keys is not of
-        the correct type for replacement.
-
         Compare the behavior of ``s.replace({{'a': None}})`` and
         ``s.replace('a', None)`` to understand the peculiarities
         of the `to_replace` parameter: