pandas-dev · jreback · Sep 5, 2020 · Sep 3, 2020 · Sep 3, 2020 · Sep 3, 2020
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
@@ -335,6 +335,7 @@ ExtensionArray
 Other
 ^^^^^
 - Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly raising ``AssertionError`` instead of ``ValueError`` when invalid parameter combinations are passed (:issue:`36045`)
+- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly raising ``TypeError`` when ``to_replace`` contains strings and the values being replaced are numeric (:issue:`34789`)
 -
 
 .. ---------------------------------------------------------------------------

diff --git a/pandas/core/array_algos/replace.py b/pandas/core/array_algos/replace.py
@@ -0,0 +1,106 @@
+"""
+Methods used by Block.replace and related methods.
+"""
+import operator
+import re
+from typing import Optional, Pattern, Union
+
+import numpy as np
+
+from pandas._typing import ArrayLike, Scalar
+
+from pandas.core.dtypes.common import (
+    is_datetimelike_v_numeric,
+    is_numeric_v_string_like,
+    is_scalar,
+)
+from pandas.core.dtypes.missing import isna
+
+
+def compare_or_regex_search(
+    a: ArrayLike,
+    b: Union[Scalar, Pattern],
+    regex: bool = False,
+    mask: Optional[ArrayLike] = None,
+) -> Union[ArrayLike, bool]:
+    """
+    Compare two array_like inputs of the same shape or two scalar values
+
+    Calls operator.eq or re.search, depending on regex argument. If regex is
+    True, perform an element-wise regex matching.
+
+    Parameters
+    ----------
+    a : array_like
+    b : scalar or regex pattern
+    regex : bool, default False
+    mask : array_like or None (default)
+        Boolean array marking which elements of an ndarray ``a`` to compare.
+        If None, ``a`` is an ndarray and ``b`` is not, the non-NA positions
+        of ``a`` are used.
+
+    Returns
+    -------
+    mask : array_like of bool
+        ``False`` is returned instead when ``a`` is not an ndarray and
+        ``is_datetimelike_v_numeric(a, b)`` holds.
+
+    Raises
+    ------
+    TypeError
+        If ``a`` is an ndarray and the comparison yields a scalar instead of
+        an element-wise result.
+    """
+
+    def _check_comparison_types(
+        result: Union[ArrayLike, bool], a: ArrayLike, b: Union[Scalar, Pattern]
+    ) -> None:
+        """
+        Raise TypeError if comparing ndarray ``a`` with ``b`` gave a scalar.
+        """
+        if is_scalar(result) and isinstance(a, np.ndarray):
+            a_name = f"ndarray(dtype={a.dtype})"
+            b_name = type(b).__name__
+            raise TypeError(f"Cannot compare types {repr(a_name)} and {repr(b_name)}")
+
+    if not regex:
+        op = lambda x: operator.eq(x, b)
+    else:
+        # otypes fixes the output dtype up front so that an empty selection
+        # (every element masked out) does not make np.vectorize raise
+        op = np.vectorize(
+            lambda x: bool(re.search(b, x))
+            if isinstance(x, str) and isinstance(b, (str, Pattern))
+            else False,
+            otypes=[bool],
+        )
+
+    # GH#32621 use mask to avoid comparing to NAs
+    if mask is None and isinstance(a, np.ndarray) and not isinstance(b, np.ndarray):
+        mask = np.reshape(~(isna(a)), a.shape)
+    if isinstance(a, np.ndarray):
+        a = a[mask]
+
+    if is_numeric_v_string_like(a, b):
+        # GH#29553 avoid deprecation warnings from numpy
+        # ``a`` may have been reduced by ``mask`` above, so size the all-False
+        # result like the mask, as the regular path below does
+        shape = a.shape if mask is None else mask.shape
+        return np.zeros(shape, dtype=bool)
+
+    elif is_datetimelike_v_numeric(a, b):
+        # GH#29553 avoid deprecation warnings from numpy
+        _check_comparison_types(False, a, b)
+        return False
+
+    result = op(a)
+
+    if isinstance(result, np.ndarray) and mask is not None:
+        # The shape of the mask can differ to that of the result
+        # since we may compare only a subset of a's or b's elements
+        tmp = np.zeros(mask.shape, dtype=np.bool_)
+        tmp[mask] = result
+        result = tmp
+
+    _check_comparison_types(result, a, b)
+    return result
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -11,7 +11,7 @@
 from pandas._libs.internals import BlockPlacement
 from pandas._libs.tslibs import conversion
 from pandas._libs.tslibs.timezones import tz_compare
-from pandas._typing import ArrayLike
+from pandas._typing import ArrayLike, Scalar
 from pandas.util._validators import validate_bool_kwarg
 
 from pandas.core.dtypes.cast import (
@@ -59,8 +59,10 @@
 from pandas.core.dtypes.missing import _isna_compat, is_valid_nat_for_dtype, isna
 
 import pandas.core.algorithms as algos
+from pandas.core.array_algos.replace import compare_or_regex_search
 from pandas.core.array_algos.transforms import shift
 from pandas.core.arrays import (
+    BooleanArray,
     Categorical,
     DatetimeArray,
     ExtensionArray,
@@ -792,7 +794,6 @@ def _replace_list(
         self,
         src_list: List[Any],
         dest_list: List[Any],
-        masks: List[np.ndarray],
         inplace: bool = False,
         regex: bool = False,
     ) -> List["Block"]:
@@ -801,11 +802,28 @@ def _replace_list(
         """
         src_len = len(src_list) - 1
 
+        def comp(s: Scalar, mask: np.ndarray, regex: bool = False) -> np.ndarray:
+            """
+            Generate a bool array by perform an equality check, or perform
+            an element-wise regular expression matching
+            """
+            if isna(s):
+                return ~mask
+
+            s = com.maybe_box_datetimelike(s)
+            return compare_or_regex_search(self.values, s, regex, mask)
+
+        # Calculate the mask once, prior to the call of comp
+        # in order to avoid repeating the same computations
+        mask = ~isna(self.values)
+
+        masks = [comp(s, mask, regex) for s in src_list]
+
         rb = [self if inplace else self.copy()]
         for i, (src, dest) in enumerate(zip(src_list, dest_list)):
             new_rb: List["Block"] = []
             for blk in rb:
-                m = masks[i][blk.mgr_locs.indexer]
+                m = masks[i]
                 convert = i == src_len  # only convert once at the end
                 result = blk._replace_coerce(
                     mask=m,
@@ -2906,7 +2924,9 @@ def _extract_bool_array(mask: ArrayLike) -> np.ndarray:
     """
     If we have a SparseArray or BooleanArray, convert it to ndarray[bool].
     """
-    if isinstance(mask, ExtensionArray):
+    if isinstance(mask, BooleanArray):
+        mask = mask.to_numpy(dtype=bool, na_value=False)
+    elif isinstance(mask, ExtensionArray):
         # We could have BooleanArray, Sparse[bool], ...
         mask = np.asarray(mask, dtype=np.bool_)
 

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
@@ -1,14 +1,11 @@
 from collections import defaultdict
 import itertools
-import operator
-import re
 from typing import (
     Any,
     DefaultDict,
     Dict,
     List,
     Optional,
-    Pattern,
     Sequence,
     Tuple,
     TypeVar,
@@ -19,7 +16,7 @@
 import numpy as np
 
 from pandas._libs import internals as libinternals, lib
-from pandas._typing import ArrayLike, DtypeObj, Label, Scalar
+from pandas._typing import ArrayLike, DtypeObj, Label
 from pandas.util._validators import validate_bool_kwarg
 
 from pandas.core.dtypes.cast import (
@@ -29,12 +26,9 @@
 )
 from pandas.core.dtypes.common import (
     DT64NS_DTYPE,
-    is_datetimelike_v_numeric,
     is_dtype_equal,
     is_extension_array_dtype,
     is_list_like,
-    is_numeric_v_string_like,
-    is_scalar,
 )
 from pandas.core.dtypes.concat import concat_compat
 from pandas.core.dtypes.dtypes import ExtensionDtype
@@ -44,7 +38,6 @@
 import pandas.core.algorithms as algos
 from pandas.core.arrays.sparse import SparseDtype
 from pandas.core.base import PandasObject
-import pandas.core.common as com
 from pandas.core.construction import extract_array
 from pandas.core.indexers import maybe_convert_indices
 from pandas.core.indexes.api import Index, ensure_index
@@ -628,31 +621,10 @@ def replace_list(
         """ do a list replace """
         inplace = validate_bool_kwarg(inplace, "inplace")
 
-        # figure out our mask apriori to avoid repeated replacements
-        values = self.as_array()
-
-        def comp(s: Scalar, mask: np.ndarray, regex: bool = False):
-            """
-            Generate a bool array by perform an equality check, or perform
-            an element-wise regular expression matching
-            """
-            if isna(s):
-                return ~mask
-
-            s = com.maybe_box_datetimelike(s)
-            return _compare_or_regex_search(values, s, regex, mask)
-
-        # Calculate the mask once, prior to the call of comp
-        # in order to avoid repeating the same computations
-        mask = ~isna(values)
-
-        masks = [comp(s, mask, regex) for s in src_list]
-
         bm = self.apply(
             "_replace_list",
             src_list=src_list,
             dest_list=dest_list,
-            masks=masks,
             inplace=inplace,
             regex=regex,
         )
@@ -1900,80 +1872,6 @@ def _merge_blocks(
     return blocks
 
 
-def _compare_or_regex_search(
-    a: ArrayLike,
-    b: Union[Scalar, Pattern],
-    regex: bool = False,
-    mask: Optional[ArrayLike] = None,
-) -> Union[ArrayLike, bool]:
-    """
-    Compare two array_like inputs of the same shape or two scalar values
-
-    Calls operator.eq or re.search, depending on regex argument. If regex is
-    True, perform an element-wise regex matching.
-
-    Parameters
-    ----------
-    a : array_like
-    b : scalar or regex pattern
-    regex : bool, default False
-    mask : array_like or None (default)
-
-    Returns
-    -------
-    mask : array_like of bool
-    """
-
-    def _check_comparison_types(
-        result: Union[ArrayLike, bool], a: ArrayLike, b: Union[Scalar, Pattern]
-    ):
-        """
-        Raises an error if the two arrays (a,b) cannot be compared.
-        Otherwise, returns the comparison result as expected.
-        """
-        if is_scalar(result) and isinstance(a, np.ndarray):
-            type_names = [type(a).__name__, type(b).__name__]
-
-            if isinstance(a, np.ndarray):
-                type_names[0] = f"ndarray(dtype={a.dtype})"
-
-            raise TypeError(
-                f"Cannot compare types {repr(type_names[0])} and {repr(type_names[1])}"
-            )
-
-    if not regex:
-        op = lambda x: operator.eq(x, b)
-    else:
-        op = np.vectorize(
-            lambda x: bool(re.search(b, x))
-            if isinstance(x, str) and isinstance(b, (str, Pattern))
-            else False
-        )
-
-    # GH#32621 use mask to avoid comparing to NAs
-    if mask is None and isinstance(a, np.ndarray) and not isinstance(b, np.ndarray):
-        mask = np.reshape(~(isna(a)), a.shape)
-    if isinstance(a, np.ndarray):
-        a = a[mask]
-
-    if is_datetimelike_v_numeric(a, b) or is_numeric_v_string_like(a, b):
-        # GH#29553 avoid deprecation warnings from numpy
-        _check_comparison_types(False, a, b)
-        return False
-
-    result = op(a)
-
-    if isinstance(result, np.ndarray) and mask is not None:
-        # The shape of the mask can differ to that of the result
-        # since we may compare only a subset of a's or b's elements
-        tmp = np.zeros(mask.shape, dtype=np.bool_)
-        tmp[mask] = result
-        result = tmp
-
-    _check_comparison_types(result, a, b)
-    return result
-
-
 def _fast_count_smallints(arr: np.ndarray) -> np.ndarray:
     """Faster version of set(arr) for sequences of small numbers."""
     counts = np.bincount(arr.astype(np.int_))

diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py
@@ -1131,8 +1131,19 @@ def test_replace_bool_with_bool(self):
 
     def test_replace_with_dict_with_bool_keys(self):
         df = DataFrame({0: [True, False], 1: [False, True]})
-        with pytest.raises(TypeError, match="Cannot compare types .+"):
-            df.replace({"asdf": "asdb", True: "yes"})
+        result = df.replace({"asdf": "asdb", True: "yes"})
+        expected = DataFrame({0: ["yes", False], 1: [False, "yes"]})
+        tm.assert_frame_equal(result, expected)
+
+    def test_replace_dict_strings_vs_ints(self):
+        # GH#34789
+        df = DataFrame({"Y0": [1, 2], "Y1": [3, 4]})
+        result = df.replace({"replace_string": "test"})
+        # no integer equals the string key, so the frame must come back unchanged
+        tm.assert_frame_equal(result, df)
+        # the same must hold for a single column going through Series.replace
+        result = df["Y0"].replace({"replace_string": "test"})
+        tm.assert_series_equal(result, df["Y0"])
 
     def test_replace_truthy(self):
         df = DataFrame({"a": [True, True]})

diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py
@@ -218,8 +218,9 @@ def test_replace_bool_with_bool(self):
 
     def test_replace_with_dict_with_bool_keys(self):
         s = pd.Series([True, False, True])
-        with pytest.raises(TypeError, match="Cannot compare types .+"):
-            s.replace({"asdf": "asdb", True: "yes"})
+        result = s.replace({"asdf": "asdb", True: "yes"})
+        expected = pd.Series(["yes", False, "yes"])
+        tm.assert_series_equal(result, expected)
 
     def test_replace2(self):
         N = 100
-Original file line number
+Diff line change
@@ Expand Up / @@ -335,6 +335,7 @@ ExtensionArray @@
     Other
     ^^^^^
     - Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly raising ``AssertionError`` instead of ``ValueError`` when invalid parameter combinations are passed (:issue:`36045`)
+    - Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly raising ``TypeError`` when ``to_replace`` contains strings and the values being replaced are numeric (:issue:`34789`)
     -
     .. ---------------------------------------------------------------------------
@@ Expand Down @@