pandas-dev · jreback · Aug 9, 2018 · Jun 7, 2018 · Jul 31, 2018 · Jul 31, 2018
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -668,6 +668,7 @@ Reshaping
 - :func:`pandas.core.groupby.GroupBy.rank` now raises a ``ValueError`` when an invalid value is passed for argument ``na_option`` (:issue:`22124`)
 - Bug in :func:`get_dummies` with Unicode attributes in Python 2 (:issue:`22084`)
 - Bug in :meth:`DataFrame.replace` raises ``RecursionError`` when replacing empty lists (:issue:`22083`)
+- Bug in :meth:`Series.replace` and meth:`DataFrame.replace` when dict is used as the `to_replace` value and one key in the dict is is another key's value, the results were inconsistent between using integer key and using string key (:issue:`20656`)
 -
 
 Build Changes

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -1695,6 +1695,45 @@ def _nanpercentile(values, q, axis, **kw):
                               placement=np.arange(len(result)),
                               ndim=ndim)
 
+    def _replace_coerce(self, to_replace, value, inplace=True, regex=False,
+                        convert=False, mgr=None, mask=None):
+        """
+        Replace value corresponding to the given boolean array with another
+        value.
+
+        Parameters
+        ----------
+        to_replace : object or pattern
+            Scalar to replace or regular expression to match.
+        value : object
+            Replacement object.
+        inplace : bool, default False
+            Perform inplace modification.
+        regex : bool, default False
+            If true, perform regular expression substitution.
+        convert : bool, default True
+            If true, try to coerce any object types to better types.
+        mgr : BlockManager, optional
+        mask : array-like of bool, optional
+            True indicate corresponding element is ignored.
+
+        Returns
+        -------
+        A new block if there is anything to replace or the original block.
+        """
+
+        if mask.any():
+            if not regex:
+                self = self.coerce_to_target_dtype(value)
+                return self.putmask(mask, value, inplace=inplace)
+            else:
+                return self._replace_single(to_replace, value, inplace=inplace,
+                                            regex=regex,
+                                            convert=convert,
+                                            mask=mask,
+                                            mgr=mgr)
+        return self
+
 
 class ScalarBlock(Block):
     """
@@ -2470,8 +2509,31 @@ def replace(self, to_replace, value, inplace=False, filter=None,
                                     regex=regex, mgr=mgr)
 
     def _replace_single(self, to_replace, value, inplace=False, filter=None,
-                        regex=False, convert=True, mgr=None):
+                        regex=False, convert=True, mgr=None, mask=None):
+        """
+        Replace elements by the given value.
 
+        Parameters
+        ----------
+        to_replace : object or pattern
+            Scalar to replace or regular expression to match.
+        value : object
+            Replacement object.
+        inplace : bool, default False
+            Perform inplace modification.
+        filter : list, optional
+        regex : bool, default False
+            If true, perform regular expression substitution.
+        convert : bool, default True
+            If true, try to coerce any object types to better types.
+        mgr : BlockManager, optional
+        mask : array-like of bool, optional
+            True indicate corresponding element is ignored.
+
+        Returns
+        -------
+        a new block, the result after replacing
+        """
         inplace = validate_bool_kwarg(inplace, 'inplace')
 
         # to_replace is regex compilable
@@ -2537,15 +2599,53 @@ def re_replacer(s):
         else:
             filt = self.mgr_locs.isin(filter).nonzero()[0]
 
-        new_values[filt] = f(new_values[filt])
+        if mask is None:
+            new_values[filt] = f(new_values[filt])
+        else:
+            new_values[filt][mask] = f(new_values[filt][mask])
 
         # convert
         block = self.make_block(new_values)
         if convert:
             block = block.convert(by_item=True, numeric=False)
-
         return block
 
+    def _replace_coerce(self, to_replace, value, inplace=True, regex=False,
+                        convert=False, mgr=None, mask=None):
+        """
+        Replace value corresponding to the given boolean array with another
+        value.
+
+        Parameters
+        ----------
+        to_replace : object or pattern
+            Scalar to replace or regular expression to match.
+        value : object
+            Replacement object.
+        inplace : bool, default False
+            Perform inplace modification.
+        regex : bool, default False
+            If true, perform regular expression substitution.
+        convert : bool, default True
+            If true, try to coerce any object types to better types.
+        mgr : BlockManager, optional
+        mask : array-like of bool, optional
+            True indicate corresponding element is ignored.
+
+        Returns
+        -------
+        A new block if there is anything to replace or the original block.
+        """
+        if mask.any():
+            block = super(ObjectBlock, self)._replace_coerce(
+                to_replace=to_replace, value=value, inplace=inplace,
+                regex=regex, convert=convert, mgr=mgr, mask=mask)
+            if convert:
+                block = [b.convert(by_item=True, numeric=False, copy=True)
+                         for b in block]
+            return block
+        return self
+
 
 class CategoricalBlock(ExtensionBlock):
     __slots__ = ()

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
@@ -3,6 +3,7 @@
 from functools import partial
 import itertools
 import operator
+import re
 
 import numpy as np
 
@@ -23,7 +24,8 @@
 from pandas.core.dtypes.cast import (
     maybe_promote,
     infer_dtype_from_scalar,
-    find_common_type)
+    find_common_type,
+    maybe_convert_objects)
 from pandas.core.dtypes.missing import isna
 import pandas.core.dtypes.concat as _concat
 from pandas.core.dtypes.generic import ABCSeries, ABCExtensionArray
@@ -571,12 +573,19 @@ def replace_list(self, src_list, dest_list, inplace=False, regex=False,
         # figure out our mask a-priori to avoid repeated replacements
         values = self.as_array()
 
-        def comp(s):
+        def comp(s, regex=False):
+            """
+            Generate a bool array by perform an equality check, or perform
+            an element-wise regular expression matching
+            """
             if isna(s):
                 return isna(values)
-            return _maybe_compare(values, getattr(s, 'asm8', s), operator.eq)
+            if hasattr(s, 'asm8'):
+                return _compare_or_regex_match(maybe_convert_objects(values),
+                                               getattr(s, 'asm8'), regex)
+            return _compare_or_regex_match(values, s, regex)
 
-        masks = [comp(s) for i, s in enumerate(src_list)]
+        masks = [comp(s, regex) for i, s in enumerate(src_list)]
 
         result_blocks = []
         src_len = len(src_list) - 1
@@ -588,20 +597,16 @@ def comp(s):
             for i, (s, d) in enumerate(zip(src_list, dest_list)):
                 new_rb = []
                 for b in rb:
-                    if b.dtype == np.object_:
-                        convert = i == src_len
-                        result = b.replace(s, d, inplace=inplace, regex=regex,
-                                           mgr=mgr, convert=convert)
+                    m = masks[i][b.mgr_locs.indexer]
+                    convert = i == src_len
+                    result = b._replace_coerce(mask=m, to_replace=s, value=d,
+                                               inplace=inplace,
+                                               convert=convert, regex=regex,
+                                               mgr=mgr)
+                    if m.any():
                         new_rb = _extend_blocks(result, new_rb)
                     else:
-                        # get our mask for this element, sized to this
-                        # particular block
-                        m = masks[i][b.mgr_locs.indexer]
-                        if m.any():
-                            b = b.coerce_to_target_dtype(d)
-                            new_rb.extend(b.putmask(m, d, inplace=True))
-                        else:
-                            new_rb.append(b)
+                        new_rb.append(b)
                 rb = new_rb
             result_blocks.extend(rb)
 
@@ -1890,7 +1895,28 @@ def _consolidate(blocks):
     return new_blocks
 
 
-def _maybe_compare(a, b, op):
+def _compare_or_regex_match(a, b, regex=False):
+    """
+    Compare two array_like inputs of the same shape or two scalar values
+
+    Calls operator.eq or re.match, depending on regex argument. If regex is
+    True, perform an element-wise regex matching.
+
+    Parameters
+    ----------
+    a : array_like or scalar
+    b : array_like or scalar
+    regex : bool, default False
+
+    Returns
+    -------
+    mask : array_like of bool
+    """
+    if not regex:
+        op = lambda x: operator.eq(x, b)
+    else:
+        op = np.vectorize(lambda x: bool(re.match(b, x)) if isinstance(x, str)
+                          else False)
 
     is_a_array = isinstance(a, np.ndarray)
     is_b_array = isinstance(b, np.ndarray)
@@ -1902,9 +1928,8 @@ def _maybe_compare(a, b, op):
     # numpy deprecation warning if comparing numeric vs string-like
     elif is_numeric_v_string_like(a, b):
         result = False
-
     else:
-        result = op(a, b)
+        result = op(a)
 
     if is_scalar(result) and (is_a_array or is_b_array):
         type_names = [type(a).__name__, type(b).__name__]

diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py
@@ -256,6 +256,14 @@ def test_replace_string_with_number(self):
         expected = pd.Series([1, 2, 3])
         tm.assert_series_equal(expected, result)
 
+    def test_replace_replacer_equals_replacement(self):
+        # GH 20656
+        # make sure all replacers are matching against original values
+        s = pd.Series(['a', 'b'])
+        expected = pd.Series(['b', 'a'])
+        result = s.replace({'a': 'b', 'b': 'a'})
+        tm.assert_series_equal(expected, result)
+
     def test_replace_unicode_with_number(self):
         # GH 15743
         s = pd.Series([1, 2, 3])