Backport PR pandas-dev#25266: BUG: Fix regression on DataFrame.replace for regex (pandas-dev#25477)

meeseeksmachine · jreback · commit 37da2c100fd0 · 2019-02-28T09:42:29.000-05:00
diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst
@@ -23,6 +23,7 @@ Fixed Regressions
 - Fixed regression in :meth:`DataFrame.all` and :meth:`DataFrame.any` where ``bool_only=True`` was ignored (:issue:`25101`)
 - Fixed issue in ``DataFrame`` construction with passing a mixed list of mixed types could segfault. (:issue:`25075`)
 - Fixed regression in :meth:`DataFrame.apply` causing ``RecursionError`` when ``dict``-like classes were passed as argument. (:issue:`25196`)
+- Fixed regression in :meth:`DataFrame.replace` where ``regex=True`` was only replacing patterns matching the start of the string (:issue:`25259`)
 
 - Fixed regression in :meth:`DataFrame.duplicated()`, where empty dataframe was not returning a boolean dtyped Series. (:issue:`25184`)
 - Fixed regression in :meth:`Series.min` and :meth:`Series.max` where ``numeric_only=True`` was ignored when the ``Series`` contained ```Categorical`` data (:issue:`25299`)
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
@@ -552,9 +552,9 @@ def comp(s, regex=False):
             if isna(s):
                 return isna(values)
             if hasattr(s, 'asm8'):
-                return _compare_or_regex_match(maybe_convert_objects(values),
-                                               getattr(s, 'asm8'), regex)
-            return _compare_or_regex_match(values, s, regex)
+                return _compare_or_regex_search(maybe_convert_objects(values),
+                                                getattr(s, 'asm8'), regex)
+            return _compare_or_regex_search(values, s, regex)
 
         masks = [comp(s, regex) for i, s in enumerate(src_list)]
 
@@ -1901,11 +1901,11 @@ def _consolidate(blocks):
     return new_blocks
 
 
-def _compare_or_regex_match(a, b, regex=False):
+def _compare_or_regex_search(a, b, regex=False):
     """
     Compare two array_like inputs of the same shape or two scalar values
 
-    Calls operator.eq or re.match, depending on regex argument. If regex is
+    Calls operator.eq or re.search, depending on regex argument. If regex is
     True, perform an element-wise regex matching.
 
     Parameters
@@ -1921,7 +1921,7 @@ def _compare_or_regex_match(a, b, regex=False):
     if not regex:
         op = lambda x: operator.eq(x, b)
     else:
-        op = np.vectorize(lambda x: bool(re.match(b, x)) if isinstance(x, str)
+        op = np.vectorize(lambda x: bool(re.search(b, x)) if isinstance(x, str)
                           else False)
 
     is_a_array = isinstance(a, np.ndarray)
diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/test_replace.py
@@ -466,6 +466,13 @@ def test_regex_replace_dict_nested(self):
         assert_frame_equal(res3, expec)
         assert_frame_equal(res4, expec)
 
+    def test_regex_replace_dict_nested_non_first_character(self):
+        # GH 25259
+        df = pd.DataFrame({'first': ['abc', 'bca', 'cab']})
+        expected = pd.DataFrame({'first': ['.bc', 'bc.', 'c.b']})
+        result = df.replace({'a': '.'}, regex=True)
+        assert_frame_equal(result, expected)
+
     def test_regex_replace_dict_nested_gh4115(self):
         df = pd.DataFrame({'Type': ['Q', 'T', 'Q', 'Q', 'T'], 'tmp': 2})
         expected = DataFrame({'Type': [0, 1, 0, 0, 1], 'tmp': 2})