REGR: replace with multivalued regex raising (pandas-dev#40604)

mzeitlin11 · vladu · commit 4ee5ac48efda · 2021-04-05T10:53:45.000-05:00
diff --git a/doc/source/whatsnew/v1.2.4.rst b/doc/source/whatsnew/v1.2.4.rst
@@ -18,6 +18,7 @@ Fixed regressions
 - Fixed regression in :meth:`DataFrame.sum` when ``min_count`` greater than the :class:`DataFrame` shape was passed resulted in a ``ValueError`` (:issue:`39738`)
 - Fixed regression in :meth:`DataFrame.to_json` raising ``AttributeError`` when run on PyPy (:issue:`39837`)
 - Fixed regression in :meth:`DataFrame.where` not returning a copy in the case of an all True condition (:issue:`39595`)
+- Fixed regression in :meth:`DataFrame.replace` raising ``IndexError`` when ``regex`` was a multi-key dictionary (:issue:`39338`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -813,10 +813,20 @@ def _replace_list(
 
         rb = [self if inplace else self.copy()]
         for i, (src, dest) in enumerate(pairs):
+            convert = i == src_len  # only convert once at the end
             new_rb: List[Block] = []
-            for blk in rb:
-                m = masks[i]
-                convert = i == src_len  # only convert once at the end
+
+            # GH-39338: _replace_coerce can split a block into
+            # single-column blocks, so track the index so we know
+            # where to index into the mask
+            for blk_num, blk in enumerate(rb):
+                if len(rb) == 1:
+                    m = masks[i]
+                else:
+                    mib = masks[i]
+                    assert not isinstance(mib, bool)
+                    m = mib[blk_num : blk_num + 1]
+
                 result = blk._replace_coerce(
                     to_replace=src,
                     value=dest,
diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py
@@ -654,6 +654,28 @@ def test_regex_replace_numeric_to_object_conversion(self, mix_abc):
         tm.assert_frame_equal(res, expec)
         assert res.a.dtype == np.object_
 
+    @pytest.mark.parametrize(
+        "to_replace", [{"": np.nan, ",": ""}, {",": "", "": np.nan}]
+    )
+    def test_joint_simple_replace_and_regex_replace(self, to_replace):
+        # GH-39338: multi-key regex dict previously raised IndexError
+        df = DataFrame(
+            {
+                "col1": ["1,000", "a", "3"],
+                "col2": ["a", "", "b"],
+                "col3": ["a", "b", "c"],
+            }
+        )
+        result = df.replace(regex=to_replace)
+        expected = DataFrame(
+            {
+                "col1": ["1000", "a", "3"],
+                "col2": ["a", np.nan, "b"],
+                "col3": ["a", "b", "c"],
+            }
+        )
+        tm.assert_frame_equal(result, expected)
+
     @pytest.mark.parametrize("metachar", ["[]", "()", r"\d", r"\w", r"\s"])
     def test_replace_regex_metachar(self, metachar):
         df = DataFrame({"a": [metachar, "else"]})

Original file line number	Diff line number	Diff line change
`@@ -18,6 +18,7 @@ Fixed regressions`
`18`	`18`	- Fixed regression in :meth:`DataFrame.sum` when ``min_count`` greater than the :class:`DataFrame` shape was passed resulted in a ``ValueError`` (:issue:`39738`)
`19`	`19`	- Fixed regression in :meth:`DataFrame.to_json` raising ``AttributeError`` when run on PyPy (:issue:`39837`)
`20`	`20`	- Fixed regression in :meth:`DataFrame.where` not returning a copy in the case of an all True condition (:issue:`39595`)
	`21`	+- Fixed regression in :meth:`DataFrame.replace` raising ``IndexError`` when ``regex`` was a multi-key dictionary (:issue:`39338`)
`21`	`22`	`-`
`22`	`23`
`23`	`24`	`.. ---------------------------------------------------------------------------`