diff --git a/doc/source/whatsnew/v1.2.4.rst b/doc/source/whatsnew/v1.2.4.rst
index 45d131327630e..26d768f830830 100644
--- a/doc/source/whatsnew/v1.2.4.rst
+++ b/doc/source/whatsnew/v1.2.4.rst
@@ -18,6 +18,7 @@ Fixed regressions
 - Fixed regression in :meth:`DataFrame.sum` when ``min_count`` greater than the :class:`DataFrame` shape was passed resulted in a ``ValueError`` (:issue:`39738`)
 - Fixed regression in :meth:`DataFrame.to_json` raising ``AttributeError`` when run on PyPy (:issue:`39837`)
 - Fixed regression in :meth:`DataFrame.where` not returning a copy in the case of an all True condition (:issue:`39595`)
+- Fixed regression in :meth:`DataFrame.replace` raising ``IndexError`` when ``regex`` was a multi-key dictionary (:issue:`39338`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 7d8dcb34ed582..c177618827edb 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -801,10 +801,20 @@ def _replace_list(
 
         rb = [self if inplace else self.copy()]
         for i, (src, dest) in enumerate(pairs):
+            convert = i == src_len  # only convert once at the end
             new_rb: List[Block] = []
-            for blk in rb:
-                m = masks[i]
-                convert = i == src_len  # only convert once at the end
+
+            # GH-39338: _replace_coerce can split a block into
+            # single-column blocks, so track the index so we know
+            # where to index into the mask
+            for blk_num, blk in enumerate(rb):
+                if len(rb) == 1:
+                    m = masks[i]
+                else:
+                    mib = masks[i]
+                    assert not isinstance(mib, bool)
+                    m = mib[blk_num : blk_num + 1]
+
                 result = blk._replace_coerce(
                     to_replace=src,
                     value=dest,
diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py
index 56750da7c90b2..d8f93f047e74b 100644
--- a/pandas/tests/frame/methods/test_replace.py
+++ b/pandas/tests/frame/methods/test_replace.py
@@ -654,6 +654,28 @@ def test_regex_replace_numeric_to_object_conversion(self, mix_abc):
         tm.assert_frame_equal(res, expec)
         assert res.a.dtype == np.object_
 
+    @pytest.mark.parametrize(
+        "to_replace", [{"": np.nan, ",": ""}, {",": "", "": np.nan}]
+    )
+    def test_joint_simple_replace_and_regex_replace(self, to_replace):
+        # GH-39338
+        df = DataFrame(
+            {
+                "col1": ["1,000", "a", "3"],
+                "col2": ["a", "", "b"],
+                "col3": ["a", "b", "c"],
+            }
+        )
+        result = df.replace(regex=to_replace)
+        expected = DataFrame(
+            {
+                "col1": ["1000", "a", "3"],
+                "col2": ["a", np.nan, "b"],
+                "col3": ["a", "b", "c"],
+            }
+        )
+        tm.assert_frame_equal(result, expected)
+
     @pytest.mark.parametrize("metachar", ["[]", "()", r"\d", r"\w", r"\s"])
     def test_replace_regex_metachar(self, metachar):
         df = DataFrame({"a": [metachar, "else"]})