Skip to content

Commit 114e0d9

Browse files
Backport PR #40604: REGR: replace with multivalued regex raising (#40648)
Co-authored-by: Matthew Zeitlin <[email protected]>
1 parent ed966dd commit 114e0d9

File tree

3 files changed

+36
-3
lines changed

3 files changed

+36
-3
lines changed

doc/source/whatsnew/v1.2.4.rst

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ Fixed regressions
1818
- Fixed regression in :meth:`DataFrame.sum` raising a ``ValueError`` when a ``min_count`` greater than the :class:`DataFrame` shape was passed (:issue:`39738`)
1919
- Fixed regression in :meth:`DataFrame.to_json` raising ``AttributeError`` when run on PyPy (:issue:`39837`)
2020
- Fixed regression in :meth:`DataFrame.where` not returning a copy in the case of an all True condition (:issue:`39595`)
21+
- Fixed regression in :meth:`DataFrame.replace` raising ``IndexError`` when ``regex`` was a multi-key dictionary (:issue:`39338`)
2122
-
2223

2324
.. ---------------------------------------------------------------------------

pandas/core/internals/blocks.py

+13-3
Original file line numberDiff line numberDiff line change
@@ -894,10 +894,20 @@ def comp(s: Scalar, mask: np.ndarray, regex: bool = False) -> np.ndarray:
894894

895895
rb = [self if inplace else self.copy()]
896896
for i, (src, dest) in enumerate(pairs):
897+
convert = i == src_len # only convert once at the end
897898
new_rb: List["Block"] = []
898-
for blk in rb:
899-
m = masks[i]
900-
convert = i == src_len # only convert once at the end
899+
900+
# GH-39338: _replace_coerce can split a block into
901+
# single-column blocks, so track the index so we know
902+
# where to index into the mask
903+
for blk_num, blk in enumerate(rb):
904+
if len(rb) == 1:
905+
m = masks[i]
906+
else:
907+
mib = masks[i]
908+
assert not isinstance(mib, bool)
909+
m = mib[blk_num : blk_num + 1]
910+
901911
result = blk._replace_coerce(
902912
to_replace=src,
903913
value=dest,

pandas/tests/frame/methods/test_replace.py

+22
Original file line numberDiff line numberDiff line change
@@ -644,6 +644,28 @@ def test_regex_replace_numeric_to_object_conversion(self, mix_abc):
644644
tm.assert_frame_equal(res, expec)
645645
assert res.a.dtype == np.object_
646646

647+
@pytest.mark.parametrize(
648+
"to_replace", [{"": np.nan, ",": ""}, {",": "", "": np.nan}]
649+
)
650+
def test_joint_simple_replace_and_regex_replace(self, to_replace):
651+
# GH-39338
652+
df = DataFrame(
653+
{
654+
"col1": ["1,000", "a", "3"],
655+
"col2": ["a", "", "b"],
656+
"col3": ["a", "b", "c"],
657+
}
658+
)
659+
result = df.replace(regex=to_replace)
660+
expected = DataFrame(
661+
{
662+
"col1": ["1000", "a", "3"],
663+
"col2": ["a", np.nan, "b"],
664+
"col3": ["a", "b", "c"],
665+
}
666+
)
667+
tm.assert_frame_equal(result, expected)
668+
647669
@pytest.mark.parametrize("metachar", ["[]", "()", r"\d", r"\w", r"\s"])
648670
def test_replace_regex_metachar(self, metachar):
649671
df = DataFrame({"a": [metachar, "else"]})

0 commit comments

Comments
 (0)