Skip to content

Commit 4ee5ac4

Browse files
mzeitlin11 authored and vladu committed
REGR: replace with multivalued regex raising (pandas-dev#40604)
1 parent ab80804 commit 4ee5ac4

File tree

3 files changed

+36
-3
lines changed

3 files changed

+36
-3
lines changed

doc/source/whatsnew/v1.2.4.rst

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ Fixed regressions
1818
- Fixed regression in :meth:`DataFrame.sum` when ``min_count`` greater than the :class:`DataFrame` shape was passed resulted in a ``ValueError`` (:issue:`39738`)
1919
- Fixed regression in :meth:`DataFrame.to_json` raising ``AttributeError`` when run on PyPy (:issue:`39837`)
2020
- Fixed regression in :meth:`DataFrame.where` not returning a copy in the case of an all True condition (:issue:`39595`)
21+
- Fixed regression in :meth:`DataFrame.replace` raising ``IndexError`` when ``regex`` was a multi-key dictionary (:issue:`39338`)
2122
-
2223

2324
.. ---------------------------------------------------------------------------

pandas/core/internals/blocks.py

+13-3
Original file line numberDiff line numberDiff line change
@@ -813,10 +813,20 @@ def _replace_list(
813813

814814
rb = [self if inplace else self.copy()]
815815
for i, (src, dest) in enumerate(pairs):
816+
convert = i == src_len # only convert once at the end
816817
new_rb: List[Block] = []
817-
for blk in rb:
818-
m = masks[i]
819-
convert = i == src_len # only convert once at the end
818+
819+
# GH-39338: _replace_coerce can split a block into
820+
# single-column blocks, so track the index so we know
821+
# where to index into the mask
822+
for blk_num, blk in enumerate(rb):
823+
if len(rb) == 1:
824+
m = masks[i]
825+
else:
826+
mib = masks[i]
827+
assert not isinstance(mib, bool)
828+
m = mib[blk_num : blk_num + 1]
829+
820830
result = blk._replace_coerce(
821831
to_replace=src,
822832
value=dest,

pandas/tests/frame/methods/test_replace.py

+22
Original file line numberDiff line numberDiff line change
@@ -654,6 +654,28 @@ def test_regex_replace_numeric_to_object_conversion(self, mix_abc):
654654
tm.assert_frame_equal(res, expec)
655655
assert res.a.dtype == np.object_
656656

657+
@pytest.mark.parametrize(
658+
"to_replace", [{"": np.nan, ",": ""}, {",": "", "": np.nan}]
659+
)
660+
def test_joint_simple_replace_and_regex_replace(self, to_replace):
661+
# GH-39338
662+
df = DataFrame(
663+
{
664+
"col1": ["1,000", "a", "3"],
665+
"col2": ["a", "", "b"],
666+
"col3": ["a", "b", "c"],
667+
}
668+
)
669+
result = df.replace(regex=to_replace)
670+
expected = DataFrame(
671+
{
672+
"col1": ["1000", "a", "3"],
673+
"col2": ["a", np.nan, "b"],
674+
"col3": ["a", "b", "c"],
675+
}
676+
)
677+
tm.assert_frame_equal(result, expected)
678+
657679
@pytest.mark.parametrize("metachar", ["[]", "()", r"\d", r"\w", r"\s"])
658680
def test_replace_regex_metachar(self, metachar):
659681
df = DataFrame({"a": [metachar, "else"]})

0 commit comments

Comments
 (0)