Skip to content

Commit 37da2c1

Browse files
meeseeksmachinejreback
authored and committed
Backport PR pandas-dev#25266: BUG: Fix regression on DataFrame.replace for regex (pandas-dev#25477)
1 parent 90b2daa commit 37da2c1

File tree

3 files changed

+14
-6
lines changed

3 files changed

+14
-6
lines changed

doc/source/whatsnew/v0.24.2.rst

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ Fixed Regressions
2323
- Fixed regression in :meth:`DataFrame.all` and :meth:`DataFrame.any` where ``bool_only=True`` was ignored (:issue:`25101`)
2424
- Fixed issue in ``DataFrame`` construction where passing a mixed list of mixed types could segfault. (:issue:`25075`)
2525
- Fixed regression in :meth:`DataFrame.apply` causing ``RecursionError`` when ``dict``-like classes were passed as argument. (:issue:`25196`)
26+
- Fixed regression in :meth:`DataFrame.replace` where ``regex=True`` was only replacing patterns matching the start of the string (:issue:`25259`)
2627

2728
- Fixed regression in :meth:`DataFrame.duplicated()`, where empty dataframe was not returning a boolean dtyped Series. (:issue:`25184`)
2829
- Fixed regression in :meth:`Series.min` and :meth:`Series.max` where ``numeric_only=True`` was ignored when the ``Series`` contained ``Categorical`` data (:issue:`25299`)

pandas/core/internals/managers.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -552,9 +552,9 @@ def comp(s, regex=False):
552552
if isna(s):
553553
return isna(values)
554554
if hasattr(s, 'asm8'):
555-
return _compare_or_regex_match(maybe_convert_objects(values),
556-
getattr(s, 'asm8'), regex)
557-
return _compare_or_regex_match(values, s, regex)
555+
return _compare_or_regex_search(maybe_convert_objects(values),
556+
getattr(s, 'asm8'), regex)
557+
return _compare_or_regex_search(values, s, regex)
558558

559559
masks = [comp(s, regex) for i, s in enumerate(src_list)]
560560

@@ -1901,11 +1901,11 @@ def _consolidate(blocks):
19011901
return new_blocks
19021902

19031903

1904-
def _compare_or_regex_match(a, b, regex=False):
1904+
def _compare_or_regex_search(a, b, regex=False):
19051905
"""
19061906
Compare two array_like inputs of the same shape or two scalar values
19071907
1908-
Calls operator.eq or re.match, depending on regex argument. If regex is
1908+
Calls operator.eq or re.search, depending on regex argument. If regex is
19091909
True, perform an element-wise regex matching.
19101910
19111911
Parameters
@@ -1921,7 +1921,7 @@ def _compare_or_regex_match(a, b, regex=False):
19211921
if not regex:
19221922
op = lambda x: operator.eq(x, b)
19231923
else:
1924-
op = np.vectorize(lambda x: bool(re.match(b, x)) if isinstance(x, str)
1924+
op = np.vectorize(lambda x: bool(re.search(b, x)) if isinstance(x, str)
19251925
else False)
19261926

19271927
is_a_array = isinstance(a, np.ndarray)

pandas/tests/frame/test_replace.py

+7
Original file line numberDiff line numberDiff line change
@@ -466,6 +466,13 @@ def test_regex_replace_dict_nested(self):
466466
assert_frame_equal(res3, expec)
467467
assert_frame_equal(res4, expec)
468468

469+
def test_regex_replace_dict_nested_non_first_character(self):
470+
# GH 25259
471+
df = pd.DataFrame({'first': ['abc', 'bca', 'cab']})
472+
expected = pd.DataFrame({'first': ['.bc', 'bc.', 'c.b']})
473+
result = df.replace({'a': '.'}, regex=True)
474+
assert_frame_equal(result, expected)
475+
469476
def test_regex_replace_dict_nested_gh4115(self):
470477
df = pd.DataFrame({'Type': ['Q', 'T', 'Q', 'Q', 'T'], 'tmp': 2})
471478
expected = DataFrame({'Type': [0, 1, 0, 0, 1], 'tmp': 2})

0 commit comments

Comments
 (0)