Skip to content

Commit ac8845b

Browse files
Backport PR pandas-dev#35697: REGR: Don't ignore compiled patterns in replace (pandas-dev#35765)
Co-authored-by: Daniel Saxton <[email protected]>
1 parent a49bfcd commit ac8845b

File tree

4 files changed

+37
-5
lines changed

4 files changed

+37
-5
lines changed

doc/source/whatsnew/v1.1.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ Fixed regressions
2626
- Fixed regression where :meth:`DataFrame.reset_index` would raise a ``ValueError`` on an empty :class:`DataFrame` with a :class:`MultiIndex` with a ``datetime64`` dtype level (:issue:`35606`, :issue:`35657`)
2727
- Fixed regression where :meth:`DataFrame.merge_asof` would raise an ``UnboundLocalError`` when ``left_index``, ``right_index`` and ``tolerance`` were set (:issue:`35558`)
2828
- Fixed regression in ``.groupby(..).rolling(..)`` where a custom ``BaseIndexer`` would be ignored (:issue:`35557`)
29+
- Fixed regression in :meth:`DataFrame.replace` and :meth:`Series.replace` where compiled regular expressions would be ignored during replacement (:issue:`35680`)
2930
- Fixed regression in :meth:`~pandas.core.groupby.DataFrameGroupBy.agg` where a list of functions would produce the wrong results if at least one of the functions did not aggregate (:issue:`35490`)
3031

3132
.. ---------------------------------------------------------------------------

pandas/core/internals/managers.py

+18-5
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,17 @@
22
import itertools
33
import operator
44
import re
5-
from typing import DefaultDict, Dict, List, Optional, Sequence, Tuple, TypeVar, Union
5+
from typing import (
6+
DefaultDict,
7+
Dict,
8+
List,
9+
Optional,
10+
Pattern,
11+
Sequence,
12+
Tuple,
13+
TypeVar,
14+
Union,
15+
)
616
import warnings
717

818
import numpy as np
@@ -1922,7 +1932,10 @@ def _merge_blocks(
19221932

19231933

19241934
def _compare_or_regex_search(
1925-
a: ArrayLike, b: Scalar, regex: bool = False, mask: Optional[ArrayLike] = None
1935+
a: ArrayLike,
1936+
b: Union[Scalar, Pattern],
1937+
regex: bool = False,
1938+
mask: Optional[ArrayLike] = None,
19261939
) -> Union[ArrayLike, bool]:
19271940
"""
19281941
Compare two array_like inputs of the same shape or two scalar values
@@ -1933,7 +1946,7 @@ def _compare_or_regex_search(
19331946
Parameters
19341947
----------
19351948
a : array_like
1936-
b : scalar
1949+
b : scalar or regex pattern
19371950
regex : bool, default False
19381951
mask : array_like or None (default)
19391952
@@ -1943,7 +1956,7 @@ def _compare_or_regex_search(
19431956
"""
19441957

19451958
def _check_comparison_types(
1946-
result: Union[ArrayLike, bool], a: ArrayLike, b: Scalar,
1959+
result: Union[ArrayLike, bool], a: ArrayLike, b: Union[Scalar, Pattern],
19471960
):
19481961
"""
19491962
Raises an error if the two arrays (a,b) cannot be compared.
@@ -1964,7 +1977,7 @@ def _check_comparison_types(
19641977
else:
19651978
op = np.vectorize(
19661979
lambda x: bool(re.search(b, x))
1967-
if isinstance(x, str) and isinstance(b, str)
1980+
if isinstance(x, str) and isinstance(b, (str, Pattern))
19681981
else False
19691982
)
19701983

pandas/tests/frame/methods/test_replace.py

+8
Original file line numberDiff line numberDiff line change
@@ -1573,3 +1573,11 @@ def test_replace_dict_category_type(self, input_category_df, expected_category_d
15731573
result = input_df.replace({"a": "z", "obj1": "obj9", "cat1": "catX"})
15741574

15751575
tm.assert_frame_equal(result, expected)
1576+
1577+
def test_replace_with_compiled_regex(self):
1578+
# https://github.com/pandas-dev/pandas/issues/35680
1579+
df = pd.DataFrame(["a", "b", "c"])
1580+
regex = re.compile("^a$")
1581+
result = df.replace({regex: "z"}, regex=True)
1582+
expected = pd.DataFrame(["z", "b", "c"])
1583+
tm.assert_frame_equal(result, expected)

pandas/tests/series/methods/test_replace.py

+10
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import re
2+
13
import numpy as np
24
import pytest
35

@@ -415,3 +417,11 @@ def test_replace_extension_other(self):
415417
# https://github.com/pandas-dev/pandas/issues/34530
416418
ser = pd.Series(pd.array([1, 2, 3], dtype="Int64"))
417419
ser.replace("", "") # no exception
420+
421+
def test_replace_with_compiled_regex(self):
422+
# https://github.com/pandas-dev/pandas/issues/35680
423+
s = pd.Series(["a", "b", "c"])
424+
regex = re.compile("^a$")
425+
result = s.replace({regex: "z"}, regex=True)
426+
expected = pd.Series(["z", "b", "c"])
427+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)