diff --git a/doc/source/whatsnew/v1.1.1.rst b/doc/source/whatsnew/v1.1.1.rst index 565b4a014bd0c..d93cd6edb983a 100644 --- a/doc/source/whatsnew/v1.1.1.rst +++ b/doc/source/whatsnew/v1.1.1.rst @@ -26,6 +26,7 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.reset_index` would raise a ``ValueError`` on empty :class:`DataFrame` with a :class:`MultiIndex` with a ``datetime64`` dtype level (:issue:`35606`, :issue:`35657`) - Fixed regression where :meth:`DataFrame.merge_asof` would raise a ``UnboundLocalError`` when ``left_index`` , ``right_index`` and ``tolerance`` were set (:issue:`35558`) - Fixed regression in ``.groupby(..).rolling(..)`` where a custom ``BaseIndexer`` would be ignored (:issue:`35557`) +- Fixed regression in :meth:`DataFrame.replace` and :meth:`Series.replace` where compiled regular expressions would be ignored during replacement (:issue:`35680`) - Fixed regression in :meth:`~pandas.core.groupby.DataFrameGroupBy.agg` where a list of functions would produce the wrong results if at least one of the functions did not aggregate. (:issue:`35490`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 371b721f08b27..5a215c4cd5fa3 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -2,7 +2,17 @@ import itertools import operator import re -from typing import DefaultDict, Dict, List, Optional, Sequence, Tuple, TypeVar, Union +from typing import ( + DefaultDict, + Dict, + List, + Optional, + Pattern, + Sequence, + Tuple, + TypeVar, + Union, +) import warnings import numpy as np @@ -1907,7 +1917,10 @@ def _merge_blocks( def _compare_or_regex_search( - a: ArrayLike, b: Scalar, regex: bool = False, mask: Optional[ArrayLike] = None + a: ArrayLike, + b: Union[Scalar, Pattern], + regex: bool = False, + mask: Optional[ArrayLike] = None, ) -> Union[ArrayLike, bool]: """ Compare two array_like inputs of the same shape or two scalar values @@ -1918,7 +1931,7 @@ def _compare_or_regex_search( Parameters ---------- a : array_like - b : scalar + b : scalar or regex pattern regex : bool, default False mask : array_like or None (default) @@ -1928,7 +1941,7 @@ def _compare_or_regex_search( """ def _check_comparison_types( - result: Union[ArrayLike, bool], a: ArrayLike, b: Scalar, + result: Union[ArrayLike, bool], a: ArrayLike, b: Union[Scalar, Pattern], ): """ Raises an error if the two arrays (a,b) cannot be compared. @@ -1949,7 +1962,7 @@ def _check_comparison_types( else: op = np.vectorize( lambda x: bool(re.search(b, x)) - if isinstance(x, str) and isinstance(b, str) + if isinstance(x, str) and isinstance(b, (str, Pattern)) else False ) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index a3f056dbf9648..8603bff0587b6 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1573,3 +1573,11 @@ def test_replace_dict_category_type(self, input_category_df, expected_category_d result = input_df.replace({"a": "z", "obj1": "obj9", "cat1": "catX"}) tm.assert_frame_equal(result, expected) + + def test_replace_with_compiled_regex(self): + # https://github.com/pandas-dev/pandas/issues/35680 + df = pd.DataFrame(["a", "b", "c"]) + regex = re.compile("^a$") + result = df.replace({regex: "z"}, regex=True) + expected = pd.DataFrame(["z", "b", "c"]) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 11802c59a29da..f78a28c66e946 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -1,3 +1,5 @@ +import re + import numpy as np import pytest @@ -415,3 +417,11 @@ def test_replace_extension_other(self): # https://github.com/pandas-dev/pandas/issues/34530 ser = pd.Series(pd.array([1, 2, 3], dtype="Int64")) ser.replace("", "") # no exception + + def test_replace_with_compiled_regex(self): + # https://github.com/pandas-dev/pandas/issues/35680 + s = pd.Series(["a", "b", "c"]) + regex = re.compile("^a$") + result = s.replace({regex: "z"}, regex=True) + expected = pd.Series(["z", "b", "c"]) + tm.assert_series_equal(result, expected)