diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 7263329d2e53b..dfbebae8d42b6 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -296,6 +296,7 @@ Bug fixes - Fixed bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`) - Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`) - Fixed bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`) +- Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`) Categorical ^^^^^^^^^^^ diff --git a/pandas/core/array_algos/replace.py b/pandas/core/array_algos/replace.py index 6cc867c60fd82..f946c5adcbb0b 100644 --- a/pandas/core/array_algos/replace.py +++ b/pandas/core/array_algos/replace.py @@ -93,17 +93,18 @@ def _check_comparison_types( ) # GH#32621 use mask to avoid comparing to NAs - if isinstance(a, np.ndarray): + if isinstance(a, np.ndarray) and mask is not None: a = a[mask] - - result = op(a) - - if isinstance(result, np.ndarray) and mask is not None: - # The shape of the mask can differ to that of the result - # since we may compare only a subset of a's or b's elements - tmp = np.zeros(mask.shape, dtype=np.bool_) - np.place(tmp, mask, result) - result = tmp + result = op(a) + + if isinstance(result, np.ndarray): + # The shape of the mask can differ to that of the result + # since we may compare only a subset of a's or b's elements + tmp = np.zeros(mask.shape, dtype=np.bool_) + np.place(tmp, mask, result) + result = tmp + else: + result = op(a) _check_comparison_types(result, a, b) return result diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index d4ec09332ad97..c7b894e73d0dd 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -616,7 +616,8 @@ def test_replace_with_compiled_regex(self): def test_pandas_replace_na(self): # GH#43344 - ser = pd.Series(["AA", "BB", "CC", "DD", "EE", "", pd.NA], dtype="string") + # GH#56599 + ser = pd.Series(["AA", "BB", "CC", "DD", "EE", "", pd.NA, "AA"], dtype="string") regex_mapping = { "AA": "CC", "BB": "CC", @@ -624,7 +625,9 @@ def test_pandas_replace_na(self): "CC": "CC-REPL", } result = ser.replace(regex_mapping, regex=True) - exp = pd.Series(["CC", "CC", "CC-REPL", "DD", "CC", "", pd.NA], dtype="string") + exp = pd.Series( + ["CC", "CC", "CC-REPL", "DD", "CC", "", pd.NA, "CC"], dtype="string" + ) tm.assert_series_equal(result, exp) @pytest.mark.parametrize(