diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 033f47f0c994d..f734db1419282 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -980,6 +980,8 @@ Missing
 - Bug in :meth:`Series.map` caused incorrect result when data has NaNs and defaultdict mapping was used (:issue:`48813`)
 - Bug in :class:`NA` raising a ``TypeError`` instead of return :class:`NA` when performing a binary operation with a ``bytes`` object (:issue:`49108`)
 - Bug in :meth:`DataFrame.update` with ``overwrite=False`` raising ``TypeError`` when ``self`` has column with ``NaT`` values and column not present in ``other`` (:issue:`16713`)
+- Bug in :meth:`Series.replace` raising ``RecursionError`` when replacing value in object-dtype :class:`Series` containing ``NA`` (:issue:`47480`)
+- Bug in :meth:`Series.replace` raising ``RecursionError`` when replacing value in numeric :class:`Series` with ``NA`` (:issue:`50758`)
 
 MultiIndex
 ^^^^^^^^^^
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index f7787aa52623b..4bb4882574228 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -21,6 +21,7 @@
     writers,
 )
 from pandas._libs.internals import BlockPlacement
+from pandas._libs.missing import NA
 from pandas._libs.tslibs import IncompatibleFrequency
 from pandas._typing import (
     ArrayLike,
@@ -569,7 +570,7 @@ def replace(
             return blocks
 
         elif self.ndim == 1 or self.shape[0] == 1:
-            if value is None:
+            if value is None or value is NA:
                 blk = self.astype(np.dtype(object))
             else:
                 blk = self.coerce_to_target_dtype(value)
diff --git a/pandas/core/missing.py b/pandas/core/missing.py
index 0da4f6404c3cc..162186bc4186a 100644
--- a/pandas/core/missing.py
+++ b/pandas/core/missing.py
@@ -32,6 +32,7 @@
 from pandas.core.dtypes.common import (
     is_array_like,
     is_numeric_v_string_like,
+    is_object_dtype,
     needs_i8_conversion,
 )
 from pandas.core.dtypes.missing import (
@@ -83,6 +84,12 @@ def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]:
     # _DTypeDict, Tuple[Any, Any]]]"
     values_to_mask = np.array(values_to_mask, dtype=dtype)  # type: ignore[arg-type]
 
+    potential_na = False
+    if is_object_dtype(arr):
+        # pre-compute mask to avoid comparison to NA
+        potential_na = True
+        arr_mask = ~isna(arr)
+
     na_mask = isna(values_to_mask)
     nonna = values_to_mask[~na_mask]
 
@@ -93,10 +100,15 @@ def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]:
             # GH#29553 prevent numpy deprecation warnings
             pass
         else:
-            new_mask = arr == x
-            if not isinstance(new_mask, np.ndarray):
-                # usually BooleanArray
-                new_mask = new_mask.to_numpy(dtype=bool, na_value=False)
+            if potential_na:
+                new_mask = np.zeros(arr.shape, dtype=np.bool_)
+                new_mask[arr_mask] = arr[arr_mask] == x
+            else:
+                new_mask = arr == x
+
+                if not isinstance(new_mask, np.ndarray):
+                    # usually BooleanArray
+                    new_mask = new_mask.to_numpy(dtype=bool, na_value=False)
             mask |= new_mask
 
     if na_mask.any():
diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py
index 1188c9452520c..44c224c120ea6 100644
--- a/pandas/tests/series/methods/test_replace.py
+++ b/pandas/tests/series/methods/test_replace.py
@@ -690,3 +690,25 @@ def test_replace_change_dtype_series(self):
         df = pd.DataFrame.from_dict({"Test": ["0.5", None, "0.6"]})
         df["Test"] = df["Test"].fillna(np.nan)
         tm.assert_frame_equal(df, expected)
+
+    @pytest.mark.parametrize("dtype", ["object", "Int64"])
+    def test_replace_na_in_obj_column(self, dtype):
+        # GH#47480
+        ser = pd.Series([0, 1, pd.NA], dtype=dtype)
+        expected = pd.Series([0, 2, pd.NA], dtype=dtype)
+        result = ser.replace(to_replace=1, value=2)
+        tm.assert_series_equal(result, expected)
+
+        ser.replace(to_replace=1, value=2, inplace=True)
+        tm.assert_series_equal(ser, expected)
+
+    @pytest.mark.parametrize("val", [0, 0.5])
+    def test_replace_numeric_column_with_na(self, val):
+        # GH#50758
+        ser = pd.Series([val, 1])
+        expected = pd.Series([val, pd.NA])
+        result = ser.replace(to_replace=1, value=pd.NA)
+        tm.assert_series_equal(result, expected)
+
+        ser.replace(to_replace=1, value=pd.NA, inplace=True)
+        tm.assert_series_equal(ser, expected)