diff --git a/pandas/core/missing.py b/pandas/core/missing.py
index 0d058ead9d22c..b23c34fca5f6c 100644
--- a/pandas/core/missing.py
+++ b/pandas/core/missing.py
@@ -88,12 +88,27 @@ def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]:
 
     # GH 21977
     mask = np.zeros(arr.shape, dtype=bool)
+
+    # GH#47480 'arr == x' returns scalar when arr is numpy array containing NAs
+    # Usually when Series has dtype object with NAs
+    is_numpy_arr = isinstance(arr, np.ndarray)
+    if is_numpy_arr:
+        arr_nas = isna(arr)
+    else:
+        arr_nas = mask  # Avoid possibility of NameError on new_mask[~arr_nas]
+    is_numpy_arr_with_nas = is_numpy_arr and arr_nas.any()
+
     for x in nonna:
         if is_numeric_v_string_like(arr, x):
             # GH#29553 prevent numpy deprecation warnings
             pass
         else:
-            new_mask = arr == x
+            if is_numpy_arr_with_nas:  # GH#47480
+                new_mask = np.zeros_like(arr, dtype=bool)
+                new_mask[~arr_nas] = arr[~arr_nas] == x
+            else:
+                new_mask = arr == x
+
             if not isinstance(new_mask, np.ndarray):
                 # usually BooleanArray
                 new_mask = new_mask.to_numpy(dtype=bool, na_value=False)
diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py
index 1923299476a32..e453d9bc93af4 100644
--- a/pandas/tests/frame/methods/test_replace.py
+++ b/pandas/tests/frame/methods/test_replace.py
@@ -1503,6 +1503,14 @@ def test_replace_value_none_dtype_numeric(self, val):
         result = df.replace({val: None})
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.parametrize("dtype", [None, "Int64", "Float64"])
+    def test_replace_df_containing_na(self, dtype):
+        # GH#47480
+        df = DataFrame({"A": [4, 1, pd.NA], "B": [1, 3, 2]}, dtype=dtype)
+        df.replace(to_replace=1, value=100, inplace=True)
+        expected = DataFrame({"A": [4, 100, pd.NA], "B": [100, 3, 2]}, dtype=dtype)
+        tm.assert_frame_equal(df, expected)
+
 
 class TestDataFrameReplaceRegex:
     @pytest.mark.parametrize(
diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py
index 59afe22e40f7a..b329dc6dd1a87 100644
--- a/pandas/tests/series/methods/test_replace.py
+++ b/pandas/tests/series/methods/test_replace.py
@@ -674,3 +674,11 @@ def test_replace_value_none_dtype_numeric(self, val):
         result = ser.replace(val, None)
         expected = pd.Series([1, None], dtype=object)
         tm.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize("dtype", [None, "Int64", "Float64"])
+    def test_replace_series_containing_na(self, dtype):
+        # GH#47480
+        df = pd.Series([4, 1, pd.NA, 1], dtype=dtype)
+        df.replace(to_replace=1, value=100, inplace=True)
+        expected = pd.Series([4, 100, pd.NA, 100], dtype=dtype)
+        tm.assert_series_equal(df, expected)