diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 2afa1f1a6199e..25f845a8bf012 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -100,8 +100,8 @@ For example: Other enhancements ^^^^^^^^^^^^^^^^^^ - - Added :meth:`~DataFrame.set_flags` for setting table-wide flags on a ``Series`` or ``DataFrame`` (:issue:`28394`) +- :meth:`DataFrame.applymap` now supports ``na_action`` (:issue:`23803`) - :class:`Index` with object dtype supports division and multiplication (:issue:`34160`) - :meth:`DataFrame.explode` and :meth:`Series.explode` now support exploding of sets (:issue:`35614`) - diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index eadfcefaac73d..7464fafee2b94 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2377,7 +2377,7 @@ def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, bint convert=Tr @cython.boundscheck(False) @cython.wraparound(False) -def map_infer(ndarray arr, object f, bint convert=True): +def map_infer(ndarray arr, object f, bint convert=True, bint ignore_na=False): """ Substitute for np.vectorize with pandas-friendly dtype inference. 
@@ -2385,6 +2385,9 @@ def map_infer(ndarray arr, object f, bint convert=True): ---------- arr : ndarray f : function + convert : bint + ignore_na : bint + If True, NA values will not have f applied Returns ------- @@ -2398,6 +2401,9 @@ def map_infer(ndarray arr, object f, bint convert=True): n = len(arr) result = np.empty(n, dtype=object) for i in range(n): + if ignore_na and checknull(arr[i]): + result[i] = arr[i] + continue val = f(arr[i]) if cnp.PyArray_IsZeroDim(val): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e1a889bf79d95..647857c8bab67 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7617,7 +7617,7 @@ def apply(self, func, axis=0, raw=False, result_type=None, args=(), **kwds): ) return op.get_result() - def applymap(self, func) -> DataFrame: + def applymap(self, func, na_action: Optional[str] = None) -> DataFrame: """ Apply a function to a Dataframe elementwise. @@ -7628,6 +7628,10 @@ def applymap(self, func) -> DataFrame: ---------- func : callable Python function, returns a single value from a single value. + na_action : {None, 'ignore'}, default None + If 'ignore', propagate NaN values, without passing them to func. + + .. versionadded:: 1.2 Returns ------- @@ -7651,6 +7655,15 @@ def applymap(self, func) -> DataFrame: 0 3 4 1 5 5 + Like Series.map, NA values can be ignored: + + >>> df_copy = df.copy() + >>> df_copy.iloc[0, 0] = pd.NA + >>> df_copy.applymap(lambda x: len(str(x)), na_action='ignore') + 0 1 + 0 <NA> 4 + 1 5 5 + Note that a vectorized version of `func` often exists, which will be much faster. You could square each number elementwise. @@ -7666,11 +7679,17 @@ def applymap(self, func) -> DataFrame: 0 1.000000 4.494400 1 11.262736 20.857489 """ + if na_action not in {"ignore", None}: + raise ValueError( + f"na_action must be 'ignore' or None. 
Got {repr(na_action)}" + ) + ignore_na = na_action == "ignore" + # if we have a dtype == 'M8[ns]', provide boxed values def infer(x): if x.empty: - return lib.map_infer(x, func) - return lib.map_infer(x.astype(object)._values, func) + return lib.map_infer(x, func, ignore_na=ignore_na) + return lib.map_infer(x.astype(object)._values, func, ignore_na=ignore_na) return self.apply(infer) diff --git a/pandas/tests/frame/apply/test_frame_apply.py b/pandas/tests/frame/apply/test_frame_apply.py index bc09501583e2c..1662f9e2fff56 100644 --- a/pandas/tests/frame/apply/test_frame_apply.py +++ b/pandas/tests/frame/apply/test_frame_apply.py @@ -630,6 +630,22 @@ def test_applymap(self, float_frame): result = frame.applymap(func) tm.assert_frame_equal(result, frame) + def test_applymap_na_ignore(self, float_frame): + # GH 23803 + strlen_frame = float_frame.applymap(lambda x: len(str(x))) + float_frame_with_na = float_frame.copy() + mask = np.random.randint(0, 2, size=float_frame.shape, dtype=bool) + float_frame_with_na[mask] = pd.NA + strlen_frame_na_ignore = float_frame_with_na.applymap( + lambda x: len(str(x)), na_action="ignore" + ) + strlen_frame_with_na = strlen_frame.copy() + strlen_frame_with_na[mask] = pd.NA + tm.assert_frame_equal(strlen_frame_na_ignore, strlen_frame_with_na) + + with pytest.raises(ValueError, match="na_action must be .*Got 'abc'"): + float_frame_with_na.applymap(lambda x: len(str(x)), na_action="abc") + def test_applymap_box_timestamps(self): # GH 2689, GH 2627 ser = pd.Series(date_range("1/1/2000", periods=10))