From 4d1a22f2ec9ab6bfe3da9ff7e225532ce6eb7c62 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Thu, 13 Aug 2020 19:28:17 +1000 Subject: [PATCH 1/4] ENH add na_action to DataFrame.applymap For symmetry with Series.map Fixes #23803 --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/_libs/lib.pyx | 8 ++++++- pandas/core/frame.py | 23 +++++++++++++++++--- pandas/tests/frame/apply/test_frame_apply.py | 14 ++++++++++++ 4 files changed, 42 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index deb5697053ea8..d969d0211b0cb 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -50,6 +50,7 @@ For example: Other enhancements ^^^^^^^^^^^^^^^^^^ +- :meth:`applymap` now supports ``na_action`` (:issue:`23803`) - :class:`Index` with object dtype supports division and multiplication (:issue:`34160`) - - diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 5fa91ffee8ea8..fabb25553b1f3 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2360,7 +2360,7 @@ def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, bint convert=Tr @cython.boundscheck(False) @cython.wraparound(False) -def map_infer(ndarray arr, object f, bint convert=True): +def map_infer(ndarray arr, object f, bint convert=True, bint ignore_na=False): """ Substitute for np.vectorize with pandas-friendly dtype inference. @@ -2368,6 +2368,9 @@ def map_infer(ndarray arr, object f, bint convert=True): ---------- arr : ndarray f : function + convert : bint + ignore_na : bint + If True, NA values will not have f applied Returns ------- @@ -2381,6 +2384,9 @@ def map_infer(ndarray arr, object f, bint convert=True): n = len(arr) result = np.empty(n, dtype=object) for i in range(n): + if ignore_na and checknull(arr[i]): + result[i] = arr[i] + continue val = f(arr[i]) if cnp.PyArray_IsZeroDim(val): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 547d86f221b5f..301702493a837 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7584,7 +7584,7 @@ def apply(self, func, axis=0, raw=False, result_type=None, args=(), **kwds): ) return op.get_result() - def applymap(self, func) -> "DataFrame": + def applymap(self, func, na_action=None) -> "DataFrame": """ Apply a function to a Dataframe elementwise. @@ -7595,6 +7595,10 @@ def applymap(self, func) -> "DataFrame": ---------- func : callable Python function, returns a single value from a single value. + na_action : {None, 'ignore'}, default None + If ‘ignore’, propagate NaN values, without passing them to func. + + .. versionadded:: 1.1 Returns ------- @@ -7618,6 +7622,15 @@ def applymap(self, func) -> "DataFrame": 0 3 4 1 5 5 + Like Series.map, NA values can be ignored: + + >>> df_copy = df.copy() + >>> df_copy.iloc[0, 0] = pd.NA + >>> df.applymap(lambda x: len(str(x)), na_action='ignore') + 0 1 + 0 2.120 + 1 3.356 4.567 + Note that a vectorized version of `func` often exists, which will be much faster. You could square each number elementwise. @@ -7633,11 +7646,15 @@ def applymap(self, func) -> "DataFrame": 0 1.000000 4.494400 1 11.262736 20.857489 """ + if na_action not in {"ignore", None}: + raise ValueError(f"na_action must be 'ignore' or None. Got {na_action!r}") + ignore_na = na_action == "ignore" + # if we have a dtype == 'M8[ns]', provide boxed values def infer(x): if x.empty: - return lib.map_infer(x, func) - return lib.map_infer(x.astype(object)._values, func) + return lib.map_infer(x, func, ignore_na=ignore_na) + return lib.map_infer(x.astype(object)._values, func, ignore_na=ignore_na) return self.apply(infer) diff --git a/pandas/tests/frame/apply/test_frame_apply.py b/pandas/tests/frame/apply/test_frame_apply.py index 538978358c8e7..f1145828c49e7 100644 --- a/pandas/tests/frame/apply/test_frame_apply.py +++ b/pandas/tests/frame/apply/test_frame_apply.py @@ -587,6 +587,20 @@ def test_applymap(self, float_frame): tm.assert_frame_equal(applied, float_frame * 2) float_frame.applymap(type) + # GH 23803: na_ignore + strlen_frame = float_frame.applymap(lambda x: len(str(x))) + float_frame_with_na = float_frame.copy() + mask = np.random.randint(0, 2, size=float_frame.shape, dtype=bool) + float_frame_with_na[mask] = pd.NA + strlen_frame_na_ignore = float_frame_with_na.applymap( + lambda x: len(str(x)), na_action="ignore" + ) + strlen_frame_with_na = strlen_frame.copy() + strlen_frame_with_na[mask] = pd.NA + print(strlen_frame_na_ignore) + print(strlen_frame_with_na) + tm.assert_frame_equal(strlen_frame_na_ignore, strlen_frame_with_na) + # GH 465: function returning tuples result = float_frame.applymap(lambda x: (x, x)) assert isinstance(result["A"][0], tuple) From 735e41bbad7a8b5edcc6eb59d1cd1ee0d3542259 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Thu, 13 Aug 2020 23:02:30 +1000 Subject: [PATCH 2/4] Fix for CI --- pandas/core/frame.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 301702493a837..89db80b2504b9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7626,10 +7626,10 @@ def applymap(self, func, na_action=None) -> "DataFrame": >>> df_copy = df.copy() >>> df_copy.iloc[0, 0] = pd.NA - >>> df.applymap(lambda x: len(str(x)), na_action='ignore') - 0 1 - 0 2.120 - 1 3.356 4.567 + >>> df_copy.applymap(lambda x: len(str(x)), na_action='ignore') + 0 1 + 0 4 + 1 5 5 Note that a vectorized version of `func` often exists, which will be much faster. You could square each number elementwise. @@ -7647,7 +7647,9 @@ def applymap(self, func, na_action=None) -> "DataFrame": 1 11.262736 20.857489 """ if na_action not in {"ignore", None}: - raise ValueError(f"na_action must be 'ignore' or None. Got {na_action!r}") + raise ValueError( + f"na_action must be 'ignore' or None. Got {repr(na_action)}" + ) ignore_na = na_action == "ignore" # if we have a dtype == 'M8[ns]', provide boxed values From bdcba459c0983f0aec87bd0f5a6f4c69365babf6 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Sat, 15 Aug 2020 21:00:59 +1000 Subject: [PATCH 3/4] Corrections after reviews --- pandas/core/frame.py | 4 +-- pandas/tests/frame/apply/test_frame_apply.py | 30 +++++++++++--------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 89db80b2504b9..a637edea38e37 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7584,7 +7584,7 @@ def apply(self, func, axis=0, raw=False, result_type=None, args=(), **kwds): ) return op.get_result() - def applymap(self, func, na_action=None) -> "DataFrame": + def applymap(self, func, na_action: Optional[str] = None) -> "DataFrame": """ Apply a function to a Dataframe elementwise. @@ -7598,7 +7598,7 @@ def applymap(self, func, na_action=None) -> "DataFrame": na_action : {None, 'ignore'}, default None If ‘ignore’, propagate NaN values, without passing them to func. - .. versionadded:: 1.1 + .. versionadded:: 1.2 Returns ------- diff --git a/pandas/tests/frame/apply/test_frame_apply.py b/pandas/tests/frame/apply/test_frame_apply.py index f1145828c49e7..a88314a53c6f7 100644 --- a/pandas/tests/frame/apply/test_frame_apply.py +++ b/pandas/tests/frame/apply/test_frame_apply.py @@ -587,20 +587,6 @@ def test_applymap(self, float_frame): tm.assert_frame_equal(applied, float_frame * 2) float_frame.applymap(type) - # GH 23803: na_ignore - strlen_frame = float_frame.applymap(lambda x: len(str(x))) - float_frame_with_na = float_frame.copy() - mask = np.random.randint(0, 2, size=float_frame.shape, dtype=bool) - float_frame_with_na[mask] = pd.NA - strlen_frame_na_ignore = float_frame_with_na.applymap( - lambda x: len(str(x)), na_action="ignore" - ) - strlen_frame_with_na = strlen_frame.copy() - strlen_frame_with_na[mask] = pd.NA - print(strlen_frame_na_ignore) - print(strlen_frame_with_na) - tm.assert_frame_equal(strlen_frame_na_ignore, strlen_frame_with_na) - # GH 465: function returning tuples result = float_frame.applymap(lambda x: (x, x)) assert isinstance(result["A"][0], tuple) @@ -644,6 +630,22 @@ def test_applymap(self, float_frame): result = frame.applymap(func) tm.assert_frame_equal(result, frame) + def test_applymap_na_ignore(self, float_frame): + # GH 23803 + strlen_frame = float_frame.applymap(lambda x: len(str(x))) + float_frame_with_na = float_frame.copy() + mask = np.random.randint(0, 2, size=float_frame.shape, dtype=bool) + float_frame_with_na[mask] = pd.NA + strlen_frame_na_ignore = float_frame_with_na.applymap( + lambda x: len(str(x)), na_action="ignore" + ) + strlen_frame_with_na = strlen_frame.copy() + strlen_frame_with_na[mask] = pd.NA + tm.assert_frame_equal(strlen_frame_na_ignore, strlen_frame_with_na) + + with pytest.raises(ValueError, match="na_action must be .*Got 'abc'"): + float_frame_with_na.applymap(lambda x: len(str(x)), na_action="abc") + def test_applymap_box_timestamps(self): # GH 2689, GH 2627 ser = pd.Series(date_range("1/1/2000", periods=10)) From a0472b3361d020514124322140ab438f124f160a Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Sat, 15 Aug 2020 21:01:56 +1000 Subject: [PATCH 4/4] Fix what's new --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index d969d0211b0cb..1e2d5351fc3ee 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -50,7 +50,7 @@ For example: Other enhancements ^^^^^^^^^^^^^^^^^^ -- :meth:`applymap` now supports ``na_action`` (:issue:`23803`) +- :meth:`DataFrame.applymap` now supports ``na_action`` (:issue:`23803`) - :class:`Index` with object dtype supports division and multiplication (:issue:`34160`) - -