From 4d1a22f2ec9ab6bfe3da9ff7e225532ce6eb7c62 Mon Sep 17 00:00:00 2001
From: Joel Nothman <joel.nothman@gmail.com>
Date: Thu, 13 Aug 2020 19:28:17 +1000
Subject: [PATCH 1/4] ENH add na_action to DataFrame.applymap

For symmetry with Series.map

Fixes #23803
---
 doc/source/whatsnew/v1.2.0.rst               |  1 +
 pandas/_libs/lib.pyx                         |  8 ++++++-
 pandas/core/frame.py                         | 23 +++++++++++++++++---
 pandas/tests/frame/apply/test_frame_apply.py | 14 ++++++++++++
 4 files changed, 42 insertions(+), 4 deletions(-)
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index deb5697053ea8..d969d0211b0cb 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -50,6 +50,7 @@ For example:
 
 Other enhancements
 ^^^^^^^^^^^^^^^^^^
+- :meth:`applymap` now supports ``na_action`` (:issue:`23803`)
 - :class:`Index` with object dtype supports division and multiplication (:issue:`34160`)
 -
 -
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 5fa91ffee8ea8..fabb25553b1f3 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2360,7 +2360,7 @@ def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, bint convert=Tr
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def map_infer(ndarray arr, object f, bint convert=True):
+def map_infer(ndarray arr, object f, bint convert=True, bint ignore_na=False):
     """
     Substitute for np.vectorize with pandas-friendly dtype inference.
 
@@ -2368,6 +2368,9 @@ def map_infer(ndarray arr, object f, bint convert=True):
     ----------
     arr : ndarray
     f : function
+    convert : bint
+    ignore_na : bint
+        If True, NA values will not have f applied
 
     Returns
     -------
@@ -2381,6 +2384,9 @@ def map_infer(ndarray arr, object f, bint convert=True):
     n = len(arr)
     result = np.empty(n, dtype=object)
     for i in range(n):
+        if ignore_na and checknull(arr[i]):
+            result[i] = arr[i]
+            continue
         val = f(arr[i])
 
         if cnp.PyArray_IsZeroDim(val):
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 547d86f221b5f..301702493a837 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -7584,7 +7584,7 @@ def apply(self, func, axis=0, raw=False, result_type=None, args=(), **kwds):
         )
         return op.get_result()
 
-    def applymap(self, func) -> "DataFrame":
+    def applymap(self, func, na_action=None) -> "DataFrame":
         """
         Apply a function to a Dataframe elementwise.
 
@@ -7595,6 +7595,10 @@ def applymap(self, func) -> "DataFrame":
         ----------
         func : callable
             Python function, returns a single value from a single value.
+        na_action : {None, 'ignore'}, default None
+            If ‘ignore’, propagate NaN values, without passing them to func.
+
+            .. versionadded:: 1.1
 
         Returns
         -------
@@ -7618,6 +7622,15 @@ def applymap(self, func) -> "DataFrame":
         0  3  4
         1  5  5
 
+        Like Series.map, NA values can be ignored:
+
+        >>> df_copy = df.copy()
+        >>> df_copy.iloc[0, 0] = pd.NA
+        >>> df.applymap(lambda x: len(str(x)), na_action='ignore')
+               0      1
+        0   <NA>  2.120
+        1  3.356  4.567
+
         Note that a vectorized version of `func` often exists, which will
         be much faster. You could square each number elementwise.
 
@@ -7633,11 +7646,15 @@ def applymap(self, func) -> "DataFrame":
         0   1.000000   4.494400
         1  11.262736  20.857489
         """
+        if na_action not in {"ignore", None}:
+            raise ValueError(f"na_action must be 'ignore' or None. Got {na_action!r}")
+        ignore_na = na_action == "ignore"
+
         # if we have a dtype == 'M8[ns]', provide boxed values
         def infer(x):
             if x.empty:
-                return lib.map_infer(x, func)
-            return lib.map_infer(x.astype(object)._values, func)
+                return lib.map_infer(x, func, ignore_na=ignore_na)
+            return lib.map_infer(x.astype(object)._values, func, ignore_na=ignore_na)
 
         return self.apply(infer)
 
diff --git a/pandas/tests/frame/apply/test_frame_apply.py b/pandas/tests/frame/apply/test_frame_apply.py
index 538978358c8e7..f1145828c49e7 100644
--- a/pandas/tests/frame/apply/test_frame_apply.py
+++ b/pandas/tests/frame/apply/test_frame_apply.py
@@ -587,6 +587,20 @@ def test_applymap(self, float_frame):
         tm.assert_frame_equal(applied, float_frame * 2)
         float_frame.applymap(type)
 
+        # GH 23803: na_ignore
+        strlen_frame = float_frame.applymap(lambda x: len(str(x)))
+        float_frame_with_na = float_frame.copy()
+        mask = np.random.randint(0, 2, size=float_frame.shape, dtype=bool)
+        float_frame_with_na[mask] = pd.NA
+        strlen_frame_na_ignore = float_frame_with_na.applymap(
+            lambda x: len(str(x)), na_action="ignore"
+        )
+        strlen_frame_with_na = strlen_frame.copy()
+        strlen_frame_with_na[mask] = pd.NA
+        print(strlen_frame_na_ignore)
+        print(strlen_frame_with_na)
+        tm.assert_frame_equal(strlen_frame_na_ignore, strlen_frame_with_na)
+
         # GH 465: function returning tuples
         result = float_frame.applymap(lambda x: (x, x))
         assert isinstance(result["A"][0], tuple)

From 735e41bbad7a8b5edcc6eb59d1cd1ee0d3542259 Mon Sep 17 00:00:00 2001
From: Joel Nothman <joel.nothman@gmail.com>
Date: Thu, 13 Aug 2020 23:02:30 +1000
Subject: [PATCH 2/4] Fix applymap docstring example and error message formatting for CI

---
 pandas/core/frame.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 301702493a837..89db80b2504b9 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -7626,10 +7626,10 @@ def applymap(self, func, na_action=None) -> "DataFrame":
 
         >>> df_copy = df.copy()
         >>> df_copy.iloc[0, 0] = pd.NA
-        >>> df.applymap(lambda x: len(str(x)), na_action='ignore')
-               0      1
-        0   <NA>  2.120
-        1  3.356  4.567
+        >>> df_copy.applymap(lambda x: len(str(x)), na_action='ignore')
+              0  1
+        0  <NA>  4
+        1     5  5
 
         Note that a vectorized version of `func` often exists, which will
         be much faster. You could square each number elementwise.
@@ -7647,7 +7647,9 @@ def applymap(self, func, na_action=None) -> "DataFrame":
         1  11.262736  20.857489
         """
         if na_action not in {"ignore", None}:
-            raise ValueError(f"na_action must be 'ignore' or None. Got {na_action!r}")
+            raise ValueError(
+                f"na_action must be 'ignore' or None. Got {repr(na_action)}"
+            )
         ignore_na = na_action == "ignore"
 
         # if we have a dtype == 'M8[ns]', provide boxed values

From bdcba459c0983f0aec87bd0f5a6f4c69365babf6 Mon Sep 17 00:00:00 2001
From: Joel Nothman <joel.nothman@gmail.com>
Date: Sat, 15 Aug 2020 21:00:59 +1000
Subject: [PATCH 3/4] Corrections after reviews

---
 pandas/core/frame.py                         |  4 +--
 pandas/tests/frame/apply/test_frame_apply.py | 30 +++++++++++---------
 2 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 89db80b2504b9..a637edea38e37 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -7584,7 +7584,7 @@ def apply(self, func, axis=0, raw=False, result_type=None, args=(), **kwds):
         )
         return op.get_result()
 
-    def applymap(self, func, na_action=None) -> "DataFrame":
+    def applymap(self, func, na_action: Optional[str] = None) -> "DataFrame":
         """
         Apply a function to a Dataframe elementwise.
 
@@ -7598,7 +7598,7 @@ def applymap(self, func, na_action=None) -> "DataFrame":
         na_action : {None, 'ignore'}, default None
             If ‘ignore’, propagate NaN values, without passing them to func.
 
-            .. versionadded:: 1.1
+            .. versionadded:: 1.2
 
         Returns
         -------
diff --git a/pandas/tests/frame/apply/test_frame_apply.py b/pandas/tests/frame/apply/test_frame_apply.py
index f1145828c49e7..a88314a53c6f7 100644
--- a/pandas/tests/frame/apply/test_frame_apply.py
+++ b/pandas/tests/frame/apply/test_frame_apply.py
@@ -587,20 +587,6 @@ def test_applymap(self, float_frame):
         tm.assert_frame_equal(applied, float_frame * 2)
         float_frame.applymap(type)
 
-        # GH 23803: na_ignore
-        strlen_frame = float_frame.applymap(lambda x: len(str(x)))
-        float_frame_with_na = float_frame.copy()
-        mask = np.random.randint(0, 2, size=float_frame.shape, dtype=bool)
-        float_frame_with_na[mask] = pd.NA
-        strlen_frame_na_ignore = float_frame_with_na.applymap(
-            lambda x: len(str(x)), na_action="ignore"
-        )
-        strlen_frame_with_na = strlen_frame.copy()
-        strlen_frame_with_na[mask] = pd.NA
-        print(strlen_frame_na_ignore)
-        print(strlen_frame_with_na)
-        tm.assert_frame_equal(strlen_frame_na_ignore, strlen_frame_with_na)
-
         # GH 465: function returning tuples
         result = float_frame.applymap(lambda x: (x, x))
         assert isinstance(result["A"][0], tuple)
@@ -644,6 +630,22 @@ def test_applymap(self, float_frame):
                 result = frame.applymap(func)
                 tm.assert_frame_equal(result, frame)
 
+    def test_applymap_na_ignore(self, float_frame):
+        # GH 23803
+        strlen_frame = float_frame.applymap(lambda x: len(str(x)))
+        float_frame_with_na = float_frame.copy()
+        mask = np.random.randint(0, 2, size=float_frame.shape, dtype=bool)
+        float_frame_with_na[mask] = pd.NA
+        strlen_frame_na_ignore = float_frame_with_na.applymap(
+            lambda x: len(str(x)), na_action="ignore"
+        )
+        strlen_frame_with_na = strlen_frame.copy()
+        strlen_frame_with_na[mask] = pd.NA
+        tm.assert_frame_equal(strlen_frame_na_ignore, strlen_frame_with_na)
+
+        with pytest.raises(ValueError, match="na_action must be .*Got 'abc'"):
+            float_frame_with_na.applymap(lambda x: len(str(x)), na_action="abc")
+
     def test_applymap_box_timestamps(self):
         # GH 2689, GH 2627
         ser = pd.Series(date_range("1/1/2000", periods=10))

From a0472b3361d020514124322140ab438f124f160a Mon Sep 17 00:00:00 2001
From: Joel Nothman <joel.nothman@gmail.com>
Date: Sat, 15 Aug 2020 21:01:56 +1000
Subject: [PATCH 4/4] Fix what's new

---
 doc/source/whatsnew/v1.2.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index d969d0211b0cb..1e2d5351fc3ee 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -50,7 +50,7 @@ For example:
 
 Other enhancements
 ^^^^^^^^^^^^^^^^^^
-- :meth:`applymap` now supports ``na_action`` (:issue:`23803`)
+- :meth:`DataFrame.applymap` now supports ``na_action`` (:issue:`23803`)
 - :class:`Index` with object dtype supports division and multiplication (:issue:`34160`)
 -
 -