Skip to content

Commit b2dda5a

Browse files
authored
ENH add na_action to DataFrame.applymap (#35704)
1 parent ddf2f05 commit b2dda5a

File tree

4 files changed

+46
-5
lines changed

4 files changed

+46
-5
lines changed

doc/source/whatsnew/v1.2.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,8 @@ For example:
100100

101101
Other enhancements
102102
^^^^^^^^^^^^^^^^^^
103-
104103
- Added :meth:`~DataFrame.set_flags` for setting table-wide flags on a ``Series`` or ``DataFrame`` (:issue:`28394`)
104+
- :meth:`DataFrame.applymap` now supports ``na_action`` (:issue:`23803`)
105105
- :class:`Index` with object dtype supports division and multiplication (:issue:`34160`)
106106
- :meth:`DataFrame.explode` and :meth:`Series.explode` now support exploding of sets (:issue:`35614`)
107107
-

pandas/_libs/lib.pyx

+7-1
Original file line numberDiff line numberDiff line change
@@ -2377,14 +2377,17 @@ def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, bint convert=Tr
23772377

23782378
@cython.boundscheck(False)
23792379
@cython.wraparound(False)
2380-
def map_infer(ndarray arr, object f, bint convert=True):
2380+
def map_infer(ndarray arr, object f, bint convert=True, bint ignore_na=False):
23812381
"""
23822382
Substitute for np.vectorize with pandas-friendly dtype inference.
23832383
23842384
Parameters
23852385
----------
23862386
arr : ndarray
23872387
f : function
2388+
convert : bint
2389+
ignore_na : bint
2390+
If True, NA values will not have f applied
23882391
23892392
Returns
23902393
-------
@@ -2398,6 +2401,9 @@ def map_infer(ndarray arr, object f, bint convert=True):
23982401
n = len(arr)
23992402
result = np.empty(n, dtype=object)
24002403
for i in range(n):
2404+
if ignore_na and checknull(arr[i]):
2405+
result[i] = arr[i]
2406+
continue
24012407
val = f(arr[i])
24022408

24032409
if cnp.PyArray_IsZeroDim(val):

pandas/core/frame.py

+22-3
Original file line numberDiff line numberDiff line change
@@ -7619,7 +7619,7 @@ def apply(self, func, axis=0, raw=False, result_type=None, args=(), **kwds):
76197619
)
76207620
return op.get_result()
76217621

7622-
def applymap(self, func) -> DataFrame:
7622+
def applymap(self, func, na_action: Optional[str] = None) -> DataFrame:
76237623
"""
76247624
Apply a function to a DataFrame elementwise.
76257625
@@ -7630,6 +7630,10 @@ def applymap(self, func) -> DataFrame:
76307630
----------
76317631
func : callable
76327632
Python function, returns a single value from a single value.
7633+
na_action : {None, 'ignore'}, default None
7634+
If 'ignore', propagate NaN values, without passing them to func.
7635+
7636+
.. versionadded:: 1.2
76337637
76347638
Returns
76357639
-------
@@ -7653,6 +7657,15 @@ def applymap(self, func) -> DataFrame:
76537657
0 3 4
76547658
1 5 5
76557659
7660+
Like Series.map, NA values can be ignored:
7661+
7662+
>>> df_copy = df.copy()
7663+
>>> df_copy.iloc[0, 0] = pd.NA
7664+
>>> df_copy.applymap(lambda x: len(str(x)), na_action='ignore')
7665+
0 1
7666+
0 <NA> 4
7667+
1 5 5
7668+
76567669
Note that a vectorized version of `func` often exists, which will
76577670
be much faster. You could square each number elementwise.
76587671
@@ -7668,11 +7681,17 @@ def applymap(self, func) -> DataFrame:
76687681
0 1.000000 4.494400
76697682
1 11.262736 20.857489
76707683
"""
7684+
if na_action not in {"ignore", None}:
7685+
raise ValueError(
7686+
f"na_action must be 'ignore' or None. Got {repr(na_action)}"
7687+
)
7688+
ignore_na = na_action == "ignore"
7689+
76717690
# if we have a dtype == 'M8[ns]', provide boxed values
76727691
def infer(x):
76737692
if x.empty:
7674-
return lib.map_infer(x, func)
7675-
return lib.map_infer(x.astype(object)._values, func)
7693+
return lib.map_infer(x, func, ignore_na=ignore_na)
7694+
return lib.map_infer(x.astype(object)._values, func, ignore_na=ignore_na)
76767695

76777696
return self.apply(infer)
76787697

pandas/tests/frame/apply/test_frame_apply.py

+16
Original file line numberDiff line numberDiff line change
@@ -630,6 +630,22 @@ def test_applymap(self, float_frame):
630630
result = frame.applymap(func)
631631
tm.assert_frame_equal(result, frame)
632632

633+
def test_applymap_na_ignore(self, float_frame):
634+
# GH 23803
635+
strlen_frame = float_frame.applymap(lambda x: len(str(x)))
636+
float_frame_with_na = float_frame.copy()
637+
mask = np.random.randint(0, 2, size=float_frame.shape, dtype=bool)
638+
float_frame_with_na[mask] = pd.NA
639+
strlen_frame_na_ignore = float_frame_with_na.applymap(
640+
lambda x: len(str(x)), na_action="ignore"
641+
)
642+
strlen_frame_with_na = strlen_frame.copy()
643+
strlen_frame_with_na[mask] = pd.NA
644+
tm.assert_frame_equal(strlen_frame_na_ignore, strlen_frame_with_na)
645+
646+
with pytest.raises(ValueError, match="na_action must be .*Got 'abc'"):
647+
float_frame_with_na.applymap(lambda x: len(str(x)), na_action="abc")
648+
633649
def test_applymap_box_timestamps(self):
634650
# GH 2689, GH 2627
635651
ser = pd.Series(date_range("1/1/2000", periods=10))

0 commit comments

Comments
 (0)