Skip to content

Commit d329989

Browse files
rhshadrach authored and luckyvs1 committed
ENH: DataFrame.apply to accept numpy ops as strings (pandas-dev#39118)
1 parent 3fdffdc commit d329989

File tree

4 files changed

+85
-27
lines changed

4 files changed

+85
-27
lines changed

doc/source/whatsnew/v1.3.0.rst

+2
Original file line number | Diff line number | Diff line change
@@ -54,6 +54,8 @@ Other enhancements
5454
- Add support for dict-like names in :class:`MultiIndex.set_names` and :class:`MultiIndex.rename` (:issue:`20421`)
5555
- :func:`pandas.read_excel` can now auto detect .xlsb files (:issue:`35416`)
5656
- :meth:`.Rolling.sum`, :meth:`.Expanding.sum`, :meth:`.Rolling.mean`, :meth:`.Expanding.mean`, :meth:`.Rolling.median`, :meth:`.Expanding.median`, :meth:`.Rolling.max`, :meth:`.Expanding.max`, :meth:`.Rolling.min`, and :meth:`.Expanding.min` now support ``Numba`` execution with the ``engine`` keyword (:issue:`38895`)
57+
- :meth:`DataFrame.apply` can now accept NumPy unary operators as strings, e.g. ``df.apply("sqrt")``, which was already the case for :meth:`Series.apply` (:issue:`39116`)
58+
- :meth:`DataFrame.apply` can now accept non-callable DataFrame properties as strings, e.g. ``df.apply("size")``, which was already the case for :meth:`Series.apply` (:issue:`39116`)
5759

5860
.. ---------------------------------------------------------------------------
5961

pandas/core/apply.py

+33-14
Original file line number | Diff line number | Diff line change
@@ -151,9 +151,11 @@ def agg(self) -> Tuple[Optional[FrameOrSeriesUnion], Optional[bool]]:
151151
if _axis is None:
152152
_axis = getattr(obj, "axis", 0)
153153

154-
if isinstance(arg, str):
155-
return obj._try_aggregate_string_function(arg, *args, **kwargs), None
156-
elif is_dict_like(arg):
154+
result = self.maybe_apply_str()
155+
if result is not None:
156+
return result, None
157+
158+
if is_dict_like(arg):
157159
arg = cast(AggFuncTypeDict, arg)
158160
return agg_dict_like(obj, arg, _axis), True
159161
elif is_list_like(arg):
@@ -171,6 +173,28 @@ def agg(self) -> Tuple[Optional[FrameOrSeriesUnion], Optional[bool]]:
171173
# caller can react
172174
return result, True
173175

176+
def maybe_apply_str(self) -> Optional[FrameOrSeriesUnion]:
177+
"""
178+
Compute apply in case of a string.
179+
180+
Returns
181+
-------
182+
result: Series, DataFrame, or None
183+
Result when self.f is a string, None otherwise.
184+
"""
185+
f = self.f
186+
if not isinstance(f, str):
187+
return None
188+
# Support for `frame.transform('method')`
189+
# Some methods (shift, etc.) require the axis argument, others
190+
# don't, so inspect and insert if necessary.
191+
func = getattr(self.obj, f, None)
192+
if callable(func):
193+
sig = inspect.getfullargspec(func)
194+
if "axis" in sig.args:
195+
self.kwds["axis"] = self.axis
196+
return self.obj._try_aggregate_string_function(f, *self.args, **self.kwds)
197+
174198

175199
class FrameApply(Apply):
176200
obj: DataFrame
@@ -236,15 +260,9 @@ def apply(self) -> FrameOrSeriesUnion:
236260
return self.apply_empty_result()
237261

238262
# string dispatch
239-
if isinstance(self.f, str):
240-
# Support for `frame.transform('method')`
241-
# Some methods (shift, etc.) require the axis argument, others
242-
# don't, so inspect and insert if necessary.
243-
func = getattr(self.obj, self.f)
244-
sig = inspect.getfullargspec(func)
245-
if "axis" in sig.args:
246-
self.kwds["axis"] = self.axis
247-
return func(*self.args, **self.kwds)
263+
result = self.maybe_apply_str()
264+
if result is not None:
265+
return result
248266

249267
# ufunc
250268
elif isinstance(self.f, np.ufunc):
@@ -581,8 +599,9 @@ def apply(self) -> FrameOrSeriesUnion:
581599
return obj.aggregate(func, *args, **kwds)
582600

583601
# if we are a string, try to dispatch
584-
if isinstance(func, str):
585-
return obj._try_aggregate_string_function(func, *args, **kwds)
602+
result = self.maybe_apply_str()
603+
if result is not None:
604+
return result
586605

587606
return self.apply_standard()
588607

pandas/tests/frame/apply/test_frame_apply.py

+44-9
Original file line number | Diff line number | Diff line change
@@ -166,8 +166,16 @@ def test_apply_standard_nonunique(self):
166166
pytest.param([1, None], {"numeric_only": True}, id="args_and_kwds"),
167167
],
168168
)
169-
def test_apply_with_string_funcs(self, float_frame, func, args, kwds):
170-
result = float_frame.apply(func, *args, **kwds)
169+
@pytest.mark.parametrize("how", ["agg", "apply"])
170+
def test_apply_with_string_funcs(self, request, float_frame, func, args, kwds, how):
171+
if len(args) > 1 and how == "agg":
172+
request.node.add_marker(
173+
pytest.mark.xfail(
174+
reason="agg/apply signature mismatch - agg passes 2nd "
175+
"argument to func"
176+
)
177+
)
178+
result = getattr(float_frame, how)(func, *args, **kwds)
171179
expected = getattr(float_frame, func)(*args, **kwds)
172180
tm.assert_series_equal(result, expected)
173181

@@ -1314,30 +1322,32 @@ def test_nuiscance_columns(self):
13141322
)
13151323
tm.assert_frame_equal(result, expected)
13161324

1317-
def test_non_callable_aggregates(self):
1325+
@pytest.mark.parametrize("how", ["agg", "apply"])
1326+
def test_non_callable_aggregates(self, how):
13181327

13191328
# GH 16405
13201329
# 'size' is a property of frame/series
13211330
# validate that this is working
1331+
# GH 39116 - expand to apply
13221332
df = DataFrame(
13231333
{"A": [None, 2, 3], "B": [1.0, np.nan, 3.0], "C": ["foo", None, "bar"]}
13241334
)
13251335

13261336
# Function aggregate
1327-
result = df.agg({"A": "count"})
1337+
result = getattr(df, how)({"A": "count"})
13281338
expected = Series({"A": 2})
13291339

13301340
tm.assert_series_equal(result, expected)
13311341

13321342
# Non-function aggregate
1333-
result = df.agg({"A": "size"})
1343+
result = getattr(df, how)({"A": "size"})
13341344
expected = Series({"A": 3})
13351345

13361346
tm.assert_series_equal(result, expected)
13371347

13381348
# Mix function and non-function aggs
1339-
result1 = df.agg(["count", "size"])
1340-
result2 = df.agg(
1349+
result1 = getattr(df, how)(["count", "size"])
1350+
result2 = getattr(df, how)(
13411351
{"A": ["count", "size"], "B": ["count", "size"], "C": ["count", "size"]}
13421352
)
13431353
expected = DataFrame(
@@ -1352,13 +1362,13 @@ def test_non_callable_aggregates(self):
13521362
tm.assert_frame_equal(result2, expected, check_like=True)
13531363

13541364
# Just functional string arg is same as calling df.arg()
1355-
result = df.agg("count")
1365+
result = getattr(df, how)("count")
13561366
expected = df.count()
13571367

13581368
tm.assert_series_equal(result, expected)
13591369

13601370
# Just a string attribute arg same as calling df.arg
1361-
result = df.agg("size")
1371+
result = getattr(df, how)("size")
13621372
expected = df.size
13631373

13641374
assert result == expected
@@ -1577,3 +1587,28 @@ def test_apply_raw_returns_string():
15771587
result = df.apply(lambda x: x[0], axis=1, raw=True)
15781588
expected = Series(["aa", "bbb"])
15791589
tm.assert_series_equal(result, expected)
1590+
1591+
1592+
@pytest.mark.parametrize(
1593+
"op", ["abs", "ceil", "cos", "cumsum", "exp", "log", "sqrt", "square"]
1594+
)
1595+
@pytest.mark.parametrize("how", ["transform", "apply"])
1596+
def test_apply_np_transformer(float_frame, op, how):
1597+
# GH 39116
1598+
result = getattr(float_frame, how)(op)
1599+
expected = getattr(np, op)(float_frame)
1600+
tm.assert_frame_equal(result, expected)
1601+
1602+
1603+
@pytest.mark.parametrize("op", ["mean", "median", "std", "var"])
1604+
@pytest.mark.parametrize("how", ["agg", "apply"])
1605+
def test_apply_np_reducer(float_frame, op, how):
1606+
# GH 39116
1607+
float_frame = DataFrame({"a": [1, 2], "b": [3, 4]})
1608+
result = getattr(float_frame, how)(op)
1609+
# pandas ddof defaults to 1, numpy to 0
1610+
kwargs = {"ddof": 1} if op in ("std", "var") else {}
1611+
expected = Series(
1612+
getattr(np, op)(float_frame, axis=0, **kwargs), index=float_frame.columns
1613+
)
1614+
tm.assert_series_equal(result, expected)

pandas/tests/series/apply/test_series_apply.py

+6-4
Original file line number | Diff line number | Diff line change
@@ -338,19 +338,21 @@ def test_reduce(self, string_series):
338338
)
339339
tm.assert_series_equal(result, expected)
340340

341-
def test_non_callable_aggregates(self):
341+
@pytest.mark.parametrize("how", ["agg", "apply"])
342+
def test_non_callable_aggregates(self, how):
342343
# test agg using non-callable series attributes
344+
# GH 39116 - expand to apply
343345
s = Series([1, 2, None])
344346

345347
# Calling agg w/ just a string arg same as calling s.arg
346-
result = s.agg("size")
348+
result = getattr(s, how)("size")
347349
expected = s.size
348350
assert result == expected
349351

350352
# test when mixed w/ callable reducers
351-
result = s.agg(["size", "count", "mean"])
353+
result = getattr(s, how)(["size", "count", "mean"])
352354
expected = Series({"size": 3.0, "count": 2.0, "mean": 1.5})
353-
tm.assert_series_equal(result[expected.index], expected)
355+
tm.assert_series_equal(result, expected)
354356

355357
@pytest.mark.parametrize(
356358
"series, func, expected",

0 commit comments

Comments
 (0)