Skip to content

ENH: Allow numpy ops and DataFrame properties as str arguments to DataFrame.apply #39118

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ Other enhancements
- Add support for dict-like names in :meth:`MultiIndex.set_names` and :meth:`MultiIndex.rename` (:issue:`20421`)
- :func:`pandas.read_excel` can now auto-detect .xlsb files (:issue:`35416`)
- :meth:`.Rolling.sum`, :meth:`.Expanding.sum`, :meth:`.Rolling.mean`, :meth:`.Expanding.mean`, :meth:`.Rolling.median`, :meth:`.Expanding.median`, :meth:`.Rolling.max`, :meth:`.Expanding.max`, :meth:`.Rolling.min`, and :meth:`.Expanding.min` now support ``Numba`` execution with the ``engine`` keyword (:issue:`38895`)
- :meth:`DataFrame.apply` can now accept NumPy unary operators as strings, e.g. ``df.apply("sqrt")``, which was already the case for :meth:`Series.apply` (:issue:`39116`)
- :meth:`DataFrame.apply` can now accept non-callable DataFrame properties as strings, e.g. ``df.apply("size")``, which was already the case for :meth:`Series.apply` (:issue:`39116`)

.. ---------------------------------------------------------------------------

Expand Down
47 changes: 33 additions & 14 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,11 @@ def agg(self) -> Tuple[Optional[FrameOrSeriesUnion], Optional[bool]]:
if _axis is None:
_axis = getattr(obj, "axis", 0)

if isinstance(arg, str):
return obj._try_aggregate_string_function(arg, *args, **kwargs), None
elif is_dict_like(arg):
result = self.maybe_apply_str()
if result is not None:
return result, None

if is_dict_like(arg):
arg = cast(AggFuncTypeDict, arg)
return agg_dict_like(obj, arg, _axis), True
elif is_list_like(arg):
Expand All @@ -171,6 +173,28 @@ def agg(self) -> Tuple[Optional[FrameOrSeriesUnion], Optional[bool]]:
# caller can react
return result, True

def maybe_apply_str(self) -> Optional[FrameOrSeriesUnion]:
    """
    Dispatch ``self.f`` by name when it is a string.

    The string is looked up as an attribute on ``self.obj``. If that
    attribute is callable and its signature declares an ``axis`` parameter
    (either positional-or-keyword or keyword-only), ``self.axis`` is stored
    in ``self.kwds`` so it is forwarded with the call. The actual dispatch
    is delegated to ``self.obj._try_aggregate_string_function``.

    Returns
    -------
    Series, DataFrame, or None
        ``None`` when ``self.f`` is not a string; otherwise whatever
        ``self.obj._try_aggregate_string_function`` returns for ``self.f``,
        ``self.args`` and ``self.kwds``.

    Notes
    -----
    ``self.kwds`` is updated in place when ``axis`` is inserted.
    """
    f = self.f
    if not isinstance(f, str):
        return None

    # Support for `frame.transform('method')`
    # Some methods (shift, etc.) require the axis argument, others
    # don't, so inspect and insert if necessary.
    func = getattr(self.obj, f, None)
    if callable(func):
        sig = inspect.getfullargspec(func)
        # ``axis`` may be declared keyword-only (after ``*``), in which case
        # it is listed in ``kwonlyargs`` rather than ``args``.
        if "axis" in sig.args or "axis" in sig.kwonlyargs:
            self.kwds["axis"] = self.axis
    return self.obj._try_aggregate_string_function(f, *self.args, **self.kwds)


class FrameApply(Apply):
obj: DataFrame
Expand Down Expand Up @@ -236,15 +260,9 @@ def apply(self) -> FrameOrSeriesUnion:
return self.apply_empty_result()

# string dispatch
if isinstance(self.f, str):
# Support for `frame.transform('method')`
# Some methods (shift, etc.) require the axis argument, others
# don't, so inspect and insert if necessary.
func = getattr(self.obj, self.f)
sig = inspect.getfullargspec(func)
if "axis" in sig.args:
self.kwds["axis"] = self.axis
return func(*self.args, **self.kwds)
result = self.maybe_apply_str()
if result is not None:
return result

# ufunc
elif isinstance(self.f, np.ufunc):
Expand Down Expand Up @@ -581,8 +599,9 @@ def apply(self) -> FrameOrSeriesUnion:
return obj.aggregate(func, *args, **kwds)

# if we are a string, try to dispatch
if isinstance(func, str):
return obj._try_aggregate_string_function(func, *args, **kwds)
result = self.maybe_apply_str()
if result is not None:
return result

return self.apply_standard()

Expand Down
53 changes: 44 additions & 9 deletions pandas/tests/frame/apply/test_frame_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,16 @@ def test_apply_standard_nonunique(self):
pytest.param([1, None], {"numeric_only": True}, id="args_and_kwds"),
],
)
def test_apply_with_string_funcs(self, float_frame, func, args, kwds):
result = float_frame.apply(func, *args, **kwds)
@pytest.mark.parametrize("how", ["agg", "apply"])
def test_apply_with_string_funcs(self, request, float_frame, func, args, kwds, how):
if len(args) > 1 and how == "agg":
request.node.add_marker(
pytest.mark.xfail(
reason="agg/apply signature mismatch - agg passes 2nd "
"argument to func"
)
)
result = getattr(float_frame, how)(func, *args, **kwds)
expected = getattr(float_frame, func)(*args, **kwds)
tm.assert_series_equal(result, expected)

Expand Down Expand Up @@ -1314,30 +1322,32 @@ def test_nuiscance_columns(self):
)
tm.assert_frame_equal(result, expected)

def test_non_callable_aggregates(self):
@pytest.mark.parametrize("how", ["agg", "apply"])
def test_non_callable_aggregates(self, how):

# GH 16405
# 'size' is a property of frame/series
# validate that this is working
# GH 39116 - expand to apply
df = DataFrame(
{"A": [None, 2, 3], "B": [1.0, np.nan, 3.0], "C": ["foo", None, "bar"]}
)

# Function aggregate
result = df.agg({"A": "count"})
result = getattr(df, how)({"A": "count"})
expected = Series({"A": 2})

tm.assert_series_equal(result, expected)

# Non-function aggregate
result = df.agg({"A": "size"})
result = getattr(df, how)({"A": "size"})
expected = Series({"A": 3})

tm.assert_series_equal(result, expected)

# Mix function and non-function aggs
result1 = df.agg(["count", "size"])
result2 = df.agg(
result1 = getattr(df, how)(["count", "size"])
result2 = getattr(df, how)(
{"A": ["count", "size"], "B": ["count", "size"], "C": ["count", "size"]}
)
expected = DataFrame(
Expand All @@ -1352,13 +1362,13 @@ def test_non_callable_aggregates(self):
tm.assert_frame_equal(result2, expected, check_like=True)

# Just functional string arg is same as calling df.arg()
result = df.agg("count")
result = getattr(df, how)("count")
expected = df.count()

tm.assert_series_equal(result, expected)

# Just a string attribute arg same as calling df.arg
result = df.agg("size")
result = getattr(df, how)("size")
expected = df.size

assert result == expected
Expand Down Expand Up @@ -1577,3 +1587,28 @@ def test_apply_raw_returns_string():
result = df.apply(lambda x: x[0], axis=1, raw=True)
expected = Series(["aa", "bbb"])
tm.assert_series_equal(result, expected)


@pytest.mark.parametrize(
    "op", ["abs", "ceil", "cos", "cumsum", "exp", "log", "sqrt", "square"]
)
@pytest.mark.parametrize("how", ["transform", "apply"])
def test_apply_np_transformer(float_frame, op, how):
    # GH 39116
    # Passing the name of a NumPy function as a string to
    # ``transform``/``apply`` should match calling that NumPy function
    # on the frame directly.
    frame_method = getattr(float_frame, how)
    result = frame_method(op)

    numpy_func = getattr(np, op)
    expected = numpy_func(float_frame)

    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("op", ["mean", "median", "std", "var"])
@pytest.mark.parametrize("how", ["agg", "apply"])
def test_apply_np_reducer(float_frame, op, how):
    # GH 39116
    # The ``float_frame`` fixture is requested by the signature but the
    # test runs against this small fixed frame instead.
    frame = DataFrame({"a": [1, 2], "b": [3, 4]})
    result = getattr(frame, how)(op)

    # pandas ddof defaults to 1, numpy to 0
    if op in ("std", "var"):
        np_kwargs = {"ddof": 1}
    else:
        np_kwargs = {}

    reduced = getattr(np, op)(frame, axis=0, **np_kwargs)
    expected = Series(reduced, index=frame.columns)
    tm.assert_series_equal(result, expected)
10 changes: 6 additions & 4 deletions pandas/tests/series/apply/test_series_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,19 +338,21 @@ def test_reduce(self, string_series):
)
tm.assert_series_equal(result, expected)

def test_non_callable_aggregates(self):
@pytest.mark.parametrize("how", ["agg", "apply"])
def test_non_callable_aggregates(self, how):
# test agg using non-callable series attributes
# GH 39116 - expand to apply
s = Series([1, 2, None])

# Calling agg w/ just a string arg same as calling s.arg
result = s.agg("size")
result = getattr(s, how)("size")
expected = s.size
assert result == expected

# test when mixed w/ callable reducers
result = s.agg(["size", "count", "mean"])
result = getattr(s, how)(["size", "count", "mean"])
expected = Series({"size": 3.0, "count": 2.0, "mean": 1.5})
tm.assert_series_equal(result[expected.index], expected)
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize(
"series, func, expected",
Expand Down