Skip to content

DEPR: apply/agg using agg/apply #42833

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,10 @@ def f(x):
self.orig_f: AggFuncType = func
self.f: AggFuncType = f

# For deprecation warning on apply using agg
self.apply_used_agg = False
self.agg_used_apply = False

@abc.abstractmethod
def apply(self) -> DataFrame | Series:
    """
    Abstract placeholder for the apply operation.

    The body is intentionally empty: concrete subclasses are expected to
    override this method and return a DataFrame or a Series.
    """
Expand Down Expand Up @@ -514,7 +518,9 @@ def apply_multiple(self) -> DataFrame | Series:
result: Series, DataFrame, or None
Result when self.f is a list-like or dict-like, None otherwise.
"""
return self.obj.aggregate(self.f, self.axis, *self.args, **self.kwargs)
self.apply_used_agg = True
_, result = self.obj._aggregate(self.f, self.axis, *self.args, **self.kwargs)
return result

def normalize_dictlike_arg(
self, how: str, obj: DataFrame | Series, func: AggFuncTypeDict
Expand Down Expand Up @@ -711,7 +717,8 @@ def agg(self):
result = result.T if result is not None else result

if result is None:
result = self.obj.apply(self.orig_f, axis, args=self.args, **self.kwargs)
self.agg_used_apply = True
_, result = obj._apply(self.orig_f, axis, args=self.args, **self.kwargs)

return result

Expand Down
6 changes: 6 additions & 0 deletions pandas/core/arraylike.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,12 @@ def reconstruct(result):
elif self.ndim == 1:
# ufunc(series, ...)
inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs)
if method == "apply":
# Avoid deprecation warnings from an internal call
method = "_apply"
elif method == "agg":
# Avoid deprecation warnings from an internal call
method = "_agg"
result = getattr(ufunc, method)(*inputs, **kwargs)
else:
# ufunc(dataframe)
Expand Down
39 changes: 37 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -8516,6 +8516,19 @@ def _gotitem(
examples=_agg_examples_doc,
)
def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs):
    # Public aggregation entry point. The computation itself is delegated to
    # ``self._aggregate``, which hands back a ``(used_apply, result)`` pair;
    # this wrapper only decides whether a FutureWarning has to be raised
    # before the result is returned to the caller.
    #
    # NOTE(review): no docstring is added here on purpose -- the decorator
    # call that closes just above this ``def`` appears to supply the
    # user-facing documentation. Confirm before adding one.
    fell_back_to_apply, outcome = self._aggregate(func, axis, *args, **kwargs)
    if not fell_back_to_apply:
        return outcome

    message = (
        "pandas internally used apply() to compute part of the result. "
        "In a future version, agg() will be used internally instead, "
        "possibly resulting in a different behavior."
    )
    # stacklevel=2 attributes the warning to the code that called this
    # method rather than to this wrapper.
    warnings.warn(message, FutureWarning, stacklevel=2)
    return outcome

def _aggregate(self, func=None, axis: Axis = 0, *args, **kwargs):
"""Method for internal calls to aggregate."""
from pandas.core.apply import frame_apply

axis = self._get_axis_number(axis)
Expand All @@ -8537,7 +8550,7 @@ def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs):
result_in_dict = relabel_result(result, func, columns, order)
result = DataFrame(result_in_dict, index=columns)

return result
return op.agg_used_apply, result

agg = aggregate

Expand Down Expand Up @@ -8702,6 +8715,27 @@ def apply(
1 1 2
2 1 2
"""
used_agg, result = self._apply(func, axis, raw, result_type, args, **kwargs)
if used_agg:
warnings.warn(
"pandas internally used aggregate() to compute part of the result. "
"In a future version, apply() will be used internally instead, "
"possibly resulting in a different behavior.",
FutureWarning,
stacklevel=2,
)
return result

def _apply(
self,
func: AggFuncType,
axis: Axis = 0,
raw: bool = False,
result_type=None,
args=(),
**kwargs,
):
"""For internal calls to apply to avoid deprecation warnings."""
from pandas.core.apply import frame_apply

op = frame_apply(
Expand All @@ -8713,7 +8747,8 @@ def apply(
args=args,
kwargs=kwargs,
)
return op.apply()
result = op.apply()
return op.apply_used_agg, result

def applymap(
self, func: PythonFuncType, na_action: str | None = None, **kwargs
Expand Down
40 changes: 36 additions & 4 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4209,6 +4209,20 @@ def _gotitem(self, key, ndim, subset=None) -> Series:
examples=_agg_examples_doc,
)
def aggregate(self, func=None, axis=0, *args, **kwargs):
    # Thin public wrapper around ``self._aggregate``. That helper returns a
    # two-item tuple: a flag saying whether ``apply()`` was used internally,
    # and the computed result. The flag is turned into a FutureWarning here
    # so the helper itself stays silent.
    #
    # NOTE(review): left without a docstring deliberately -- the decorator
    # call ending right above this ``def`` appears to provide the public
    # documentation. Confirm before adding one.
    apply_was_used, value = self._aggregate(func, axis, *args, **kwargs)
    if not apply_was_used:
        return value

    notice = (
        "pandas internally used apply() to compute part of the result. "
        "In a future version, agg() will be used internally instead, "
        "possibly resulting in a different behavior."
    )
    # stacklevel=2 makes the warning point at this method's caller.
    warnings.warn(notice, FutureWarning, stacklevel=2)
    return value

agg = aggregate

def _aggregate(self, func=None, axis=0, *args, **kwargs):
# Validate the axis parameter
self._get_axis_number(axis)

Expand All @@ -4218,9 +4232,7 @@ def aggregate(self, func=None, axis=0, *args, **kwargs):

op = SeriesApply(self, func, convert_dtype=False, args=args, kwargs=kwargs)
result = op.agg()
return result

agg = aggregate
return op.agg_used_apply, result

@doc(
_shared_docs["transform"],
Expand Down Expand Up @@ -4347,7 +4359,27 @@ def apply(
Helsinki 2.484907
dtype: float64
"""
return SeriesApply(self, func, convert_dtype, args, kwargs).apply()
used_agg, result = self._apply(func, convert_dtype, args, **kwargs)
if used_agg:
warnings.warn(
"pandas internally used aggregate() to compute part of the result. "
"In a future version, apply() will be used internally instead, "
"possibly resulting in a different behavior.",
FutureWarning,
stacklevel=2,
)
return result

def _apply(
    self,
    func: AggFuncType,
    convert_dtype: bool = True,
    args: tuple[Any, ...] = (),
    **kwargs,
) -> tuple[bool, DataFrame | Series]:
    """
    Run the apply operation and report whether aggregate was used.

    This helper builds a ``SeriesApply`` object, runs its ``apply()`` and
    returns the result together with the object's ``apply_used_agg`` flag.
    It does not issue the FutureWarning itself; the public ``apply``
    wrapper is the one that inspects the flag and warns.

    Parameters
    ----------
    func : AggFuncType
        Forwarded unchanged to ``SeriesApply``.
    convert_dtype : bool, default True
        Forwarded unchanged to ``SeriesApply``.
    args : tuple
        Forwarded unchanged to ``SeriesApply``.
    **kwargs
        Collected into a dict and forwarded to ``SeriesApply``.

    Returns
    -------
    tuple of (bool, DataFrame or Series)
        The ``apply_used_agg`` flag, read after ``apply()`` has run, and the
        value ``apply()`` returned.
    """
    applier = SeriesApply(
        self, func, convert_dtype=convert_dtype, args=args, kwargs=kwargs
    )
    # Run first, read the flag second: the flag is only meaningful once the
    # operation has actually executed.
    outcome = applier.apply()
    return applier.apply_used_agg, outcome

def _reduce(
self,
Expand Down
46 changes: 31 additions & 15 deletions pandas/tests/apply/test_frame_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ def test_apply_axis1_with_ea():
def test_agg_axis1_duplicate_index(data, dtype):
# GH 42380
expected = DataFrame([[data], [data]], index=["a", "a"], dtype=dtype)
result = expected.agg(lambda x: x, axis=1)
with tm.assert_produces_warning(FutureWarning, match="used apply"):
result = expected.agg(lambda x: x, axis=1)
tm.assert_frame_equal(result, expected)


Expand Down Expand Up @@ -651,7 +652,12 @@ def apply_list(row):
return [2 * row["A"], 2 * row["C"], 2 * row["B"]]

df = DataFrame(np.zeros((4, 4)), columns=list("ABCD"))
result = getattr(df, op)(apply_list, axis=1)
if op == "apply":
klass, msg = None, None
else:
klass, msg = FutureWarning, "used apply"
with tm.assert_produces_warning(klass, match=msg):
result = getattr(df, op)(apply_list, axis=1)
expected = Series(
[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
)
Expand Down Expand Up @@ -1008,7 +1014,8 @@ def test_agg_transform(axis, float_frame):
tm.assert_frame_equal(result, expected)

# list-like
result = float_frame.apply([np.sqrt], axis=axis)
with tm.assert_produces_warning(FutureWarning, match="used aggregate"):
result = float_frame.apply([np.sqrt], axis=axis)
expected = f_sqrt.copy()
if axis in {0, "index"}:
expected.columns = MultiIndex.from_product([float_frame.columns, ["sqrt"]])
Expand All @@ -1019,7 +1026,8 @@ def test_agg_transform(axis, float_frame):
# multiple items in list
# these are in the order as if we are applying both
# functions per series and then concatting
result = float_frame.apply([np.abs, np.sqrt], axis=axis)
with tm.assert_produces_warning(FutureWarning, match="used aggregate"):
result = float_frame.apply([np.abs, np.sqrt], axis=axis)
expected = zip_frames([f_abs, f_sqrt], axis=other_axis)
if axis in {0, "index"}:
expected.columns = MultiIndex.from_product(
Expand Down Expand Up @@ -1218,24 +1226,29 @@ def test_non_callable_aggregates(how):
df = DataFrame(
{"A": [None, 2, 3], "B": [1.0, np.nan, 3.0], "C": ["foo", None, "bar"]}
)

# Function aggregate
result = getattr(df, how)({"A": "count"})
if how == "apply":
klass, msg = FutureWarning, "used aggregate"
else:
klass, msg = None, None
with tm.assert_produces_warning(klass, match=msg):
result = getattr(df, how)({"A": "count"})
expected = Series({"A": 2})

tm.assert_series_equal(result, expected)

# Non-function aggregate
result = getattr(df, how)({"A": "size"})
with tm.assert_produces_warning(klass, match=msg):
result = getattr(df, how)({"A": "size"})
expected = Series({"A": 3})

tm.assert_series_equal(result, expected)

# Mix function and non-function aggs
result1 = getattr(df, how)(["count", "size"])
result2 = getattr(df, how)(
{"A": ["count", "size"], "B": ["count", "size"], "C": ["count", "size"]}
)
with tm.assert_produces_warning(klass, match=msg):
result1 = getattr(df, how)(["count", "size"])
result2 = getattr(df, how)(
{"A": ["count", "size"], "B": ["count", "size"], "C": ["count", "size"]}
)
expected = DataFrame(
{
"A": {"count": 2, "size": 3},
Expand Down Expand Up @@ -1277,7 +1290,8 @@ def test_agg_listlike_result():
def func(group_col):
return list(group_col.dropna().unique())

result = df.agg(func)
with tm.assert_produces_warning(FutureWarning, match="used apply"):
result = df.agg(func)
expected = Series([[2, 3], [1.5], ["foo", "bar"]], index=["A", "B", "C"])
tm.assert_series_equal(result, expected)

Expand Down Expand Up @@ -1310,7 +1324,8 @@ def f(x, a, b, c=3):
else:
expected = Series([4.0, 8.0])

result = df.agg(f, axis, *args, **kwargs)
with tm.assert_produces_warning(FutureWarning, match="used apply"):
result = df.agg(f, axis, *args, **kwargs)

tm.assert_series_equal(result, expected)

Expand Down Expand Up @@ -1398,7 +1413,8 @@ def test_apply_empty_list_reduce():
def test_apply_no_suffix_index():
# GH36189
pdf = DataFrame([[4, 9]] * 3, columns=["A", "B"])
result = pdf.apply(["sum", lambda x: x.sum(), lambda x: x.sum()])
with tm.assert_produces_warning(FutureWarning, match="used aggregate"):
result = pdf.apply(["sum", lambda x: x.sum(), lambda x: x.sum()])
expected = DataFrame(
{"A": [12, 12, 12], "B": [27, 27, 27]}, index=["sum", "<lambda>", "<lambda>"]
)
Expand Down
Loading