Skip to content

Commit ba234b7

Browse files
topper-123 and rhshadrach
authored and
im-vinicius
committed
DEPR: make Series.agg aggregate when possible (pandas-dev#53325)
* BUG: make Series.agg aggregate when possible
* fix doc build
* deprecate instead of treating as a bug
* CLN: Apply.agg_list_like
* some cleanups
* REF/CLN: func in core.apply (pandas-dev#53437)
* REF/CLN: func in core.apply
* Remove type-hint
* REF: Decouple Series.apply from Series.agg (pandas-dev#53400)
* update test
* fix issues
* fix issues
* fix issues

---------

Co-authored-by: Richard Shadrach <[email protected]>
1 parent 0d509c8 commit ba234b7

File tree

7 files changed

+125
-38
lines changed

7 files changed

+125
-38
lines changed

doc/source/whatsnew/v2.1.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,8 @@ Deprecations
235235
- Deprecated ``axis=1`` in :meth:`DataFrame.groupby` and in :class:`Grouper` constructor, do ``frame.T.groupby(...)`` instead (:issue:`51203`)
236236
- Deprecated accepting slices in :meth:`DataFrame.take`, call ``obj[slicer]`` or pass a sequence of integers instead (:issue:`51539`)
237237
- Deprecated explicit support for subclassing :class:`Index` (:issue:`45289`)
238+
- Deprecated the behavior of :meth:`Series.agg` where a given function is first applied to each element of the :class:`Series` and is applied to the whole :class:`Series` only if the elementwise operation fails. In the future, functions given to :meth:`Series.agg` will always operate on the whole :class:`Series` only. To keep the current behavior, use :meth:`Series.transform` instead. (:issue:`53325`)
239+
- Deprecated the behavior of :meth:`DataFrame.agg` where each function in a given list of functions is first applied to each element of the :class:`DataFrame` and is applied to the columns of the :class:`DataFrame` only if the elementwise operation fails. To keep the current behavior, use :meth:`DataFrame.transform` instead. (:issue:`53325`)
238240
- Deprecated passing a :class:`DataFrame` to :meth:`DataFrame.from_records`, use :meth:`DataFrame.set_index` or :meth:`DataFrame.drop` instead (:issue:`51353`)
239241
- Deprecated silently dropping unrecognized timezones when parsing strings to datetimes (:issue:`18702`)
240242
- Deprecated the ``axis`` keyword in :meth:`DataFrame.ewm`, :meth:`Series.ewm`, :meth:`DataFrame.rolling`, :meth:`Series.rolling`, :meth:`DataFrame.expanding`, :meth:`Series.expanding` (:issue:`51778`)

pandas/core/apply.py

+13-11
Original file line numberDiff line numberDiff line change
@@ -1121,23 +1121,25 @@ def apply(self) -> DataFrame | Series:
11211121
def agg(self):
11221122
result = super().agg()
11231123
if result is None:
1124+
obj = self.obj
11241125
func = self.func
1125-
11261126
# string, list-like, and dict-like are entirely handled in super
11271127
assert callable(func)
11281128

1129-
# try a regular apply, this evaluates lambdas
1130-
# row-by-row; however if the lambda is expected a Series
1131-
# expression, e.g.: lambda x: x-x.quantile(0.25)
1132-
# this will fail, so we can try a vectorized evaluation
1133-
1134-
# we cannot FIRST try the vectorized evaluation, because
1135-
# then .agg and .apply would have different semantics if the
1136-
# operation is actually defined on the Series, e.g. str
1129+
# GH53325: The setup below is just to keep current behavior while emitting a
1130+
# deprecation message. In the future this will all be replaced with a simple
1131+
# `result = f(self.obj, *self.args, **self.kwargs)`.
11371132
try:
1138-
result = self.obj.apply(func, args=self.args, **self.kwargs)
1133+
result = obj.apply(func, args=self.args, **self.kwargs)
11391134
except (ValueError, AttributeError, TypeError):
1140-
result = func(self.obj, *self.args, **self.kwargs)
1135+
result = func(obj, *self.args, **self.kwargs)
1136+
else:
1137+
msg = (
1138+
f"using {func} in {type(obj).__name__}.agg cannot aggregate and "
1139+
f"has been deprecated. Use {type(obj).__name__}.transform to "
1140+
f"keep behavior unchanged."
1141+
)
1142+
warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())
11411143

11421144
return result
11431145

pandas/tests/apply/test_frame_apply.py

+17-8
Original file line numberDiff line numberDiff line change
@@ -1478,8 +1478,8 @@ def test_any_apply_keyword_non_zero_axis_regression():
14781478
tm.assert_series_equal(result, expected)
14791479

14801480

1481-
def test_agg_list_like_func_with_args():
1482-
# GH 50624
1481+
def test_agg_mapping_func_deprecated():
1482+
# GH 53325
14831483
df = DataFrame({"x": [1, 2, 3]})
14841484

14851485
def foo1(x, a=1, c=0):
@@ -1488,17 +1488,26 @@ def foo1(x, a=1, c=0):
14881488
def foo2(x, b=2, c=0):
14891489
return x + b + c
14901490

1491-
msg = r"foo1\(\) got an unexpected keyword argument 'b'"
1492-
with pytest.raises(TypeError, match=msg):
1493-
df.agg([foo1, foo2], 0, 3, b=3, c=4)
1491+
# single func already takes the vectorized path
1492+
result = df.agg(foo1, 0, 3, c=4)
1493+
expected = df + 7
1494+
tm.assert_frame_equal(result, expected)
1495+
1496+
msg = "using .+ in Series.agg cannot aggregate and"
14941497

1495-
result = df.agg([foo1, foo2], 0, 3, c=4)
1498+
with tm.assert_produces_warning(FutureWarning, match=msg):
1499+
result = df.agg([foo1, foo2], 0, 3, c=4)
14961500
expected = DataFrame(
1497-
[[8, 8], [9, 9], [10, 10]],
1498-
columns=MultiIndex.from_tuples([("x", "foo1"), ("x", "foo2")]),
1501+
[[8, 8], [9, 9], [10, 10]], columns=[["x", "x"], ["foo1", "foo2"]]
14991502
)
15001503
tm.assert_frame_equal(result, expected)
15011504

1505+
# TODO: the result below is wrong, should be fixed (GH53325)
1506+
with tm.assert_produces_warning(FutureWarning, match=msg):
1507+
result = df.agg({"x": foo1}, 0, 3, c=4)
1508+
expected = DataFrame([2, 3, 4], columns=["x"])
1509+
tm.assert_frame_equal(result, expected)
1510+
15021511

15031512
def test_agg_std():
15041513
df = DataFrame(np.arange(6).reshape(3, 2), columns=["A", "B"])

pandas/tests/apply/test_frame_transform.py

+22
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,28 @@ def test_transform_empty_listlike(float_frame, ops, frame_or_series):
6666
obj.transform(ops)
6767

6868

69+
def test_transform_listlike_func_with_args():
70+
# GH 50624
71+
df = DataFrame({"x": [1, 2, 3]})
72+
73+
def foo1(x, a=1, c=0):
74+
return x + a + c
75+
76+
def foo2(x, b=2, c=0):
77+
return x + b + c
78+
79+
msg = r"foo1\(\) got an unexpected keyword argument 'b'"
80+
with pytest.raises(TypeError, match=msg):
81+
df.transform([foo1, foo2], 0, 3, b=3, c=4)
82+
83+
result = df.transform([foo1, foo2], 0, 3, c=4)
84+
expected = DataFrame(
85+
[[8, 8], [9, 9], [10, 10]],
86+
columns=MultiIndex.from_tuples([("x", "foo1"), ("x", "foo2")]),
87+
)
88+
tm.assert_frame_equal(result, expected)
89+
90+
6991
@pytest.mark.parametrize("box", [dict, Series])
7092
def test_transform_dictlike(axis, float_frame, box):
7193
# GH 35964

pandas/tests/apply/test_invalid_arg.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
from itertools import chain
1010
import re
11+
import warnings
1112

1213
import numpy as np
1314
import pytest
@@ -307,7 +308,10 @@ def test_transform_and_agg_err_series(string_series, func, msg):
307308
# we are trying to transform with an aggregator
308309
with pytest.raises(ValueError, match=msg):
309310
with np.errstate(all="ignore"):
310-
string_series.agg(func)
311+
# GH53325
312+
with warnings.catch_warnings():
313+
warnings.simplefilter("ignore", FutureWarning)
314+
string_series.agg(func)
311315

312316

313317
@pytest.mark.parametrize("func", [["max", "min"], ["max", "sqrt"]])

pandas/tests/apply/test_series_apply.py

+31-18
Original file line numberDiff line numberDiff line change
@@ -108,14 +108,18 @@ def f(x, a=0, b=0, c=0):
108108
return x + a + 10 * b + 100 * c
109109

110110
s = Series([1, 2])
111-
result = s.agg(f, 0, *args, **kwargs)
111+
msg = (
112+
"in Series.agg cannot aggregate and has been deprecated. "
113+
"Use Series.transform to keep behavior unchanged."
114+
)
115+
with tm.assert_produces_warning(FutureWarning, match=msg):
116+
result = s.agg(f, 0, *args, **kwargs)
112117
expected = s + increment
113118
tm.assert_series_equal(result, expected)
114119

115120

116-
def test_agg_list_like_func_with_args():
117-
# GH 50624
118-
121+
def test_agg_mapping_func_deprecated():
122+
# GH 53325
119123
s = Series([1, 2, 3])
120124

121125
def foo1(x, a=1, c=0):
@@ -124,13 +128,13 @@ def foo1(x, a=1, c=0):
124128
def foo2(x, b=2, c=0):
125129
return x + b + c
126130

127-
msg = r"foo1\(\) got an unexpected keyword argument 'b'"
128-
with pytest.raises(TypeError, match=msg):
129-
s.agg([foo1, foo2], 0, 3, b=3, c=4)
130-
131-
result = s.agg([foo1, foo2], 0, 3, c=4)
132-
expected = DataFrame({"foo1": [8, 9, 10], "foo2": [8, 9, 10]})
133-
tm.assert_frame_equal(result, expected)
131+
msg = "using .+ in Series.agg cannot aggregate and"
132+
with tm.assert_produces_warning(FutureWarning, match=msg):
133+
s.agg(foo1, 0, 3, c=4)
134+
with tm.assert_produces_warning(FutureWarning, match=msg):
135+
s.agg([foo1, foo2], 0, 3, c=4)
136+
with tm.assert_produces_warning(FutureWarning, match=msg):
137+
s.agg({"a": foo1, "b": foo2}, 0, 3, c=4)
134138

135139

136140
def test_series_apply_map_box_timestamps(by_row):
@@ -391,23 +395,32 @@ def test_apply_map_evaluate_lambdas_the_same(string_series, func, by_row):
391395
assert result == str(string_series)
392396

393397

394-
def test_with_nested_series(datetime_series):
398+
def test_agg_evaluate_lambdas(string_series):
399+
# GH53325
400+
# in the future, the result will be the Series class itself (the type of the whole object), not a Series of per-element results.
401+
402+
with tm.assert_produces_warning(FutureWarning):
403+
result = string_series.agg(lambda x: type(x))
404+
assert isinstance(result, Series) and len(result) == len(string_series)
405+
406+
with tm.assert_produces_warning(FutureWarning):
407+
result = string_series.agg(type)
408+
assert isinstance(result, Series) and len(result) == len(string_series)
409+
410+
411+
@pytest.mark.parametrize("op_name", ["agg", "apply"])
412+
def test_with_nested_series(datetime_series, op_name):
395413
# GH 2316
396414
# .agg with a reducer and a transform, what to do
397415
msg = "Returning a DataFrame from Series.apply when the supplied function"
398416
with tm.assert_produces_warning(FutureWarning, match=msg):
399417
# GH52123
400-
result = datetime_series.apply(
418+
result = getattr(datetime_series, op_name)(
401419
lambda x: Series([x, x**2], index=["x", "x^2"])
402420
)
403421
expected = DataFrame({"x": datetime_series, "x^2": datetime_series**2})
404422
tm.assert_frame_equal(result, expected)
405423

406-
with tm.assert_produces_warning(FutureWarning, match=msg):
407-
# GH52123
408-
result = datetime_series.agg(lambda x: Series([x, x**2], index=["x", "x^2"]))
409-
tm.assert_frame_equal(result, expected)
410-
411424

412425
def test_replicate_describe(string_series, by_row):
413426
# this also tests a result set that is all scalars

pandas/tests/apply/test_series_transform.py

+35
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,21 @@
1010
import pandas._testing as tm
1111

1212

13+
@pytest.mark.parametrize(
14+
"args, kwargs, increment",
15+
[((), {}, 0), ((), {"a": 1}, 1), ((2, 3), {}, 32), ((1,), {"c": 2}, 201)],
16+
)
17+
def test_agg_args(args, kwargs, increment):
18+
# GH 43357
19+
def f(x, a=0, b=0, c=0):
20+
return x + a + 10 * b + 100 * c
21+
22+
s = Series([1, 2])
23+
result = s.transform(f, 0, *args, **kwargs)
24+
expected = s + increment
25+
tm.assert_series_equal(result, expected)
26+
27+
1328
@pytest.mark.parametrize(
1429
"ops, names",
1530
[
@@ -28,6 +43,26 @@ def test_transform_listlike(string_series, ops, names):
2843
tm.assert_frame_equal(result, expected)
2944

3045

46+
def test_transform_listlike_func_with_args():
47+
# GH 50624
48+
49+
s = Series([1, 2, 3])
50+
51+
def foo1(x, a=1, c=0):
52+
return x + a + c
53+
54+
def foo2(x, b=2, c=0):
55+
return x + b + c
56+
57+
msg = r"foo1\(\) got an unexpected keyword argument 'b'"
58+
with pytest.raises(TypeError, match=msg):
59+
s.transform([foo1, foo2], 0, 3, b=3, c=4)
60+
61+
result = s.transform([foo1, foo2], 0, 3, c=4)
62+
expected = DataFrame({"foo1": [8, 9, 10], "foo2": [8, 9, 10]})
63+
tm.assert_frame_equal(result, expected)
64+
65+
3166
@pytest.mark.parametrize("box", [dict, Series])
3267
def test_transform_dictlike(string_series, box):
3368
# GH 35964

0 commit comments

Comments
 (0)