From 8c3745b9b2921e310db8f31dd24425f95f31fd19 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sun, 21 May 2023 08:04:50 +0100 Subject: [PATCH 01/11] BUG: make Series.agg aggregate when possible --- doc/source/whatsnew/v2.1.0.rst | 43 +++++++++++++++++++++++-- pandas/tests/apply/test_series_apply.py | 15 ++++++--- 2 files changed, 50 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 6bb972c21d927..dd0d9ecfaaed3 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -114,10 +114,47 @@ Notable bug fixes These are bug fixes that might have notable behavior changes. -.. _whatsnew_210.notable_bug_fixes.notable_bug_fix1: +.. _whatsnew_210.notable_bug_fixes.series_agg: + +User defined functions in Series.agg will always be passed the whole :class:`Series` for evaluation +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, :meth:`Series.agg` would attempt to apply user-defined functions on each element first and only if that failed would it apply user-defined function to the whole :class:`Series`. This would mean it in some cases didn't aggregate when given an aggregation function, and that the result for :meth:`Series.agg` could be different than the single-column result from :meth:`DataFrame.agg`: + +*Previous behavior*: + +.. code-block:: ipython + + In [1]: ser = pd.Series([1, 2, 3]) + In [2]: ser.agg(lambda x: np.sum(x, where=True)) + Out[2]: + 0 1 + 1 2 + 2 3 + dtype: int64 + In [3]: ser.agg(type) + Out[3]: + 0 + 1 + 2 + dtype: object + In [3]: df = ser.to_frame() + In [4]: df.agg(type)[0] + pandas.core.series.Series + +Now user-defined functions in :meth:`Series.agg` will always be passed the whole :class:`Series` for evaluation: + +*New behavior*: + +.. ipython:: python + + ser = pd.Series([1, 2, 3]) + ser.agg(lambda x: np.sum(x, where=True)) # fails, as it should + ser.agg(type) + ser.agg(type) == ser.to_frame().agg(type)[0] + +More generally, the result from :meth:`Series.agg` will now always be the same as the single-column result from :meth:`DataFrame.agg` (:issue:`53324`). -notable_bug_fix1 -^^^^^^^^^^^^^^^^ .. _whatsnew_210.notable_bug_fixes.notable_bug_fix2: diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index 985cb5aa5b09c..3863add8d288c 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -391,6 +391,16 @@ def test_apply_map_evaluate_lambdas_the_same(string_series, func, by_row): assert result == str(string_series) +def test_agg_evaluate_lambdas(string_series): + expected = Series + + result = string_series.agg(lambda x: type(x)) + assert result is expected + + result = string_series.agg(type) + assert result is expected + + def test_with_nested_series(datetime_series): # GH 2316 # .agg with a reducer and a transform, what to do @@ -403,11 +413,6 @@ def test_with_nested_series(datetime_series): expected = DataFrame({"x": datetime_series, "x^2": datetime_series**2}) tm.assert_frame_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, match=msg): - # GH52123 - result = datetime_series.agg(lambda x: Series([x, x**2], index=["x", "x^2"])) - tm.assert_frame_equal(result, expected) - def test_replicate_describe(string_series, by_row): # this also tests a result set that is all scalars From 57b76eb1f08bcae0417b2a72b0cad64f2b891ce6 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sun, 21 May 2023 11:59:54 +0100 Subject: [PATCH 02/11] fix doc build --- doc/source/whatsnew/v2.1.0.rst | 11 ++++++----- pandas/tests/apply/test_series_apply.py | 1 + 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index dd0d9ecfaaed3..d5ad23a69eef4 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -147,13 +147,14 @@ Now user-defined functions in :meth:`Series.agg` will always be passed the whole *New behavior*: .. ipython:: python + :okexcept: - ser = pd.Series([1, 2, 3]) - ser.agg(lambda x: np.sum(x, where=True)) # fails, as it should - ser.agg(type) - ser.agg(type) == ser.to_frame().agg(type)[0] + ser = pd.Series([1, 2, 3]) + ser.agg(lambda x: np.sum(x, where=True)) # fails, as it should + ser.agg(type) + ser.agg(type) == ser.to_frame().agg(type)[0] -More generally, the result from :meth:`Series.agg` will now always be the same as the single-column result from :meth:`DataFrame.agg` (:issue:`53324`). +More generally, the result from :meth:`Series.agg` will now always be the same as the single-column result from :meth:`DataFrame.agg` (:issue:`53325`). .. _whatsnew_210.notable_bug_fixes.notable_bug_fix2: diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index 3863add8d288c..087d7514c4f42 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -392,6 +392,7 @@ def test_apply_map_evaluate_lambdas_the_same(string_series, func, by_row): def test_agg_evaluate_lambdas(string_series): + # GH53325 expected = Series result = string_series.agg(lambda x: type(x)) From afb0bf8a51ff1d8e645a23afa1f674c4f8ff44ae Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Mon, 22 May 2023 23:22:03 +0100 Subject: [PATCH 03/11] deprecate instead of treating as a bug --- doc/source/whatsnew/v2.1.0.rst | 46 +++----------------- pandas/core/apply.py | 27 +++++++----- pandas/tests/apply/test_frame_apply.py | 21 ++++----- pandas/tests/apply/test_frame_transform.py | 22 ++++++++++ pandas/tests/apply/test_series_apply.py | 48 ++++++++++----------- pandas/tests/apply/test_series_transform.py | 35 +++++++++++++++ 6 files changed, 109 insertions(+), 90 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index d5ad23a69eef4..1d02b8128ef50 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -114,48 +114,10 @@ Notable bug fixes These are bug fixes that might have notable behavior changes. -.. _whatsnew_210.notable_bug_fixes.series_agg: - -User defined functions in Series.agg will always be passed the whole :class:`Series` for evaluation -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Previously, :meth:`Series.agg` would attempt to apply user-defined functions on each element first and only if that failed would it apply user-defined function to the whole :class:`Series`. This would mean it in some cases didn't aggregate when given an aggregation function, and that the result for :meth:`Series.agg` could be different than the single-column result from :meth:`DataFrame.agg`: - -*Previous behavior*: - -.. code-block:: ipython - - In [1]: ser = pd.Series([1, 2, 3]) - In [2]: ser.agg(lambda x: np.sum(x, where=True)) - Out[2]: - 0 1 - 1 2 - 2 3 - dtype: int64 - In [3]: ser.agg(type) - Out[3]: - 0 - 1 - 2 - dtype: object - In [3]: df = ser.to_frame() - In [4]: df.agg(type)[0] - pandas.core.series.Series - -Now user-defined functions in :meth:`Series.agg` will always be passed the whole :class:`Series` for evaluation: - -*New behavior*: - -.. ipython:: python - :okexcept: - - ser = pd.Series([1, 2, 3]) - ser.agg(lambda x: np.sum(x, where=True)) # fails, as it should - ser.agg(type) - ser.agg(type) == ser.to_frame().agg(type)[0] - -More generally, the result from :meth:`Series.agg` will now always be the same as the single-column result from :meth:`DataFrame.agg` (:issue:`53325`). +.. _whatsnew_210.notable_bug_fixes.notable_bug_fix1: +notable_bug_fix1 +^^^^^^^^^^^^^^^^ .. _whatsnew_210.notable_bug_fixes.notable_bug_fix2: @@ -273,6 +235,8 @@ Deprecations - Deprecated ``axis=1`` in :meth:`DataFrame.groupby` and in :class:`Grouper` constructor, do ``frame.T.groupby(...)`` instead (:issue:`51203`) - Deprecated accepting slices in :meth:`DataFrame.take`, call ``obj[slicer]`` or pass a sequence of integers instead (:issue:`51539`) - Deprecated explicit support for subclassing :class:`Index` (:issue:`45289`) +- Deprecated making functions given to :meth:`Series.agg` attempt to operate on each element in the :class:`Series` and only operate on the whole :class:`Series` if the elementwise operations failed. In the future, functions given to :meth:`Series.agg` will always operate on the whole :class:`Series` only. To keep the current behavior, use :meth:`Series.transform` instead. (:issue:`53325`) +- Deprecated making the functions in a list of functions given to :meth:`DataFrame.agg` attempt to operate on each element in the :class:`DataFrame` and only operate on the columns of the :class:`DataFrame` if the elementwise operations failed. To keep the current behavior, use :meth:`DataFrame.transform` instead. (:issue:`53325`) - Deprecated passing a :class:`DataFrame` to :meth:`DataFrame.from_records`, use :meth:`DataFrame.set_index` or :meth:`DataFrame.drop` instead (:issue:`51353`) - Deprecated silently dropping unrecognized timezones when parsing strings to datetimes (:issue:`18702`) - Deprecated the ``axis`` keyword in :meth:`DataFrame.ewm`, :meth:`Series.ewm`, :meth:`DataFrame.rolling`, :meth:`Series.rolling`, :meth:`DataFrame.expanding`, :meth:`Series.expanding` (:issue:`51778`) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 007dd2bb2a89d..5cf5be710a221 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1121,23 +1121,28 @@ def apply(self) -> DataFrame | Series: def agg(self): result = super().agg() if result is None: + obj = self.obj func = self.func - # string, list-like, and dict-like are entirely handled in super assert callable(func) - # try a regular apply, this evaluates lambdas - # row-by-row; however if the lambda is expected a Series - # expression, e.g.: lambda x: x-x.quantile(0.25) - # this will fail, so we can try a vectorized evaluation - - # we cannot FIRST try the vectorized evaluation, because - # then .agg and .apply would have different semantics if the - # operation is actually defined on the Series, e.g. str + # GH53325: The setup below is just to keep current behavior while emitting a + # deprecation message. In the future this will all be replaced with a simple + # `result = f(self.obj)`. + if isinstance(func, np.ufunc): + with np.errstate(all="ignore"): + return func(obj) try: - result = self.obj.apply(func, args=self.args, **self.kwargs) + result = obj.apply(func) except (ValueError, AttributeError, TypeError): - result = func(self.obj, *self.args, **self.kwargs) + result = func(self.obj) + else: + msg = ( + f"using {func} in {type(obj).__name__}.agg cannot aggregate and " + f"has been deprecated. Use {type(obj).__name__}.transform to " + f"keep behavior unchanged." + ) + warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) return result diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index fc8b57d26a5be..05258eb5f8391 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -1478,8 +1478,8 @@ def test_any_apply_keyword_non_zero_axis_regression(): tm.assert_series_equal(result, expected) -def test_agg_list_like_func_with_args(): - # GH 50624 +def test_agg_mapping_func_deprecated(): + # GH 53325 df = DataFrame({"x": [1, 2, 3]}) def foo1(x, a=1, c=0): @@ -1488,16 +1488,13 @@ def foo1(x, a=1, c=0): def foo2(x, b=2, c=0): return x + b + c - msg = r"foo1\(\) got an unexpected keyword argument 'b'" - with pytest.raises(TypeError, match=msg): - df.agg([foo1, foo2], 0, 3, b=3, c=4) - - result = df.agg([foo1, foo2], 0, 3, c=4) - expected = DataFrame( - [[8, 8], [9, 9], [10, 10]], - columns=MultiIndex.from_tuples([("x", "foo1"), ("x", "foo2")]), - ) - tm.assert_frame_equal(result, expected) + # single func already takes the vectorized path + df.agg(foo1, 0, 3, c=4) + msg = "using .+ in Series.agg cannot aggregate and" + with tm.assert_produces_warning(FutureWarning, match=msg): + df.agg([foo1, foo2], 0, 3, c=4) + with tm.assert_produces_warning(FutureWarning, match=msg): + df.agg({"x": foo1}, 0, 3, c=4) def test_agg_std(): diff --git a/pandas/tests/apply/test_frame_transform.py b/pandas/tests/apply/test_frame_transform.py index 8e385de0b48e0..2d57515882aed 100644 --- a/pandas/tests/apply/test_frame_transform.py +++ b/pandas/tests/apply/test_frame_transform.py @@ -66,6 +66,28 @@ def test_transform_empty_listlike(float_frame, ops, frame_or_series): obj.transform(ops) +def test_transform_listlike_func_with_args(): + # GH 50624 + df = DataFrame({"x": [1, 2, 3]}) + + def foo1(x, a=1, c=0): + return x + a + c + + def foo2(x, b=2, c=0): + return x + b + c + + msg = r"foo1\(\) got an unexpected keyword argument 'b'" + with pytest.raises(TypeError, match=msg): + df.transform([foo1, foo2], 0, 3, b=3, c=4) + + result = df.transform([foo1, foo2], 0, 3, c=4) + expected = DataFrame( + [[8, 8], [9, 9], [10, 10]], + columns=MultiIndex.from_tuples([("x", "foo1"), ("x", "foo2")]), + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("box", [dict, Series]) def test_transform_dictlike(axis, float_frame, box): # GH 35964 diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index 087d7514c4f42..b3437f207acd3 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -98,24 +98,18 @@ def test_apply_args(): assert isinstance(result[0], list) -@pytest.mark.parametrize( - "args, kwargs, increment", - [((), {}, 0), ((), {"a": 1}, 1), ((2, 3), {}, 32), ((1,), {"c": 2}, 201)], -) -def test_agg_args(args, kwargs, increment): - # GH 43357 - def f(x, a=0, b=0, c=0): - return x + a + 10 * b + 100 * c +def test_agg_args(): + def f(x, increment): + return x.sum() + increment s = Series([1, 2]) - result = s.agg(f, 0, *args, **kwargs) - expected = s + increment - tm.assert_series_equal(result, expected) - + result = s.agg(f, increment=0) + expected = s.sum() + assert result == expected -def test_agg_list_like_func_with_args(): - # GH 50624 +def test_agg_mapping_func_deprecated(): + # GH 53325 s = Series([1, 2, 3]) def foo1(x, a=1, c=0): @@ -124,13 +118,13 @@ def foo1(x, a=1, c=0): def foo2(x, b=2, c=0): return x + b + c - msg = r"foo1\(\) got an unexpected keyword argument 'b'" - with pytest.raises(TypeError, match=msg): - s.agg([foo1, foo2], 0, 3, b=3, c=4) - - result = s.agg([foo1, foo2], 0, 3, c=4) - expected = DataFrame({"foo1": [8, 9, 10], "foo2": [8, 9, 10]}) - tm.assert_frame_equal(result, expected) + msg = "using .+ in Series.agg cannot aggregate and" + with tm.assert_produces_warning(FutureWarning, match=msg): + s.agg(foo1, 0, 3, c=4) + with tm.assert_produces_warning(FutureWarning, match=msg): + s.agg([foo1, foo2], 0, 3, c=4) + with tm.assert_produces_warning(FutureWarning, match=msg): + s.agg({"a": foo1, "b": foo2}, 0, 3, c=4) def test_series_apply_map_box_timestamps(by_row): @@ -393,13 +387,15 @@ def test_apply_map_evaluate_lambdas_the_same(string_series, func, by_row): def test_agg_evaluate_lambdas(string_series): # GH53325 - expected = Series + # in the future, the result will be a Series class. - result = string_series.agg(lambda x: type(x)) - assert result is expected + with tm.assert_produces_warning(FutureWarning): + result = string_series.agg(lambda x: type(x)) + assert isinstance(result, Series) and len(result) == len(string_series) - result = string_series.agg(type) - assert result is expected + with tm.assert_produces_warning(FutureWarning): + result = string_series.agg(type) + assert isinstance(result, Series) and len(result) == len(string_series) def test_with_nested_series(datetime_series): diff --git a/pandas/tests/apply/test_series_transform.py b/pandas/tests/apply/test_series_transform.py index b10af13eae20c..82592c4711ece 100644 --- a/pandas/tests/apply/test_series_transform.py +++ b/pandas/tests/apply/test_series_transform.py @@ -10,6 +10,21 @@ import pandas._testing as tm +@pytest.mark.parametrize( + "args, kwargs, increment", + [((), {}, 0), ((), {"a": 1}, 1), ((2, 3), {}, 32), ((1,), {"c": 2}, 201)], +) +def test_agg_args(args, kwargs, increment): + # GH 43357 + def f(x, a=0, b=0, c=0): + return x + a + 10 * b + 100 * c + + s = Series([1, 2]) + result = s.transform(f, 0, *args, **kwargs) + expected = s + increment + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( "ops, names", [ @@ -28,6 +43,26 @@ def test_transform_listlike(string_series, ops, names): tm.assert_frame_equal(result, expected) +def test_transform_listlike_func_with_args(): + # GH 50624 + + s = Series([1, 2, 3]) + + def foo1(x, a=1, c=0): + return x + a + c + + def foo2(x, b=2, c=0): + return x + b + c + + msg = r"foo1\(\) got an unexpected keyword argument 'b'" + with pytest.raises(TypeError, match=msg): + s.transform([foo1, foo2], 0, 3, b=3, c=4) + + result = s.transform([foo1, foo2], 0, 3, c=4) + expected = DataFrame({"foo1": [8, 9, 10], "foo2": [8, 9, 10]}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("box", [dict, Series]) def test_transform_dictlike(string_series, box): # GH 35964 From 9bb2b876feb1f5baf58d8ad924d18e42ddaa326c Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Tue, 23 May 2023 16:19:07 +0100 Subject: [PATCH 04/11] CLN: Apply.agg_list_like --- pandas/core/apply.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 5cf5be710a221..a8a63d684912b 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -323,6 +323,7 @@ def agg_or_apply_list_like( is_groupby = isinstance(obj, (DataFrameGroupBy, SeriesGroupBy)) + is_ser_or_df = isinstance(obj, (ABCDataFrame, ABCSeries)) context_manager: ContextManager if is_groupby: # When as_index=False, we combine all results using indices @@ -341,8 +342,8 @@ def include_axis(colg) -> bool: if selected_obj.ndim == 1: for a in func: colg = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj) - args = [self.axis, *self.args] if include_axis(colg) else self.args - new_res = getattr(colg, op_name)(a, *args, **kwargs) + this_args = [self.axis, *self.args] if is_ser_or_df else self.args + new_res = getattr(colg, op_name)(a, *this_args, **kwargs) results.append(new_res) # make sure we find a good name @@ -353,8 +354,8 @@ def include_axis(colg) -> bool: indices = [] for index, col in enumerate(selected_obj): colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index]) - args = [self.axis, *self.args] if include_axis(colg) else self.args - new_res = getattr(colg, op_name)(func, *args, **kwargs) + this_args = [self.axis, *self.args] if is_ser_or_df else self.args + new_res = getattr(colg, op_name)(func, *this_args, **kwargs) results.append(new_res) indices.append(index) keys = selected_obj.columns.take(indices) From 2e54b05a68a76f6ebb4bade27af0613e676254bc Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Tue, 23 May 2023 17:31:05 +0100 Subject: [PATCH 05/11] some cleanups --- pandas/core/apply.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index a8a63d684912b..834dc110abb52 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -324,6 +324,8 @@ def agg_or_apply_list_like( is_groupby = isinstance(obj, (DataFrameGroupBy, SeriesGroupBy)) is_ser_or_df = isinstance(obj, (ABCDataFrame, ABCSeries)) + this_args = [self.axis, *self.args] if is_ser_or_df else self.args + context_manager: ContextManager if is_groupby: # When as_index=False, we combine all results using indices @@ -342,7 +344,6 @@ def include_axis(colg) -> bool: if selected_obj.ndim == 1: for a in func: colg = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj) - this_args = [self.axis, *self.args] if is_ser_or_df else self.args new_res = getattr(colg, op_name)(a, *this_args, **kwargs) results.append(new_res) @@ -354,7 +355,6 @@ def include_axis(colg) -> bool: indices = [] for index, col in enumerate(selected_obj): colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index]) - this_args = [self.axis, *self.args] if is_ser_or_df else self.args new_res = getattr(colg, op_name)(func, *this_args, **kwargs) results.append(new_res) indices.append(index) From c292c5bded12fbbbcd4c644721cd796d0187f750 Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Tue, 30 May 2023 14:36:18 -0400 Subject: [PATCH 06/11] REF/CLN: func in core.apply (#53437) * REF/CLN: func in core.apply * Remove type-hint --- pandas/core/apply.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 834dc110abb52..9f9636a2ab50f 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1130,13 +1130,10 @@ def agg(self): # GH53325: The setup below is just to keep current behavior while emitting a # deprecation message. In the future this will all be replaced with a simple # `result = f(self.obj)`. - if isinstance(func, np.ufunc): - with np.errstate(all="ignore"): - return func(obj) try: - result = obj.apply(func) + result = obj.apply(func, args=self.args, **self.kwargs) except (ValueError, AttributeError, TypeError): - result = func(self.obj) + result = func(obj, *self.args, **self.kwargs) else: msg = ( f"using {func} in {type(obj).__name__}.agg cannot aggregate and " From 13ba267a69687bd86a851b2f4c32a6f1a9b82b60 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Mon, 5 Jun 2023 21:30:44 +0100 Subject: [PATCH 07/11] REF: Decouple Series.apply from Series.agg (#53400) --- pandas/tests/apply/test_series_apply.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index b3437f207acd3..8dc27931b8390 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -398,13 +398,14 @@ def test_agg_evaluate_lambdas(string_series): assert isinstance(result, Series) and len(result) == len(string_series) -def test_with_nested_series(datetime_series): +@pytest.mark.parametrize("op_name", ["agg", "apply"]) +def test_with_nested_series(datetime_series, op_name): # GH 2316 # .agg with a reducer and a transform, what to do msg = "Returning a DataFrame from Series.apply when the supplied function" with tm.assert_produces_warning(FutureWarning, match=msg): # GH52123 - result = datetime_series.apply( + result = getattr(datetime_series, op_name)( lambda x: Series([x, x**2], index=["x", "x^2"]) ) expected = DataFrame({"x": datetime_series, "x^2": datetime_series**2}) From ff7c64bb396f2255f78965db1df03d7fe84fb4e9 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Tue, 6 Jun 2023 07:19:15 +0100 Subject: [PATCH 08/11] update test --- pandas/tests/apply/test_invalid_arg.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py index d75b784302676..21b5c803d0e76 100644 --- a/pandas/tests/apply/test_invalid_arg.py +++ b/pandas/tests/apply/test_invalid_arg.py @@ -8,6 +8,7 @@ from itertools import chain import re +import warnings import numpy as np import pytest @@ -307,7 +308,10 @@ def test_transform_and_agg_err_series(string_series, func, msg): # we are trying to transform with an aggregator with pytest.raises(ValueError, match=msg): with np.errstate(all="ignore"): - string_series.agg(func) + # GH53325 + with warnings.catch_warnings(): + warnings.simplefilter("ignore", FutureWarning) + string_series.agg(func) @pytest.mark.parametrize("func", [["max", "min"], ["max", "sqrt"]]) From 78c133a5a6863b44ae0338d7621e5eb9476fa667 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Tue, 6 Jun 2023 08:15:23 +0100 Subject: [PATCH 09/11] fix issues --- pandas/core/apply.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 9f9636a2ab50f..81a5dcab632c7 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -323,9 +323,6 @@ def agg_or_apply_list_like( is_groupby = isinstance(obj, (DataFrameGroupBy, SeriesGroupBy)) - is_ser_or_df = isinstance(obj, (ABCDataFrame, ABCSeries)) - this_args = [self.axis, *self.args] if is_ser_or_df else self.args - context_manager: ContextManager if is_groupby: # When as_index=False, we combine all results using indices @@ -344,7 +341,8 @@ def include_axis(colg) -> bool: if selected_obj.ndim == 1: for a in func: colg = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj) - new_res = getattr(colg, op_name)(a, *this_args, **kwargs) + args = [self.axis, *self.args] if include_axis(colg) else self.args + new_res = getattr(colg, op_name)(a, *args, **kwargs) results.append(new_res) # make sure we find a good name @@ -355,7 +353,8 @@ def include_axis(colg) -> bool: indices = [] for index, col in enumerate(selected_obj): colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index]) - new_res = getattr(colg, op_name)(func, *this_args, **kwargs) + args = [self.axis, *self.args] if include_axis(colg) else self.args + new_res = getattr(colg, op_name)(func, *args, **kwargs) results.append(new_res) indices.append(index) keys = selected_obj.columns.take(indices) From fbc3092350728a6273415a0598b34fd5adb24752 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Tue, 6 Jun 2023 13:59:45 +0100 Subject: [PATCH 10/11] fix issues --- pandas/core/apply.py | 2 +- pandas/tests/apply/test_frame_apply.py | 20 ++++++++++++++++---- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 81a5dcab632c7..1b2aa1d053240 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1128,7 +1128,7 @@ def agg(self): # GH53325: The setup below is just to keep current behavior while emitting a # deprecation message. In the future this will all be replaced with a simple - # `result = f(self.obj)`. + # `result = f(self.obj, *self.args, **self.kwargs)`. try: result = obj.apply(func, args=self.args, **self.kwargs) except (ValueError, AttributeError, TypeError): diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 05258eb5f8391..a904a99fe15ff 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -1489,12 +1489,24 @@ def foo2(x, b=2, c=0): return x + b + c # single func already takes the vectorized path - df.agg(foo1, 0, 3, c=4) + result = df.agg(foo1, 0, 3, c=4) + expected = df + 7 + tm.assert_frame_equal(result, expected) + msg = "using .+ in Series.agg cannot aggregate and" + + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.agg([foo1, foo2], 0, 3, c=4) + expected = pd.DataFrame( + [[8, 8], [9, 9], [10, 10]], columns=[["x", "x"], ["foo1", "foo2"]] + ) + tm.assert_frame_equal(result, expected) + + # TODO: the result below is wrong, should be fixed (GH53325) with tm.assert_produces_warning(FutureWarning, match=msg): - df.agg([foo1, foo2], 0, 3, c=4) - with tm.assert_produces_warning(FutureWarning, match=msg): - df.agg({"x": foo1}, 0, 3, c=4) + result = df.agg({"x": foo1}, 0, 3, c=4) + expected = pd.DataFrame([2, 3, 4], columns=["x"]) + tm.assert_frame_equal(result, expected) def test_agg_std(): From a0e319aacc79befda3073717d114e9366a88fb0c Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Tue, 6 Jun 2023 14:33:17 +0100 Subject: [PATCH 11/11] fix issues --- pandas/tests/apply/test_frame_apply.py | 4 ++-- pandas/tests/apply/test_series_apply.py | 22 ++++++++++++++++------ 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index a904a99fe15ff..99fc393ff82c5 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -1497,7 +1497,7 @@ def foo2(x, b=2, c=0): with tm.assert_produces_warning(FutureWarning, match=msg): result = df.agg([foo1, foo2], 0, 3, c=4) - expected = pd.DataFrame( + expected = DataFrame( [[8, 8], [9, 9], [10, 10]], columns=[["x", "x"], ["foo1", "foo2"]] ) tm.assert_frame_equal(result, expected) @@ -1505,7 +1505,7 @@ def foo2(x, b=2, c=0): # TODO: the result below is wrong, should be fixed (GH53325) with tm.assert_produces_warning(FutureWarning, match=msg): result = df.agg({"x": foo1}, 0, 3, c=4) - expected = pd.DataFrame([2, 3, 4], columns=["x"]) + expected = DataFrame([2, 3, 4], columns=["x"]) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index 8dc27931b8390..425d2fb42a711 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -98,14 +98,24 @@ def test_apply_args(): assert isinstance(result[0], list) -def test_agg_args(): - def f(x, increment): - return x.sum() + increment +@pytest.mark.parametrize( + "args, kwargs, increment", + [((), {}, 0), ((), {"a": 1}, 1), ((2, 3), {}, 32), ((1,), {"c": 2}, 201)], +) +def test_agg_args(args, kwargs, increment): + # GH 43357 + def f(x, a=0, b=0, c=0): + return x + a + 10 * b + 100 * c s = Series([1, 2]) - result = s.agg(f, increment=0) - expected = s.sum() - assert result == expected + msg = ( + "in Series.agg cannot aggregate and has been deprecated. " + "Use Series.transform to keep behavior unchanged." + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = s.agg(f, 0, *args, **kwargs) + expected = s + increment + tm.assert_series_equal(result, expected) def test_agg_mapping_func_deprecated():