From e692d3fc5af285b4487c8c7ed29ba1c570952023 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 12 May 2023 17:33:13 -0700 Subject: [PATCH] BUG: DataFrame.agg not returning a reduced result when providing a lambda --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/core/apply.py | 7 +++++++ pandas/tests/apply/test_frame_apply.py | 21 +++++++++++++++++++++ 3 files changed, 29 insertions(+) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 52fc8512c9db3..c88bbb0e8fc5c 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -405,6 +405,7 @@ Plotting Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ +- Bug in :meth:`DataFrame.agg` not returning a reduced result when providing a dict-like or tuple-like renaming argument with a ``lambda`` function (:issue:`41768`) - Bug in :meth:`DataFrame.resample` and :meth:`Series.resample` in incorrectly allowing non-fixed ``freq`` when resampling on a :class:`TimedeltaIndex` (:issue:`51896`) - Bug in :meth:`DataFrameGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmax` return wrong dtype when used on empty DataFrameGroupBy or SeriesGroupBy (:issue:`51423`) - Bug in weighted rolling aggregations when specifying ``min_periods=0`` (:issue:`51449`) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 0c2adb89a2422..c3fdf4349b6e9 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -44,6 +44,7 @@ from pandas.core.dtypes.common import ( is_dict_like, is_list_like, + is_scalar, is_sequence, ) from pandas.core.dtypes.dtypes import ( @@ -1100,9 +1101,15 @@ def agg(self): # operation is actually defined on the Series, e.g. str try: result = self.obj.apply(f) + # GH 41768: Attempt to return a reduced result + if not is_scalar(result): + result = f(self.obj) except (ValueError, AttributeError, TypeError): result = f(self.obj) + # TODO: Shouldn't we validate this returns a scalar? 
+ # Would fail test_agg_listlike_result, test_agg_transform + return result def apply_empty_result(self) -> Series: diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 5a2574e62b41e..7b555371c2b1e 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -1515,3 +1515,24 @@ def test_agg_dist_like_and_nonunique_columns(): result = df.agg({"A": "count"}) expected = df["A"].count() tm.assert_series_equal(result, expected) + + +def test_agg_lambda_tuple_result_rename(): + # GH 41768 + df = DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9], [np.nan, np.nan, np.nan]], + columns=["A", "B", "C"], + ) + result = df.agg(z=("C", lambda x: np.mean(x))) + expected = DataFrame([6.0], index=["z"], columns=["C"]) + tm.assert_frame_equal(result, expected) + + +def test_agg_dictlike_lambda(): + df = DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9], [np.nan, np.nan, np.nan]], + columns=["A", "B", "C"], + ) + result = df.agg({"C": lambda x: np.mean(x)}) + expected = Series([6.0], index=["C"]) + tm.assert_series_equal(result, expected)