Skip to content

Commit c5031fa

Browse files
Backport PR #35723: agg with list of non-aggregating functions (#35738)
Co-authored-by: Richard Shadrach <[email protected]>
1 parent 3ef3617 commit c5031fa

File tree

4 files changed

+35
-14
lines changed

4 files changed

+35
-14
lines changed

doc/source/whatsnew/v1.1.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ Fixed regressions
2626
- Fixed regression where :meth:`DataFrame.reset_index` would raise a ``ValueError`` on an empty :class:`DataFrame` with a :class:`MultiIndex` with a ``datetime64`` dtype level (:issue:`35606`, :issue:`35657`)
2727
- Fixed regression where :meth:`DataFrame.merge_asof` would raise an ``UnboundLocalError`` when ``left_index``, ``right_index`` and ``tolerance`` were set (:issue:`35558`)
2828
- Fixed regression in ``.groupby(..).rolling(..)`` where a custom ``BaseIndexer`` would be ignored (:issue:`35557`)
29+
- Fixed regression in :meth:`~pandas.core.groupby.DataFrameGroupBy.agg` where a list of functions would produce the wrong results if at least one of the functions did not aggregate (:issue:`35490`)
2930

3031
.. ---------------------------------------------------------------------------
3132

pandas/core/groupby/generic.py

+15-10
Original file line numberDiff line numberDiff line change
@@ -322,11 +322,14 @@ def _aggregate_multiple_funcs(self, arg):
322322
# let higher level handle
323323
return results
324324

325-
output = self._wrap_aggregated_output(results)
325+
output = self._wrap_aggregated_output(results, index=None)
326326
return self.obj._constructor_expanddim(output, columns=columns)
327327

328+
# TODO: index should not be Optional - see GH 35490
328329
def _wrap_series_output(
329-
self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]], index: Index,
330+
self,
331+
output: Mapping[base.OutputKey, Union[Series, np.ndarray]],
332+
index: Optional[Index],
330333
) -> Union[Series, DataFrame]:
331334
"""
332335
Wraps the output of a SeriesGroupBy operation into the expected result.
@@ -335,7 +338,7 @@ def _wrap_series_output(
335338
----------
336339
output : Mapping[base.OutputKey, Union[Series, np.ndarray]]
337340
Data to wrap.
338-
index : pd.Index
341+
index : pd.Index or None
339342
Index to apply to the output.
340343
341344
Returns
@@ -363,8 +366,11 @@ def _wrap_series_output(
363366

364367
return result
365368

369+
# TODO: Remove index argument, use self.grouper.result_index, see GH 35490
366370
def _wrap_aggregated_output(
367-
self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]]
371+
self,
372+
output: Mapping[base.OutputKey, Union[Series, np.ndarray]],
373+
index: Optional[Index],
368374
) -> Union[Series, DataFrame]:
369375
"""
370376
Wraps the output of a SeriesGroupBy aggregation into the expected result.
@@ -383,9 +389,7 @@ def _wrap_aggregated_output(
383389
In the vast majority of cases output will only contain one element.
384390
The exception is operations that expand dimensions, like ohlc.
385391
"""
386-
result = self._wrap_series_output(
387-
output=output, index=self.grouper.result_index
388-
)
392+
result = self._wrap_series_output(output=output, index=index)
389393
return self._reindex_output(result)
390394

391395
def _wrap_transformed_output(
@@ -1714,7 +1718,9 @@ def _insert_inaxis_grouper_inplace(self, result):
17141718
result.insert(0, name, lev)
17151719

17161720
def _wrap_aggregated_output(
1717-
self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]]
1721+
self,
1722+
output: Mapping[base.OutputKey, Union[Series, np.ndarray]],
1723+
index: Optional[Index],
17181724
) -> DataFrame:
17191725
"""
17201726
Wraps the output of DataFrameGroupBy aggregations into the expected result.
@@ -1739,8 +1745,7 @@ def _wrap_aggregated_output(
17391745
self._insert_inaxis_grouper_inplace(result)
17401746
result = result._consolidate()
17411747
else:
1742-
index = self.grouper.result_index
1743-
result.index = index
1748+
result.index = self.grouper.result_index
17441749

17451750
if self.axis == 1:
17461751
result = result.T

pandas/core/groupby/groupby.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -973,7 +973,9 @@ def _cython_transform(self, how: str, numeric_only: bool = True, **kwargs):
973973

974974
return self._wrap_transformed_output(output)
975975

976-
def _wrap_aggregated_output(self, output: Mapping[base.OutputKey, np.ndarray]):
976+
def _wrap_aggregated_output(
977+
self, output: Mapping[base.OutputKey, np.ndarray], index: Optional[Index]
978+
):
977979
raise AbstractMethodError(self)
978980

979981
def _wrap_transformed_output(self, output: Mapping[base.OutputKey, np.ndarray]):
@@ -1048,7 +1050,7 @@ def _cython_agg_general(
10481050
if len(output) == 0:
10491051
raise DataError("No numeric types to aggregate")
10501052

1051-
return self._wrap_aggregated_output(output)
1053+
return self._wrap_aggregated_output(output, index=self.grouper.result_index)
10521054

10531055
def _python_agg_general(
10541056
self, func, *args, engine="cython", engine_kwargs=None, **kwargs
@@ -1101,7 +1103,7 @@ def _python_agg_general(
11011103

11021104
output[key] = maybe_cast_result(values[mask], result)
11031105

1104-
return self._wrap_aggregated_output(output)
1106+
return self._wrap_aggregated_output(output, index=self.grouper.result_index)
11051107

11061108
def _concat_objects(self, keys, values, not_indexed_same: bool = False):
11071109
from pandas.core.reshape.concat import concat
@@ -2521,7 +2523,7 @@ def _get_cythonized_result(
25212523
raise TypeError(error_msg)
25222524

25232525
if aggregate:
2524-
return self._wrap_aggregated_output(output)
2526+
return self._wrap_aggregated_output(output, index=self.grouper.result_index)
25252527
else:
25262528
return self._wrap_transformed_output(output)
25272529

pandas/tests/groupby/aggregate/test_aggregate.py

+13
Original file line numberDiff line numberDiff line change
@@ -1061,3 +1061,16 @@ def test_groupby_get_by_index():
10611061
res = df.groupby("A").agg({"B": lambda x: x.get(x.index[-1])})
10621062
expected = pd.DataFrame(dict(A=["S", "W"], B=[1.0, 2.0])).set_index("A")
10631063
pd.testing.assert_frame_equal(res, expected)
1064+
1065+
1066+
def test_nonagg_agg():
1067+
# GH 35490 - Single/Multiple agg of non-agg function give same results
1068+
# TODO: agg should raise for functions that don't aggregate
1069+
df = pd.DataFrame({"a": [1, 1, 2, 2], "b": [1, 2, 2, 1]})
1070+
g = df.groupby("a")
1071+
1072+
result = g.agg(["cumsum"])
1073+
result.columns = result.columns.droplevel(-1)
1074+
expected = g.agg("cumsum")
1075+
1076+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)