Skip to content

Commit 0658ce3

Browse files
authored
agg with list of non-aggregating functions (#35723)
1 parent 7c31b2d commit 0658ce3

File tree

4 files changed

+35
-14
lines changed

4 files changed

+35
-14
lines changed

doc/source/whatsnew/v1.1.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ Fixed regressions
2626
- Fixed regression where :meth:`DataFrame.reset_index` would raise a ``ValueError`` on an empty :class:`DataFrame` with a :class:`MultiIndex` with a ``datetime64`` dtype level (:issue:`35606`, :issue:`35657`)
2727
- Fixed regression where :meth:`DataFrame.merge_asof` would raise an ``UnboundLocalError`` when ``left_index``, ``right_index`` and ``tolerance`` were set (:issue:`35558`)
2828
- Fixed regression in ``.groupby(..).rolling(..)`` where a custom ``BaseIndexer`` would be ignored (:issue:`35557`)
29+
- Fixed regression in :meth:`~pandas.core.groupby.DataFrameGroupBy.agg` where a list of functions would produce the wrong results if at least one of the functions did not aggregate (:issue:`35490`)
2930

3031
.. ---------------------------------------------------------------------------
3132

pandas/core/groupby/generic.py

+15-10
Original file line numberDiff line numberDiff line change
@@ -322,11 +322,14 @@ def _aggregate_multiple_funcs(self, arg):
322322
# let higher level handle
323323
return results
324324

325-
output = self._wrap_aggregated_output(results)
325+
output = self._wrap_aggregated_output(results, index=None)
326326
return self.obj._constructor_expanddim(output, columns=columns)
327327

328+
# TODO: index should not be Optional - see GH 35490
328329
def _wrap_series_output(
329-
self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]], index: Index,
330+
self,
331+
output: Mapping[base.OutputKey, Union[Series, np.ndarray]],
332+
index: Optional[Index],
330333
) -> Union[Series, DataFrame]:
331334
"""
332335
Wraps the output of a SeriesGroupBy operation into the expected result.
@@ -335,7 +338,7 @@ def _wrap_series_output(
335338
----------
336339
output : Mapping[base.OutputKey, Union[Series, np.ndarray]]
337340
Data to wrap.
338-
index : pd.Index
341+
index : pd.Index or None
339342
Index to apply to the output.
340343
341344
Returns
@@ -363,8 +366,11 @@ def _wrap_series_output(
363366

364367
return result
365368

369+
# TODO: Remove index argument, use self.grouper.result_index, see GH 35490
366370
def _wrap_aggregated_output(
367-
self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]]
371+
self,
372+
output: Mapping[base.OutputKey, Union[Series, np.ndarray]],
373+
index: Optional[Index],
368374
) -> Union[Series, DataFrame]:
369375
"""
370376
Wraps the output of a SeriesGroupBy aggregation into the expected result.
@@ -383,9 +389,7 @@ def _wrap_aggregated_output(
383389
In the vast majority of cases output will only contain one element.
384390
The exception is operations that expand dimensions, like ohlc.
385391
"""
386-
result = self._wrap_series_output(
387-
output=output, index=self.grouper.result_index
388-
)
392+
result = self._wrap_series_output(output=output, index=index)
389393
return self._reindex_output(result)
390394

391395
def _wrap_transformed_output(
@@ -1720,7 +1724,9 @@ def _insert_inaxis_grouper_inplace(self, result):
17201724
result.insert(0, name, lev)
17211725

17221726
def _wrap_aggregated_output(
1723-
self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]]
1727+
self,
1728+
output: Mapping[base.OutputKey, Union[Series, np.ndarray]],
1729+
index: Optional[Index],
17241730
) -> DataFrame:
17251731
"""
17261732
Wraps the output of DataFrameGroupBy aggregations into the expected result.
@@ -1745,8 +1751,7 @@ def _wrap_aggregated_output(
17451751
self._insert_inaxis_grouper_inplace(result)
17461752
result = result._consolidate()
17471753
else:
1748-
index = self.grouper.result_index
1749-
result.index = index
1754+
result.index = self.grouper.result_index
17501755

17511756
if self.axis == 1:
17521757
result = result.T

pandas/core/groupby/groupby.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -974,7 +974,9 @@ def _cython_transform(self, how: str, numeric_only: bool = True, **kwargs):
974974

975975
return self._wrap_transformed_output(output)
976976

977-
def _wrap_aggregated_output(self, output: Mapping[base.OutputKey, np.ndarray]):
977+
def _wrap_aggregated_output(
978+
self, output: Mapping[base.OutputKey, np.ndarray], index: Optional[Index]
979+
):
978980
raise AbstractMethodError(self)
979981

980982
def _wrap_transformed_output(self, output: Mapping[base.OutputKey, np.ndarray]):
@@ -1049,7 +1051,7 @@ def _cython_agg_general(
10491051
if len(output) == 0:
10501052
raise DataError("No numeric types to aggregate")
10511053

1052-
return self._wrap_aggregated_output(output)
1054+
return self._wrap_aggregated_output(output, index=self.grouper.result_index)
10531055

10541056
def _python_agg_general(
10551057
self, func, *args, engine="cython", engine_kwargs=None, **kwargs
@@ -1102,7 +1104,7 @@ def _python_agg_general(
11021104

11031105
output[key] = maybe_cast_result(values[mask], result)
11041106

1105-
return self._wrap_aggregated_output(output)
1107+
return self._wrap_aggregated_output(output, index=self.grouper.result_index)
11061108

11071109
def _concat_objects(self, keys, values, not_indexed_same: bool = False):
11081110
from pandas.core.reshape.concat import concat
@@ -2534,7 +2536,7 @@ def _get_cythonized_result(
25342536
raise TypeError(error_msg)
25352537

25362538
if aggregate:
2537-
return self._wrap_aggregated_output(output)
2539+
return self._wrap_aggregated_output(output, index=self.grouper.result_index)
25382540
else:
25392541
return self._wrap_transformed_output(output)
25402542

pandas/tests/groupby/aggregate/test_aggregate.py

+13
Original file line numberDiff line numberDiff line change
@@ -1061,3 +1061,16 @@ def test_groupby_get_by_index():
10611061
res = df.groupby("A").agg({"B": lambda x: x.get(x.index[-1])})
10621062
expected = pd.DataFrame(dict(A=["S", "W"], B=[1.0, 2.0])).set_index("A")
10631063
pd.testing.assert_frame_equal(res, expected)
1064+
1065+
1066+
def test_nonagg_agg():
1067+
# GH 35490 - Single/Multiple agg of non-agg function give same results
1068+
# TODO: agg should raise for functions that don't aggregate
1069+
df = pd.DataFrame({"a": [1, 1, 2, 2], "b": [1, 2, 2, 1]})
1070+
g = df.groupby("a")
1071+
1072+
result = g.agg(["cumsum"])
1073+
result.columns = result.columns.droplevel(-1)
1074+
expected = g.agg("cumsum")
1075+
1076+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)