Skip to content

Backport PR #35723 on branch 1.1.x (agg with list of non-aggregating functions) #35738

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ Fixed regressions
- Fixed regression in :meth:`DataFrame.reset_index` would raise a ``ValueError`` on empty :class:`DataFrame` with a :class:`MultiIndex` with a ``datetime64`` dtype level (:issue:`35606`, :issue:`35657`)
- Fixed regression where :meth:`DataFrame.merge_asof` would raise a ``UnboundLocalError`` when ``left_index`` , ``right_index`` and ``tolerance`` were set (:issue:`35558`)
- Fixed regression in ``.groupby(..).rolling(..)`` where a custom ``BaseIndexer`` would be ignored (:issue:`35557`)
- Fixed regression in :meth:`~pandas.core.groupby.DataFrameGroupBy.agg` where a list of functions would produce the wrong results if at least one of the functions did not aggregate. (:issue:`35490`)

.. ---------------------------------------------------------------------------

Expand Down
25 changes: 15 additions & 10 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,11 +322,14 @@ def _aggregate_multiple_funcs(self, arg):
# let higher level handle
return results

output = self._wrap_aggregated_output(results)
output = self._wrap_aggregated_output(results, index=None)
return self.obj._constructor_expanddim(output, columns=columns)

# TODO: index should not be Optional - see GH 35490
def _wrap_series_output(
self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]], index: Index,
self,
output: Mapping[base.OutputKey, Union[Series, np.ndarray]],
index: Optional[Index],
) -> Union[Series, DataFrame]:
"""
Wraps the output of a SeriesGroupBy operation into the expected result.
Expand All @@ -335,7 +338,7 @@ def _wrap_series_output(
----------
output : Mapping[base.OutputKey, Union[Series, np.ndarray]]
Data to wrap.
index : pd.Index
index : pd.Index or None
Index to apply to the output.

Returns
Expand Down Expand Up @@ -363,8 +366,11 @@ def _wrap_series_output(

return result

# TODO: Remove index argument, use self.grouper.result_index, see GH 35490
def _wrap_aggregated_output(
self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]]
self,
output: Mapping[base.OutputKey, Union[Series, np.ndarray]],
index: Optional[Index],
) -> Union[Series, DataFrame]:
"""
Wraps the output of a SeriesGroupBy aggregation into the expected result.
Expand All @@ -383,9 +389,7 @@ def _wrap_aggregated_output(
In the vast majority of cases output will only contain one element.
The exception is operations that expand dimensions, like ohlc.
"""
result = self._wrap_series_output(
output=output, index=self.grouper.result_index
)
result = self._wrap_series_output(output=output, index=index)
return self._reindex_output(result)

def _wrap_transformed_output(
Expand Down Expand Up @@ -1714,7 +1718,9 @@ def _insert_inaxis_grouper_inplace(self, result):
result.insert(0, name, lev)

def _wrap_aggregated_output(
self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]]
self,
output: Mapping[base.OutputKey, Union[Series, np.ndarray]],
index: Optional[Index],
) -> DataFrame:
"""
Wraps the output of DataFrameGroupBy aggregations into the expected result.
Expand All @@ -1739,8 +1745,7 @@ def _wrap_aggregated_output(
self._insert_inaxis_grouper_inplace(result)
result = result._consolidate()
else:
index = self.grouper.result_index
result.index = index
result.index = self.grouper.result_index

if self.axis == 1:
result = result.T
Expand Down
10 changes: 6 additions & 4 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -973,7 +973,9 @@ def _cython_transform(self, how: str, numeric_only: bool = True, **kwargs):

return self._wrap_transformed_output(output)

def _wrap_aggregated_output(self, output: Mapping[base.OutputKey, np.ndarray]):
def _wrap_aggregated_output(
self, output: Mapping[base.OutputKey, np.ndarray], index: Optional[Index]
):
raise AbstractMethodError(self)

def _wrap_transformed_output(self, output: Mapping[base.OutputKey, np.ndarray]):
Expand Down Expand Up @@ -1048,7 +1050,7 @@ def _cython_agg_general(
if len(output) == 0:
raise DataError("No numeric types to aggregate")

return self._wrap_aggregated_output(output)
return self._wrap_aggregated_output(output, index=self.grouper.result_index)

def _python_agg_general(
self, func, *args, engine="cython", engine_kwargs=None, **kwargs
Expand Down Expand Up @@ -1101,7 +1103,7 @@ def _python_agg_general(

output[key] = maybe_cast_result(values[mask], result)

return self._wrap_aggregated_output(output)
return self._wrap_aggregated_output(output, index=self.grouper.result_index)

def _concat_objects(self, keys, values, not_indexed_same: bool = False):
from pandas.core.reshape.concat import concat
Expand Down Expand Up @@ -2521,7 +2523,7 @@ def _get_cythonized_result(
raise TypeError(error_msg)

if aggregate:
return self._wrap_aggregated_output(output)
return self._wrap_aggregated_output(output, index=self.grouper.result_index)
else:
return self._wrap_transformed_output(output)

Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/groupby/aggregate/test_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -1061,3 +1061,16 @@ def test_groupby_get_by_index():
res = df.groupby("A").agg({"B": lambda x: x.get(x.index[-1])})
expected = pd.DataFrame(dict(A=["S", "W"], B=[1.0, 2.0])).set_index("A")
pd.testing.assert_frame_equal(res, expected)


def test_nonagg_agg():
# GH 35490 - Single/Multiple agg of non-agg function give same results
# TODO: agg should raise for functions that don't aggregate
df = pd.DataFrame({"a": [1, 1, 2, 2], "b": [1, 2, 2, 1]})
g = df.groupby("a")

result = g.agg(["cumsum"])
result.columns = result.columns.droplevel(-1)
expected = g.agg("cumsum")

tm.assert_frame_equal(result, expected)