diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index 5c9c3e2931bd9..338364a943edf 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -43,7 +43,10 @@ Documentation Changes Bug Fixes ~~~~~~~~~ -- +Groupby/Resample/Rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- Bug in :func:`DataFrame.agg` where applying multiple aggregation functions to a :class:`DataFrame` with duplicated column names would cause a stack overflow (:issue:`21063`) - Conversion diff --git a/pandas/core/base.py b/pandas/core/base.py index fa78c89ed4ee7..aa051c6f5eaef 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -590,9 +590,10 @@ def _aggregate_multiple_funcs(self, arg, _level, _axis): # multiples else: - for col in obj: + for index, col in enumerate(obj): try: - colg = self._gotitem(col, ndim=1, subset=obj[col]) + colg = self._gotitem(col, ndim=1, + subset=obj.iloc[:, index]) results.append(colg.aggregate(arg)) keys.append(col) except (TypeError, DataError): @@ -675,7 +676,6 @@ def _gotitem(self, key, ndim, subset=None): subset : object, default None subset to act on """ - # create a new object to prevent aliasing if subset is None: subset = self.obj diff --git a/pandas/core/frame.py b/pandas/core/frame.py index dccc840f5affd..77a67c048a48d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5731,7 +5731,12 @@ def diff(self, periods=1, axis=0): # ---------------------------------------------------------------------- # Function application - def _gotitem(self, key, ndim, subset=None): + def _gotitem(self, + key, # type: Union[str, List[str]] + ndim, # type: int + subset=None # type: Union[Series, DataFrame, None] + ): + # type: (...) -> Union[Series, DataFrame] """ sub-classes to define return a sliced object @@ -5746,9 +5751,11 @@ def _gotitem(self, key, ndim, subset=None): """ if subset is None: subset = self + elif subset.ndim == 1: # is Series + return subset # TODO: _shallow_copy(subset)? - return self[key] + return subset[key] _agg_doc = dedent(""" The aggregation operations are always performed over an axis, either the diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index ac46f02d00773..dfb2961befe35 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -554,6 +554,14 @@ def test_apply_non_numpy_dtype(self): result = df.apply(lambda x: x) assert_frame_equal(result, df) + def test_apply_dup_names_multi_agg(self): + # GH 21063 + df = pd.DataFrame([[0, 1], [2, 3]], columns=['a', 'a']) + expected = pd.DataFrame([[0, 1]], columns=['a', 'a'], index=['min']) + result = df.agg(['min']) + + tm.assert_frame_equal(result, expected) + class TestInferOutputShape(object): # the user has supplied an opaque UDF where