Skip to content

Commit d623ffd

Browse files
WillAyd authored and TomAugspurger committed
BUG: Prevent Unlimited Agg Recursion with Duplicate Col Names (#21066)
1 parent 1ee5ecf commit d623ffd

File tree

4 files changed

+24
-6
lines changed

4 files changed

+24
-6
lines changed

doc/source/whatsnew/v0.23.1.txt

+4-1
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,10 @@ Documentation Changes
4444
Bug Fixes
4545
~~~~~~~~~
4646

47-
-
47+
Groupby/Resample/Rolling
48+
^^^^^^^^^^^^^^^^^^^^^^^^
49+
50+
- Bug in :func:`DataFrame.agg` where applying multiple aggregation functions to a :class:`DataFrame` with duplicated column names would cause a stack overflow (:issue:`21063`)
4851
-
4952

5053
Conversion

pandas/core/base.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -590,9 +590,10 @@ def _aggregate_multiple_funcs(self, arg, _level, _axis):
590590

591591
# multiples
592592
else:
593-
for col in obj:
593+
for index, col in enumerate(obj):
594594
try:
595-
colg = self._gotitem(col, ndim=1, subset=obj[col])
595+
colg = self._gotitem(col, ndim=1,
596+
subset=obj.iloc[:, index])
596597
results.append(colg.aggregate(arg))
597598
keys.append(col)
598599
except (TypeError, DataError):
@@ -675,7 +676,6 @@ def _gotitem(self, key, ndim, subset=None):
675676
subset : object, default None
676677
subset to act on
677678
"""
678-
679679
# create a new object to prevent aliasing
680680
if subset is None:
681681
subset = self.obj

pandas/core/frame.py

+9-2
Original file line number | Diff line number | Diff line change
@@ -5731,7 +5731,12 @@ def diff(self, periods=1, axis=0):
57315731
# ----------------------------------------------------------------------
57325732
# Function application
57335733

5734-
def _gotitem(self, key, ndim, subset=None):
5734+
def _gotitem(self,
5735+
key, # type: Union[str, List[str]]
5736+
ndim, # type: int
5737+
subset=None # type: Union[Series, DataFrame, None]
5738+
):
5739+
# type: (...) -> Union[Series, DataFrame]
57355740
"""
57365741
sub-classes to define
57375742
return a sliced object
@@ -5746,9 +5751,11 @@ def _gotitem(self, key, ndim, subset=None):
57465751
"""
57475752
if subset is None:
57485753
subset = self
5754+
elif subset.ndim == 1: # is Series
5755+
return subset
57495756

57505757
# TODO: _shallow_copy(subset)?
5751-
return self[key]
5758+
return subset[key]
57525759

57535760
_agg_doc = dedent("""
57545761
The aggregation operations are always performed over an axis, either the

pandas/tests/frame/test_apply.py

+8
Original file line number | Diff line number | Diff line change
@@ -554,6 +554,14 @@ def test_apply_non_numpy_dtype(self):
554554
result = df.apply(lambda x: x)
555555
assert_frame_equal(result, df)
556556

557+
def test_apply_dup_names_multi_agg(self):
558+
# GH 21063
559+
df = pd.DataFrame([[0, 1], [2, 3]], columns=['a', 'a'])
560+
expected = pd.DataFrame([[0, 1]], columns=['a', 'a'], index=['min'])
561+
result = df.agg(['min'])
562+
563+
tm.assert_frame_equal(result, expected)
564+
557565

558566
class TestInferOutputShape(object):
559567
# the user has supplied an opaque UDF where

0 commit comments

Comments
 (0)