Skip to content

Commit 935be95

Browse files
MarcoGorelli authored and fangchenli committed
BUG: aggregations were getting overwritten if they had the same name (pandas-dev#30858)
* 🐛 aggregations were getting overwritten if they had the same name
1 parent cf1b141 commit 935be95

File tree

3 files changed

+68
-6
lines changed

3 files changed

+68
-6
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1093,6 +1093,7 @@ Reshaping
10931093
- Bug in :func:`crosstab` when inputs are two Series and have tuple names, the output will keep dummy MultiIndex as columns. (:issue:`18321`)
10941094
- :meth:`DataFrame.pivot` can now take lists for ``index`` and ``columns`` arguments (:issue:`21425`)
10951095
- Bug in :func:`concat` where the resulting indices are not copied when ``copy=True`` (:issue:`29879`)
1096+
- Bug in :meth:`SeriesGroupBy.aggregate` was resulting in aggregations being overwritten when they shared the same name (:issue:`30880`)
10961097
- Bug where :meth:`Index.astype` would lose the name attribute when converting from ``Float64Index`` to ``Int64Index``, or when casting to an ``ExtensionArray`` dtype (:issue:`32013`)
10971098
- :meth:`Series.append` will now raise a ``TypeError`` when passed a DataFrame or a sequence containing Dataframe (:issue:`31413`)
10981099
- :meth:`DataFrame.replace` and :meth:`Series.replace` will raise a ``TypeError`` if ``to_replace`` is not an expected type. Previously the ``replace`` would fail silently (:issue:`18634`)

pandas/core/groupby/generic.py

+9-6
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ def aggregate(
278278
if isinstance(ret, dict):
279279
from pandas import concat
280280

281-
ret = concat(ret, axis=1)
281+
ret = concat(ret.values(), axis=1, keys=[key.label for key in ret.keys()])
282282
return ret
283283

284284
agg = aggregate
@@ -307,8 +307,8 @@ def _aggregate_multiple_funcs(self, arg):
307307

308308
arg = zip(columns, arg)
309309

310-
results = {}
311-
for name, func in arg:
310+
results: Dict[base.OutputKey, Union[Series, DataFrame]] = {}
311+
for idx, (name, func) in enumerate(arg):
312312
obj = self
313313

314314
# reset the cache so that we
@@ -317,13 +317,14 @@ def _aggregate_multiple_funcs(self, arg):
317317
obj = copy.copy(obj)
318318
obj._reset_cache()
319319
obj._selection = name
320-
results[name] = obj.aggregate(func)
320+
results[base.OutputKey(label=name, position=idx)] = obj.aggregate(func)
321321

322322
if any(isinstance(x, DataFrame) for x in results.values()):
323323
# let higher level handle
324324
return results
325325

326-
return self.obj._constructor_expanddim(results, columns=columns)
326+
output = self._wrap_aggregated_output(results)
327+
return self.obj._constructor_expanddim(output, columns=columns)
327328

328329
def _wrap_series_output(
329330
self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]], index: Index,
@@ -354,10 +355,12 @@ def _wrap_series_output(
354355
if len(output) > 1:
355356
result = self.obj._constructor_expanddim(indexed_output, index=index)
356357
result.columns = columns
357-
else:
358+
elif not columns.empty:
358359
result = self.obj._constructor(
359360
indexed_output[0], index=index, name=columns[0]
360361
)
362+
else:
363+
result = self.obj._constructor_expanddim()
361364

362365
return result
363366

pandas/tests/groupby/aggregate/test_aggregate.py

+58
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,13 @@
22
test .agg behavior / note that .apply is tested generally in test_groupby.py
33
"""
44
import functools
5+
from functools import partial
56

67
import numpy as np
78
import pytest
89

10+
from pandas.errors import PerformanceWarning
11+
912
from pandas.core.dtypes.common import is_integer_dtype
1013

1114
import pandas as pd
@@ -252,6 +255,61 @@ def test_agg_multiple_functions_maintain_order(df):
252255
tm.assert_index_equal(result.columns, exp_cols)
253256

254257

258+
def test_agg_multiple_functions_same_name():
259+
# GH 30880
260+
df = pd.DataFrame(
261+
np.random.randn(1000, 3),
262+
index=pd.date_range("1/1/2012", freq="S", periods=1000),
263+
columns=["A", "B", "C"],
264+
)
265+
result = df.resample("3T").agg(
266+
{"A": [partial(np.quantile, q=0.9999), partial(np.quantile, q=0.1111)]}
267+
)
268+
expected_index = pd.date_range("1/1/2012", freq="3T", periods=6)
269+
expected_columns = MultiIndex.from_tuples([("A", "quantile"), ("A", "quantile")])
270+
expected_values = np.array(
271+
[df.resample("3T").A.quantile(q=q).values for q in [0.9999, 0.1111]]
272+
).T
273+
expected = pd.DataFrame(
274+
expected_values, columns=expected_columns, index=expected_index
275+
)
276+
tm.assert_frame_equal(result, expected)
277+
278+
279+
def test_agg_multiple_functions_same_name_with_ohlc_present():
280+
# GH 30880
281+
# ohlc expands dimensions, so different test to the above is required.
282+
df = pd.DataFrame(
283+
np.random.randn(1000, 3),
284+
index=pd.date_range("1/1/2012", freq="S", periods=1000),
285+
columns=["A", "B", "C"],
286+
)
287+
result = df.resample("3T").agg(
288+
{"A": ["ohlc", partial(np.quantile, q=0.9999), partial(np.quantile, q=0.1111)]}
289+
)
290+
expected_index = pd.date_range("1/1/2012", freq="3T", periods=6)
291+
expected_columns = pd.MultiIndex.from_tuples(
292+
[
293+
("A", "ohlc", "open"),
294+
("A", "ohlc", "high"),
295+
("A", "ohlc", "low"),
296+
("A", "ohlc", "close"),
297+
("A", "quantile", "A"),
298+
("A", "quantile", "A"),
299+
]
300+
)
301+
non_ohlc_expected_values = np.array(
302+
[df.resample("3T").A.quantile(q=q).values for q in [0.9999, 0.1111]]
303+
).T
304+
expected_values = np.hstack([df.resample("3T").A.ohlc(), non_ohlc_expected_values])
305+
expected = pd.DataFrame(
306+
expected_values, columns=expected_columns, index=expected_index
307+
)
308+
# PerformanceWarning is thrown by `assert col in right` in assert_frame_equal
309+
with tm.assert_produces_warning(PerformanceWarning):
310+
tm.assert_frame_equal(result, expected)
311+
312+
255313
def test_multiple_functions_tuples_and_non_tuples(df):
256314
# #1359
257315
funcs = [("foo", "mean"), "std"]

0 commit comments

Comments
 (0)