diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 546600e1b2f4b..8e72ce83ac028 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -253,6 +253,7 @@ Groupby/Resample/Rolling - Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.nunique` in which the names of column levels were lost (:issue:`23222`) - Bug in :func:`pandas.core.groupby.GroupBy.agg` when applying a aggregation function to timezone aware data (:issue:`23683`) - Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` where timezone information would be dropped (:issue:`21603`) +- Ensured that ordering of outputs in ``groupby`` aggregation functions is consistent across all versions of Python (:issue:`25692`) Reshaping diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index c5f9e52e07ecf..bdae6f36b5572 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -219,7 +219,7 @@ def _aggregate_generic(self, func, *args, **kwargs): axis = self.axis obj = self._obj_with_exclusions - result = {} + result = collections.OrderedDict() if axis != obj._info_axis_number: try: for name, data in self: @@ -246,7 +246,7 @@ def _aggregate_item_by_item(self, func, *args, **kwargs): # only for axis==0 obj = self._obj_with_exclusions - result = {} + result = collections.OrderedDict() cannot_agg = [] errors = None for item in obj: @@ -899,7 +899,7 @@ def _get_index(): name=self._selection_name) def _aggregate_named(self, func, *args, **kwargs): - result = {} + result = collections.OrderedDict() for name, group in self: group.name = name diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 0c2e74c0b735f..ae8ed8db0aa5d 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -303,3 +303,15 @@ def test_groupby_agg_coercing_bools(): result = gp['c'].aggregate(lambda x: x.isnull().all()) expected = Series([True, False], index=index, name='c') tm.assert_series_equal(result, expected) + + +def test_order_aggregate_multiple_funcs(): + # GH 25692 + df = pd.DataFrame({'A': [1, 1, 2, 2], 'B': [1, 2, 3, 4]}) + + res = df.groupby('A').agg(['sum', 'max', 'mean', 'ohlc', 'min']) + result = res.columns.levels[1] + + expected = pd.Index(['sum', 'max', 'mean', 'ohlc', 'min']) + + tm.assert_index_equal(result, expected)