From d5eade993da90441bfa7e35de999a7c1f133c991 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 6 Jul 2018 08:49:58 -0500 Subject: [PATCH 1/3] Added failing test case --- pandas/tests/groupby/test_groupby.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index e05f9de5ea7f4..e4d8da2e26f5e 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1674,3 +1674,21 @@ def test_tuple_correct_keyerror(): [3, 4]])) with tm.assert_raises_regex(KeyError, "(7, 8)"): df.groupby((7, 8)).mean() + + +def test_groupby_ohlc_agg(): + df = pd.DataFrame([[1], [1]], columns=['foo'], + index=pd.date_range('2018-01-01', periods=2, freq='D')) + + expected = pd.DataFrame([ + [1, 1, 1, 1, 1], + [1, 1, 1, 1, 1] + ], columns=pd.MultiIndex.from_tuples(( + ('foo', 'ohlc', 'open'), ('foo', 'ohlc', 'high'), + ('foo', 'ohlc', 'low'), ('foo', 'ohlc', 'close'), + ('foo', 'sum', 0))), index=pd.date_range( + '2018-01-01', periods=2, freq='D')) + + result = df.groupby(pd.Grouper(freq='D')).agg(['sum', 'ohlc']) + + tm.assert_frame_equal(result, expected) From 03c1c2552c3d61c7c38c8e89a4021855dfbfb4aa Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 6 Jul 2018 11:09:38 -0500 Subject: [PATCH 2/3] Removed requirement for ohlc as first arg to agg --- pandas/core/groupby/groupby.py | 6 ++---- pandas/tests/groupby/test_groupby.py | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index c69d7f43de8ea..026b5bc3874fa 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -3557,13 +3557,11 @@ def _aggregate_multiple_funcs(self, arg, _level): obj._selection = name results[name] = obj.aggregate(func) - if isinstance(list(compat.itervalues(results))[0], - DataFrame): - + if any(isinstance(x, DataFrame) for x in compat.itervalues(results)): # let higher level handle if _level: return results - return list(compat.itervalues(results))[0] + return DataFrame(results, columns=columns) def _wrap_output(self, output, index, names=None): diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index e4d8da2e26f5e..12eb1d7ba625d 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1676,7 +1676,7 @@ def test_tuple_correct_keyerror(): df.groupby((7, 8)).mean() -def test_groupby_ohlc_agg(): +def test_groupby_agg_ohlc_non_first(): df = pd.DataFrame([[1], [1]], columns=['foo'], index=pd.date_range('2018-01-01', periods=2, freq='D')) @@ -1686,7 +1686,7 @@ def test_groupby_ohlc_agg(): ], columns=pd.MultiIndex.from_tuples(( ('foo', 'ohlc', 'open'), ('foo', 'ohlc', 'high'), ('foo', 'ohlc', 'low'), ('foo', 'ohlc', 'close'), - ('foo', 'sum', 0))), index=pd.date_range( + ('foo', 'sum', 'foo'))), index=pd.date_range( '2018-01-01', periods=2, freq='D')) result = df.groupby(pd.Grouper(freq='D')).agg(['sum', 'ohlc']) From ba55be0cc2369464ed2ee3d11ed019aecbf1380b Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 6 Jul 2018 11:38:23 -0500 Subject: [PATCH 3/3] Whatsnew and test comment --- doc/source/whatsnew/v0.23.3.txt | 5 +++++ pandas/tests/groupby/test_groupby.py | 1 + 2 files changed, 6 insertions(+) diff --git a/doc/source/whatsnew/v0.23.3.txt b/doc/source/whatsnew/v0.23.3.txt index d308cf7a3cfac..266d2ce448fb0 100644 --- a/doc/source/whatsnew/v0.23.3.txt +++ b/doc/source/whatsnew/v0.23.3.txt @@ -24,6 +24,11 @@ Fixed Regressions Bug Fixes ~~~~~~~~~ +**Groupby/Resample/Rolling** + +- Bug where calling :func:`DataFrameGroupBy.agg` with a list of functions including ``ohlc`` as the non-initial element would raise a ``ValueError`` (:issue:`21716`) +- + **Conversion** - diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 12eb1d7ba625d..66577d738dd28 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1677,6 +1677,7 @@ def test_tuple_correct_keyerror(): def test_groupby_agg_ohlc_non_first(): + # GH 21716 df = pd.DataFrame([[1], [1]], columns=['foo'], index=pd.date_range('2018-01-01', periods=2, freq='D'))