From cb1419bfebf08f0723d61b56cecd04b3917edf9b Mon Sep 17 00:00:00 2001 From: Moi Date: Sun, 3 Feb 2019 16:21:12 +0100 Subject: [PATCH 1/8] Solving issue 25011 --- pandas/core/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 7b3152595e4b2..16f3cd17341e9 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -556,7 +556,7 @@ def is_any_frame(): # we require a list, but not an 'str' return self._aggregate_multiple_funcs(arg, _level=_level, - _axis=_axis), None + _axis=_axis), True else: result = None From 95f8f0bfe921fe19b791942c403e82803c548be2 Mon Sep 17 00:00:00 2001 From: Moi Date: Sun, 10 Feb 2019 00:05:52 +0100 Subject: [PATCH 2/8] added test and whatsnew --- doc/source/whatsnew/v0.25.0.rst | 2 +- .../tests/groupby/aggregate/test_aggregate.py | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 09626be713c4f..f99790147516d 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -173,7 +173,7 @@ Plotting Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- +- Bug in :method:'agg' when pqssing a list in the method agg on a Groupby with attibute as_index set as False, returned a DataFrame with the Groupby key as an index, now return the Groupby key as a column. 
- - diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 62ec0555f9033..012153694a734 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -287,3 +287,22 @@ def test_multi_function_flexible_mix(df): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = grouped.aggregate(d) tm.assert_frame_equal(result, expected) + +def test_not_as_index_agg_list(): + + array = [[3, 1, 2], + [3, 3, 4], + [4, 5, 6], + [4, 7, 8]] + df = pd.DataFrame(array, columns=['shouldnt_be_index', 'A', 'B']) + groupby = df.groupby('shouldnt_be_index', as_index=False) + result = groupby.agg(['min', 'max']) + + + array2 = [[3, 1, 3, 2, 4], + [4, 5, 7, 6, 8]] + columns = pd.MultiIndex(levels=[['A', 'B', 'shouldnt_be_index'], + ['min', 'max', '']], + codes=[[2, 0, 0, 1, 1], [2, 0, 1, 0, 1]]) + expected = pd.DataFrame(array2, columns=columns) + tm.assert_frame_equal(result, expected) From 055cfa73d89b04f277716391413e663f679fdd1b Mon Sep 17 00:00:00 2001 From: Moi Date: Sun, 10 Feb 2019 00:16:18 +0100 Subject: [PATCH 3/8] Minor Changes --- pandas/tests/groupby/aggregate/test_aggregate.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 8dc1661fdad72..8cdf87dc4bdf4 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -287,6 +287,7 @@ def test_multi_function_flexible_mix(df): result = grouped.aggregate(d) tm.assert_frame_equal(result, expected) + def test_not_as_index_agg_list(): array = [[3, 1, 2], @@ -297,11 +298,11 @@ def test_not_as_index_agg_list(): groupby = df.groupby('shouldnt_be_index', as_index=False) result = groupby.agg(['min', 'max']) - array2 = [[3, 1, 3, 2, 4], [4, 5, 7, 6, 8]] columns = pd.MultiIndex(levels=[['A', 'B', 'shouldnt_be_index'], 
['min', 'max', '']], codes=[[2, 0, 0, 1, 1], [2, 0, 1, 0, 1]]) expected = pd.DataFrame(array2, columns=columns) + tm.assert_frame_equal(result, expected) From ba4ebf7830d1bc99b25396b3859d3244d10b1d4f Mon Sep 17 00:00:00 2001 From: Moi Date: Sat, 16 Feb 2019 16:41:18 +0100 Subject: [PATCH 4/8] Test and whatnew entries updated --- doc/source/whatsnew/v0.25.0.rst | 2 +- .../tests/groupby/aggregate/test_aggregate.py | 25 ++++++++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 3c4ab3bf6410a..956ed4e5252b5 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -170,7 +170,7 @@ Plotting Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- Bug in :method:'agg' when pqssing a list in the method agg on a Groupby with attibute as_index set as False, returned a DataFrame with the Groupby key as an index, now return the Groupby key as a column. +- Fixed bug in GroupBy when using as_index=False (issue:`25011`) - - diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 8cdf87dc4bdf4..3d4a46ebbda64 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -290,12 +290,15 @@ def test_multi_function_flexible_mix(df): def test_not_as_index_agg_list(): + # GH 25011 + # expected behavhior of agg with as_index=True + as_index = False array = [[3, 1, 2], [3, 3, 4], [4, 5, 6], [4, 7, 8]] df = pd.DataFrame(array, columns=['shouldnt_be_index', 'A', 'B']) - groupby = df.groupby('shouldnt_be_index', as_index=False) + groupby = df.groupby('shouldnt_be_index', as_index=as_index) result = groupby.agg(['min', 'max']) array2 = [[3, 1, 3, 2, 4], @@ -306,3 +309,23 @@ def test_not_as_index_agg_list(): expected = pd.DataFrame(array2, columns=columns) tm.assert_frame_equal(result, expected) + + # expected behavhior of agg with as_index=True + as_index 
= True + array = [[3, 1, 2], + [3, 3, 4], + [4, 5, 6], + [4, 7, 8]] + df = pd.DataFrame(array, columns=['should_be_index', 'A', 'B']) + groupby = df.groupby('should_be_index', as_index=as_index) + result = groupby.agg(['min', 'max']) + + array2 = [[1, 3, 2, 4], + [5, 7, 6, 8]] + columns = pd.MultiIndex(levels=[['A', 'B'], + ['min', 'max']], + codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) + index = pd.Series([3, 4], name='should_be_index') + expected = pd.DataFrame(array2, columns=columns, index=index) + + tm.assert_frame_equal(result, expected) From b922fb462d91824585f253bbfee1e540fd917634 Mon Sep 17 00:00:00 2001 From: Moi Date: Thu, 21 Feb 2019 23:53:18 +0100 Subject: [PATCH 5/8] test changes --- .../tests/groupby/aggregate/test_aggregate.py | 42 +++++++------------ 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 3d4a46ebbda64..c529fd3c9a351 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -287,12 +287,10 @@ def test_multi_function_flexible_mix(df): result = grouped.aggregate(d) tm.assert_frame_equal(result, expected) - +@pytest.mark.parametrize('as_index', [True, False]) def test_not_as_index_agg_list(): # GH 25011 - # expected behavhior of agg with as_index=True - as_index = False array = [[3, 1, 2], [3, 3, 4], [4, 5, 6], @@ -301,31 +299,23 @@ def test_not_as_index_agg_list(): groupby = df.groupby('shouldnt_be_index', as_index=as_index) result = groupby.agg(['min', 'max']) - array2 = [[3, 1, 3, 2, 4], - [4, 5, 7, 6, 8]] - columns = pd.MultiIndex(levels=[['A', 'B', 'shouldnt_be_index'], - ['min', 'max', '']], - codes=[[2, 0, 0, 1, 1], [2, 0, 1, 0, 1]]) - expected = pd.DataFrame(array2, columns=columns) + if as_index: - tm.assert_frame_equal(result, expected) + array2 = [[1, 3, 2, 4], + [5, 7, 6, 8]] + columns = pd.MultiIndex(levels=[['A', 'B'], + ['min', 'max']], + codes=[[0, 0, 1, 
1], [0, 1, 0, 1]]) + index = pd.Series([3, 4], name='should_be_index') + expected = pd.DataFrame(array2, columns=columns, index=index) - # expected behavhior of agg with as_index=True - as_index = True - array = [[3, 1, 2], - [3, 3, 4], - [4, 5, 6], - [4, 7, 8]] - df = pd.DataFrame(array, columns=['should_be_index', 'A', 'B']) - groupby = df.groupby('should_be_index', as_index=as_index) - result = groupby.agg(['min', 'max']) + else: - array2 = [[1, 3, 2, 4], - [5, 7, 6, 8]] - columns = pd.MultiIndex(levels=[['A', 'B'], - ['min', 'max']], - codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) - index = pd.Series([3, 4], name='should_be_index') - expected = pd.DataFrame(array2, columns=columns, index=index) + array2 = [[3, 1, 3, 2, 4], + [4, 5, 7, 6, 8]] + columns = pd.MultiIndex(levels=[['A', 'B', 'shouldnt_be_index'], + ['min', 'max', '']], + codes=[[2, 0, 0, 1, 1], [2, 0, 1, 0, 1]]) + expected = pd.DataFrame(array2, columns=columns) tm.assert_frame_equal(result, expected) From 8a8ace4f51992e6b4cc85ef56e91cffa45fdb5ec Mon Sep 17 00:00:00 2001 From: Moi Date: Wed, 27 Feb 2019 20:06:53 +0100 Subject: [PATCH 6/8] Minor changes --- pandas/tests/groupby/aggregate/test_aggregate.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 887313d011107..3d79907f7d66b 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -287,7 +287,7 @@ def test_multi_function_flexible_mix(df): result = grouped.aggregate(d) tm.assert_frame_equal(result, expected) -<<<<<<< HEAD + @pytest.mark.parametrize('as_index', [True, False]) def test_not_as_index_agg_list(): @@ -320,7 +320,7 @@ def test_not_as_index_agg_list(): expected = pd.DataFrame(array2, columns=columns) tm.assert_frame_equal(result, expected) -======= + def test_groupby_agg_coercing_bools(): # issue 14873 @@ -337,4 +337,3 @@ def 
test_groupby_agg_coercing_bools(): result = gp['c'].aggregate(lambda x: x.isnull().all()) expected = Series([True, False], index=index, name='c') tm.assert_series_equal(result, expected) ->>>>>>> upstream/master From 7f1161c1e7e9a9a407917dc8368146fad86b3e7f Mon Sep 17 00:00:00 2001 From: Moi Date: Wed, 27 Feb 2019 20:16:36 +0100 Subject: [PATCH 7/8] minor changes --- pandas/tests/groupby/aggregate/test_aggregate.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 3d79907f7d66b..bf0bce9371617 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -289,15 +289,15 @@ def test_multi_function_flexible_mix(df): @pytest.mark.parametrize('as_index', [True, False]) -def test_not_as_index_agg_list(): +def test_not_as_index_agg_list(as_index): # GH 25011 array = [[3, 1, 2], [3, 3, 4], [4, 5, 6], [4, 7, 8]] - df = pd.DataFrame(array, columns=['shouldnt_be_index', 'A', 'B']) - groupby = df.groupby('shouldnt_be_index', as_index=as_index) + df = pd.DataFrame(array, columns=['index_iff_as_index', 'A', 'B']) + groupby = df.groupby('index_iff_as_index', as_index=as_index) result = groupby.agg(['min', 'max']) if as_index: @@ -305,16 +305,16 @@ def test_not_as_index_agg_list(): array2 = [[1, 3, 2, 4], [5, 7, 6, 8]] columns = pd.MultiIndex(levels=[['A', 'B'], - ['min', 'max']], - codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) - index = pd.Series([3, 4], name='should_be_index') + ['min', 'max']], + codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) + index = pd.Series([3, 4], name='index_iff_as_index') expected = pd.DataFrame(array2, columns=columns, index=index) else: array2 = [[3, 1, 3, 2, 4], [4, 5, 7, 6, 8]] - columns = pd.MultiIndex(levels=[['A', 'B', 'shouldnt_be_index'], + columns = pd.MultiIndex(levels=[['A', 'B', 'index_iff_as_index'], ['min', 'max', '']], codes=[[2, 0, 0, 1, 1], [2, 0, 1, 0, 1]]) 
expected = pd.DataFrame(array2, columns=columns) From f963b17b7ebc9ebdff4e200df21a95c08f3ed514 Mon Sep 17 00:00:00 2001 From: Moi Date: Fri, 12 Apr 2019 00:37:33 +0200 Subject: [PATCH 8/8] merge master --- doc/source/whatsnew/v0.25.0.rst | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index e8d4b2d2887ae..349c7852a0541 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -375,12 +375,6 @@ Plotting Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- -- Fixed bug in GroupBy when using as_index=False (issue:`25011`) -- -- - -======= - Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.nunique` in which the names of column levels were lost (:issue:`23222`) - Bug in :func:`pandas.core.groupby.GroupBy.agg` when applying a aggregation function to timezone aware data (:issue:`23683`) - Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` where timezone information would be dropped (:issue:`21603`) @@ -389,7 +383,7 @@ Groupby/Resample/Rolling - Ensured that result group order is correct when grouping on an ordered ``Categorical`` and specifying ``observed=True`` (:issue:`25871`, :issue:`25167`) - Bug in :meth:`pandas.core.window.Rolling.min` and :meth:`pandas.core.window.Rolling.max` that caused a memory leak (:issue:`25893`) - Bug in :func:`idxmax` and :func:`idxmin` on :meth:`DataFrame.groupby` with datetime column would return incorrect dtype (:issue:`25444`, :issue:`15306`) ->>>>>>> upstream/master +- Bug in :meth:`pandas.core.groupby.GroupBy.agg` when passing ``as_index=False``, returning an additional column (:issue:`25011`). Reshaping ^^^^^^^^^