From eb1e63f6ce538e8251440c25f215a3b975f790c0 Mon Sep 17 00:00:00 2001 From: "Jon M. Mease" Date: Mon, 3 Oct 2016 15:28:05 -0400 Subject: [PATCH 1/8] Added test cases for GH 5677 --- pandas/tests/test_groupby.py | 140 +++++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index dc326aeaa88ac..a7043cf12bba7 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -521,6 +521,146 @@ def test_grouper_column_and_index(self): expected = df_single.reset_index().groupby(['inner', 'B']).mean() assert_frame_equal(result, expected) + def test_grouper_column_and_index_sugar(self): + # GH 5677, allow strings passed as the `by` parameter to reference + # columns or index levels + + idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 3), + ('b', 1), ('b', 2), ('b', 3)]) + idx.names = ['outer', 'inner'] + df_multi = pd.DataFrame({"A": np.arange(6), + 'B': ['one', 'one', 'two', + 'two', 'one', 'one']}, + index=idx) + + df_single = df_multi.reset_index('outer') + + # Column and Index on MultiIndex + result = df_multi.groupby(['B', 'inner']).mean() + expected = df_multi.groupby(['B', pd.Grouper(level='inner')]).mean() + assert_frame_equal(result, expected) + + # Index and Column on MultiIndex + result = df_multi.groupby(['inner', 'B']).mean() + expected = df_multi.groupby([pd.Grouper(level='inner'), 'B']).mean() + assert_frame_equal(result, expected) + + # Column and Index on single Index + result = df_single.groupby(['B', 'inner']).mean() + expected = df_single.groupby(['B', pd.Grouper(level='inner')]).mean() + assert_frame_equal(result, expected) + + # Index and Column on single Index + result = df_single.groupby(['inner', 'B']).mean() + expected = df_single.groupby([pd.Grouper(level='inner'), 'B']).mean() + assert_frame_equal(result, expected) + + # Single element list of Index on MultiIndex + result = df_multi.groupby(['inner']).mean() + expected = df_multi.groupby(pd.Grouper(level='inner')).mean() + assert_frame_equal(result, expected) + + # Single element list of Index on single Index + result = df_single.groupby(['inner']).mean() + expected = df_single.groupby(pd.Grouper(level='inner')).mean() + assert_frame_equal(result, expected) + + # Index on MultiIndex + result = df_multi.groupby('inner').mean() + expected = df_multi.groupby(pd.Grouper(level='inner')).mean() + assert_frame_equal(result, expected) + + # Index on single Index + result = df_single.groupby('inner').mean() + expected = df_single.groupby(pd.Grouper(level='inner')).mean() + assert_frame_equal(result, expected) + + def test_grouper_column_takes_precedence_over_level(self): + # GH 5677, when a string passed as the `by` parameter + # matches a column and an index level the column takes + # precedence + + idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 3), + ('b', 1), ('b', 2), ('b', 3)]) + idx.names = ['outer', 'inner'] + df_multi_both = pd.DataFrame({"A": np.arange(6), + 'B': ['one', 'one', 'two', + 'two', 'one', 'one'], + 'inner': [1, 1, 1, 1, 1, 1]}, + index=idx) + + df_single_both = df_multi_both.reset_index('outer') + + # Group MultiIndex by single key + result = df_multi_both.groupby('inner').mean() + expected = df_multi_both.groupby(pd.Grouper(key='inner')).mean() + assert_frame_equal(result, expected) + not_expected = df_multi_both.groupby(pd.Grouper(level='inner')).mean() + assert not result.index.equals(not_expected.index) + + # Group single Index by single key + result = df_single_both.groupby('inner').mean() + expected = df_single_both.groupby(pd.Grouper(key='inner')).mean() + assert_frame_equal(result, expected) + not_expected = df_single_both.groupby(pd.Grouper(level='inner')).mean() + assert not result.index.equals(not_expected.index) + + # Group MultiIndex by single key list + result = df_multi_both.groupby(['inner']).mean() + expected = df_multi_both.groupby(pd.Grouper(key='inner')).mean() + assert_frame_equal(result, expected) + not_expected = df_multi_both.groupby(pd.Grouper(level='inner')).mean() + assert not result.index.equals(not_expected.index) + + # Group single Index by single key list + result = df_single_both.groupby(['inner']).mean() + expected = df_single_both.groupby(pd.Grouper(key='inner')).mean() + assert_frame_equal(result, expected) + not_expected = df_single_both.groupby(pd.Grouper(level='inner')).mean() + assert not result.index.equals(not_expected.index) + + # Group MultiIndex by two keys (1) + result = df_multi_both.groupby(['B', 'inner']).mean() + expected = df_multi_both.groupby(['B', + pd.Grouper(key='inner')]).mean() + assert_frame_equal(result, expected) + + not_expected = df_multi_both.groupby(['B', + pd.Grouper(level='inner') + ]).mean() + assert not result.index.equals(not_expected.index) + + # Group MultiIndex by two keys (2) + result = df_multi_both.groupby(['inner', 'B']).mean() + expected = df_multi_both.groupby([pd.Grouper(key='inner'), + 'B']).mean() + assert_frame_equal(result, expected) + + not_expected = df_multi_both.groupby([pd.Grouper(level='inner'), + 'B']).mean() + assert not result.index.equals(not_expected.index) + + # Group single Index by two keys (1) + result = df_single_both.groupby(['B', 'inner']).mean() + expected = df_single_both.groupby(['B', + pd.Grouper(key='inner')]).mean() + assert_frame_equal(result, expected) + + not_expected = df_single_both.groupby(['B', + pd.Grouper(level='inner') + ]).mean() + assert not result.index.equals(not_expected.index) + + # Group single Index by two keys (2) + result = df_single_both.groupby(['inner', 'B']).mean() + expected = df_single_both.groupby([pd.Grouper(key='inner'), + 'B']).mean() + assert_frame_equal(result, expected) + + not_expected = df_single_both.groupby([pd.Grouper(level='inner'), + 'B']).mean() + assert not result.index.equals(not_expected.index) + def test_grouper_getting_correct_binner(self): # GH 10063 From 355d70904fe2f5fc82826d58eca78e2dfaa6435e Mon Sep 17 00:00:00 2001 From: "Jon M. Mease" Date: Wed, 12 Oct 2016 20:42:04 -0400 Subject: [PATCH 2/8] Implemented GH 5677 --- pandas/core/groupby.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index afddb86988970..297377b4e074f 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2449,8 +2449,13 @@ def is_in_obj(gpr): exclusions.append(name) elif is_in_axis(gpr): # df.groupby('name') - in_axis, name, gpr = True, gpr, obj[gpr] - exclusions.append(name) + if gpr in obj: + in_axis, name, gpr = True, gpr, obj[gpr] + exclusions.append(name) + elif gpr in obj.index.names: + in_axis, name, level, gpr = False, None, gpr, None + else: + raise KeyError(gpr) elif isinstance(gpr, Grouper) and gpr.key is not None: # Add key to exclusions exclusions.append(gpr.key) From b050aca842ad03d278886e7d7be6c8d0b6cf80b2 Mon Sep 17 00:00:00 2001 From: "Jon M. Mease" Date: Wed, 12 Oct 2016 21:03:00 -0400 Subject: [PATCH 3/8] Documentation updates for GH 5677 --- doc/source/groupby.rst | 3 +++ doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/generic.py | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index c5a77770085d6..0813a6527bb95 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -94,6 +94,9 @@ The mapping can be specified many different ways: - For DataFrame objects, a string indicating a column to be used to group. Of course ``df.groupby('A')`` is just syntactic sugar for ``df.groupby(df['A'])``, but it makes life simpler + - For DataFrame objects, a string indicating an index level to be used to group. + If a string matches both an index level name and a column name then the column + takes precedence - A list of any of the above things Collectively we refer to the grouping objects as the **keys**. For example, diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 660300e1814e8..0d580847142ba 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -31,6 +31,7 @@ Other enhancements ^^^^^^^^^^^^^^^^^^ - ``pd.read_excel`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`) +- Strings passed to ``DataFrame.groupby()`` as the ``by`` parameter may now reference either column names or index level names (:issue:`5677`) .. _whatsnew_0200.api_breaking: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fbc6333dd6fdd..6dc233f73d4d9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3936,7 +3936,7 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, Parameters ---------- by : mapping function / list of functions, dict, Series, or tuple / - list of column names. + list of column names or index level names. Called on each element of the object index to determine the groups. If a dict or Series is passed, the Series or dict VALUES will be used to determine the groups From 5f93ddd499a26377768de1b7911638e1838f87e8 Mon Sep 17 00:00:00 2001 From: "Jon M. Mease" Date: Wed, 26 Oct 2016 20:10:25 -0400 Subject: [PATCH 4/8] Added future warning on ambiguous case (GH 5677) --- pandas/core/groupby.py | 7 ++++ pandas/tests/test_groupby.py | 66 +++++++++++++++++++++++------------- 2 files changed, 50 insertions(+), 23 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 297377b4e074f..7019be3e2c9b1 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2450,6 +2450,13 @@ def is_in_obj(gpr): elif is_in_axis(gpr): # df.groupby('name') if gpr in obj: + if gpr in obj.index.names: + warnings.warn( + ("'%s' is both a column name and an index level.\n" + "Defaulting to column but " + "this will raise an ambiguity error in a " + "future version") % gpr, + FutureWarning, stacklevel=2) in_axis, name, gpr = True, gpr, obj[gpr] exclusions.append(name) elif gpr in obj.index.names: diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index a7043cf12bba7..7ea86e219b719 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -575,7 +575,7 @@ def test_grouper_column_and_index_sugar(self): expected = df_single.groupby(pd.Grouper(level='inner')).mean() assert_frame_equal(result, expected) - def test_grouper_column_takes_precedence_over_level(self): + def test_grouper_column_takes_precedence_over_level_with_warning(self): # GH 5677, when a string passed as the `by` parameter # matches a column and an index level the column takes # precedence @@ -592,74 +592,94 @@ def test_grouper_column_takes_precedence_over_level(self): df_single_both = df_multi_both.reset_index('outer') # Group MultiIndex by single key - result = df_multi_both.groupby('inner').mean() - expected = df_multi_both.groupby(pd.Grouper(key='inner')).mean() + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = df_multi_both.groupby('inner').mean() + + expected = df_multi_both.groupby([pd.Grouper(key='inner')]).mean() + assert_frame_equal(result, expected) not_expected = df_multi_both.groupby(pd.Grouper(level='inner')).mean() - assert not result.index.equals(not_expected.index) + self.assertFalse(result.index.equals(not_expected.index)) # Group single Index by single key - result = df_single_both.groupby('inner').mean() - expected = df_single_both.groupby(pd.Grouper(key='inner')).mean() + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = df_single_both.groupby('inner').mean() + + expected = df_single_both.groupby([pd.Grouper(key='inner')]).mean() + assert_frame_equal(result, expected) not_expected = df_single_both.groupby(pd.Grouper(level='inner')).mean() - assert not result.index.equals(not_expected.index) + self.assertFalse(result.index.equals(not_expected.index)) # Group MultiIndex by single key list - result = df_multi_both.groupby(['inner']).mean() - expected = df_multi_both.groupby(pd.Grouper(key='inner')).mean() + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = df_multi_both.groupby(['inner']).mean() + + expected = df_multi_both.groupby([pd.Grouper(key='inner')]).mean() + assert_frame_equal(result, expected) not_expected = df_multi_both.groupby(pd.Grouper(level='inner')).mean() - assert not result.index.equals(not_expected.index) + self.assertFalse(result.index.equals(not_expected.index)) # Group single Index by single key list - result = df_single_both.groupby(['inner']).mean() - expected = df_single_both.groupby(pd.Grouper(key='inner')).mean() + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = df_single_both.groupby(['inner']).mean() + + expected = df_single_both.groupby([pd.Grouper(key='inner')]).mean() + assert_frame_equal(result, expected) not_expected = df_single_both.groupby(pd.Grouper(level='inner')).mean() - assert not result.index.equals(not_expected.index) + self.assertFalse(result.index.equals(not_expected.index)) # Group MultiIndex by two keys (1) - result = df_multi_both.groupby(['B', 'inner']).mean() + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = df_multi_both.groupby(['B', 'inner']).mean() + expected = df_multi_both.groupby(['B', - pd.Grouper(key='inner')]).mean() + pd.Grouper(key='inner')]).mean() assert_frame_equal(result, expected) not_expected = df_multi_both.groupby(['B', pd.Grouper(level='inner') ]).mean() - assert not result.index.equals(not_expected.index) + self.assertFalse(result.index.equals(not_expected.index)) # Group MultiIndex by two keys (2) - result = df_multi_both.groupby(['inner', 'B']).mean() + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = df_multi_both.groupby(['inner', 'B']).mean() + expected = df_multi_both.groupby([pd.Grouper(key='inner'), 'B']).mean() assert_frame_equal(result, expected) not_expected = df_multi_both.groupby([pd.Grouper(level='inner'), 'B']).mean() - assert not result.index.equals(not_expected.index) + self.assertFalse(result.index.equals(not_expected.index)) # Group single Index by two keys (1) - result = df_single_both.groupby(['B', 'inner']).mean() + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = df_single_both.groupby(['B', 'inner']).mean() + expected = df_single_both.groupby(['B', - pd.Grouper(key='inner')]).mean() + pd.Grouper(key='inner')]).mean() assert_frame_equal(result, expected) not_expected = df_single_both.groupby(['B', pd.Grouper(level='inner') ]).mean() - assert not result.index.equals(not_expected.index) + self.assertFalse(result.index.equals(not_expected.index)) # Group single Index by two keys (2) - result = df_single_both.groupby(['inner', 'B']).mean() + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = df_single_both.groupby(['inner', 'B']).mean() + expected = df_single_both.groupby([pd.Grouper(key='inner'), 'B']).mean() assert_frame_equal(result, expected) not_expected = df_single_both.groupby([pd.Grouper(level='inner'), 'B']).mean() - assert not result.index.equals(not_expected.index) + self.assertFalse(result.index.equals(not_expected.index)) def test_grouper_getting_correct_binner(self): From 5325ee6073b7e65f225eff839d252cb90c0acf61 Mon Sep 17 00:00:00 2001 From: "Jon M. Mease" Date: Wed, 26 Oct 2016 20:16:06 -0400 Subject: [PATCH 5/8] Indentation fix --- pandas/tests/test_groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 7ea86e219b719..fafcdf76a806c 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -636,7 +636,7 @@ def test_grouper_column_takes_precedence_over_level_with_warning(self): result = df_multi_both.groupby(['B', 'inner']).mean() expected = df_multi_both.groupby(['B', - pd.Grouper(key='inner')]).mean() + pd.Grouper(key='inner')]).mean() assert_frame_equal(result, expected) not_expected = df_multi_both.groupby(['B', From b68af168178373fd17351d02a6a141a1345f6167 Mon Sep 17 00:00:00 2001 From: "Jon M. Mease" Date: Thu, 27 Oct 2016 07:41:06 -0400 Subject: [PATCH 6/8] Added note explaining version 0.20 change and ambiguity resolution / warning --- doc/source/groupby.rst | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index 0813a6527bb95..95b4293b06d02 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -95,13 +95,20 @@ The mapping can be specified many different ways: course ``df.groupby('A')`` is just syntactic sugar for ``df.groupby(df['A'])``, but it makes life simpler - For DataFrame objects, a string indicating an index level to be used to group. - If a string matches both an index level name and a column name then the column - takes precedence - A list of any of the above things Collectively we refer to the grouping objects as the **keys**. For example, consider the following DataFrame: +.. note:: + + .. versionadded:: 0.20 + + A string passed to ``groupby`` may refer to either a column or an index level. + If a string matches both a column and an index level then a warning is issued + and the column takes precedence. This will result in an ambiguity error in a + future version. + .. ipython:: python df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', From 98af4d7e4166ff06bbaf0bef332e19c6372a8439 Mon Sep 17 00:00:00 2001 From: "Jon M. Mease" Date: Sat, 5 Nov 2016 11:40:03 -0400 Subject: [PATCH 7/8] Added example for grouping by combination of index level and column --- doc/source/groupby.rst | 79 ++++++++++++++++++++++++++------- doc/source/whatsnew/v0.20.0.txt | 13 ++++++ 2 files changed, 77 insertions(+), 15 deletions(-) diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index 95b4293b06d02..8d635d73933f1 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -105,9 +105,9 @@ consider the following DataFrame: .. versionadded:: 0.20 A string passed to ``groupby`` may refer to either a column or an index level. - If a string matches both a column and an index level then a warning is issued - and the column takes precedence. This will result in an ambiguity error in a - future version. + If a string matches both a column name and an index level name then a warning is + issued and the column takes precedence. This will result in an ambiguity error + in a future version. .. ipython:: python @@ -247,17 +247,6 @@ the length of the ``groups`` dict, so it is largely just a convenience: gb.aggregate gb.count gb.cumprod gb.dtype gb.first gb.groups gb.hist gb.max gb.min gb.nth gb.prod gb.resample gb.sum gb.var gb.apply gb.cummax gb.cumsum gb.fillna gb.gender gb.head gb.indices gb.mean gb.name gb.ohlc gb.quantile gb.size gb.tail gb.weight - -.. ipython:: python - :suppress: - - df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', - 'foo', 'bar', 'foo', 'foo'], - 'B' : ['one', 'one', 'two', 'three', - 'two', 'two', 'one', 'three'], - 'C' : np.random.randn(8), - 'D' : np.random.randn(8)}) - .. _groupby.multiindex: GroupBy with MultiIndex @@ -299,7 +288,9 @@ chosen level: s.sum(level='second') -Also as of v0.6, grouping with multiple levels is supported. +.. versionadded:: 0.6 + +Grouping with multiple levels is supported. .. ipython:: python :suppress: @@ -316,8 +307,56 @@ Also as of v0.6, grouping with multiple levels is supported. s s.groupby(level=['first', 'second']).sum() +.. versionadded:: 0.20 + +Index level names may be supplied as keys. + +.. ipython:: python + + s.groupby(['first', 'second']).sum() + More on the ``sum`` function and aggregation later. +Grouping DataFrame with Index Levels and Columns +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +A DataFrame may be grouped by a combination of columns and index levels by +specifying the column names as strings and the index levels as ``pd.Grouper`` +objects. + +.. ipython:: python + + arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], + ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] + + index = pd.MultiIndex.from_arrays(arrays, names=['first', 'second']) + + df = pd.DataFrame({'A': [1, 1, 1, 1, 2, 2, 3, 3], + 'B': np.arange(8)}, + index=index) + + df + +The following example groups ``df`` by the ``second`` index level and +the ``A`` column. + +.. ipython:: python + + df.groupby([pd.Grouper(level=1), 'A']).sum() + +Index levels may also be specified by name. + +.. ipython:: python + + df.groupby([pd.Grouper(level='second'), 'A']).sum() + +.. versionadded:: 0.20 + +Index level names may be specified as keys directly to ``groupby``. + +.. ipython:: python + + df.groupby(['second', 'A']).sum() + DataFrame column selection in GroupBy ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -325,6 +364,16 @@ Once you have created the GroupBy object from a DataFrame, for example, you might want to do something different for each of the columns. Thus, using ``[]`` similar to getting a column from a DataFrame, you can do: +.. ipython:: python + :suppress: + + df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', + 'foo', 'bar', 'foo', 'foo'], + 'B' : ['one', 'one', 'two', 'three', + 'two', 'two', 'one', 'three'], + 'C' : np.random.randn(8), + 'D' : np.random.randn(8)}) + .. ipython:: python grouped = df.groupby(['A']) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 0d580847142ba..0f06864af3a7c 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -33,6 +33,19 @@ Other enhancements - ``pd.read_excel`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`) - Strings passed to ``DataFrame.groupby()`` as the ``by`` parameter may now reference either column names or index level names (:issue:`5677`) +.. ipython:: python + + arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], + ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] + + index = pd.MultiIndex.from_arrays(arrays, names=['first', 'second']) + + df = pd.DataFrame({'A': [1, 1, 1, 1, 2, 2, 3, 3], + 'B': np.arange(8)}, + index=index) + + df.groupby(['second', 'A']).sum() + .. _whatsnew_0200.api_breaking: From 2d354612e9bce14367e918fa3a962e0e01aaf5de Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 14 Dec 2016 15:15:36 +0100 Subject: [PATCH 8/8] shorten test names --- pandas/tests/test_groupby.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index fafcdf76a806c..527ede5f4c9d9 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -521,7 +521,7 @@ def test_grouper_column_and_index(self): expected = df_single.reset_index().groupby(['inner', 'B']).mean() assert_frame_equal(result, expected) - def test_grouper_column_and_index_sugar(self): + def test_grouper_index_level_as_string(self): # GH 5677, allow strings passed as the `by` parameter to reference # columns or index levels @@ -575,7 +575,7 @@ def test_grouper_column_and_index_sugar(self): expected = df_single.groupby(pd.Grouper(level='inner')).mean() assert_frame_equal(result, expected) - def test_grouper_column_takes_precedence_over_level_with_warning(self): + def test_grouper_column_index_level_precedence(self): # GH 5677, when a string passed as the `by` parameter # matches a column and an index level the column takes # precedence @@ -596,7 +596,6 @@ def test_grouper_column_takes_precedence_over_level_with_warning(self): result = df_multi_both.groupby('inner').mean() expected = df_multi_both.groupby([pd.Grouper(key='inner')]).mean() - assert_frame_equal(result, expected) not_expected = df_multi_both.groupby(pd.Grouper(level='inner')).mean() self.assertFalse(result.index.equals(not_expected.index)) @@ -606,7 +605,6 @@ def test_grouper_column_takes_precedence_over_level_with_warning(self): result = df_single_both.groupby('inner').mean() expected = df_single_both.groupby([pd.Grouper(key='inner')]).mean() - assert_frame_equal(result, expected) not_expected = df_single_both.groupby(pd.Grouper(level='inner')).mean() self.assertFalse(result.index.equals(not_expected.index)) @@ -616,7 +614,6 @@ def test_grouper_column_takes_precedence_over_level_with_warning(self): result = df_multi_both.groupby(['inner']).mean() expected = df_multi_both.groupby([pd.Grouper(key='inner')]).mean() - assert_frame_equal(result, expected) not_expected = df_multi_both.groupby(pd.Grouper(level='inner')).mean() self.assertFalse(result.index.equals(not_expected.index)) @@ -626,7 +623,6 @@ def test_grouper_column_takes_precedence_over_level_with_warning(self): result = df_single_both.groupby(['inner']).mean() expected = df_single_both.groupby([pd.Grouper(key='inner')]).mean() - assert_frame_equal(result, expected) not_expected = df_single_both.groupby(pd.Grouper(level='inner')).mean() self.assertFalse(result.index.equals(not_expected.index)) @@ -638,7 +634,6 @@ def test_grouper_column_takes_precedence_over_level_with_warning(self): expected = df_multi_both.groupby(['B', pd.Grouper(key='inner')]).mean() assert_frame_equal(result, expected) - not_expected = df_multi_both.groupby(['B', pd.Grouper(level='inner') ]).mean() @@ -651,7 +646,6 @@ def test_grouper_column_takes_precedence_over_level_with_warning(self): expected = df_multi_both.groupby([pd.Grouper(key='inner'), 'B']).mean() assert_frame_equal(result, expected) - not_expected = df_multi_both.groupby([pd.Grouper(level='inner'), 'B']).mean() self.assertFalse(result.index.equals(not_expected.index)) @@ -663,7 +657,6 @@ def test_grouper_column_takes_precedence_over_level_with_warning(self): expected = df_single_both.groupby(['B', pd.Grouper(key='inner')]).mean() assert_frame_equal(result, expected) - not_expected = df_single_both.groupby(['B', pd.Grouper(level='inner') ]).mean() @@ -676,7 +669,6 @@ def test_grouper_column_takes_precedence_over_level_with_warning(self): expected = df_single_both.groupby([pd.Grouper(key='inner'), 'B']).mean() assert_frame_equal(result, expected) - not_expected = df_single_both.groupby([pd.Grouper(level='inner'), 'B']).mean() self.assertFalse(result.index.equals(not_expected.index))