diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index f04410ef63531..5727888044229 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -974,6 +974,7 @@ Groupby/Resample/Rolling - Bug in ``DataFrame.groupby`` where index and column keys were not recognized correctly when the number of keys equaled the number of elements on the groupby axis (:issue:`16859`) - Bug in ``groupby.nunique()`` with ``TimeGrouper`` which cannot handle ``NaT`` correctly (:issue:`17575`) - Bug in ``DataFrame.groupby`` where a single level selection from a ``MultiIndex`` unexpectedly sorts (:issue:`17537`) +- Bug in ``DataFrame.groupby`` where a spurious warning is raised when a ``Grouper`` object is used to override an ambiguous column name (:issue:`17383`) - Bug in ``TimeGrouper`` differs when passes as a list and as a scalar (:issue:`17530`) Sparse diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 9518f17e5f4f1..54ced824b7353 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2704,7 +2704,7 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, # a passed-in Grouper, directly convert if isinstance(key, Grouper): - binner, grouper, obj = key._get_grouper(obj) + binner, grouper, obj = key._get_grouper(obj, validate=False) if key.key is None: return grouper, [], obj else: diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 657de9b589dc9..740526e262d16 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -253,158 +253,6 @@ def test_grouper_column_and_index(self): expected = df_single.reset_index().groupby(['inner', 'B']).mean() assert_frame_equal(result, expected) - def test_grouper_index_level_as_string(self): - # GH 5677, allow strings passed as the `by` parameter to reference - # columns or index levels - - idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 3), - ('b', 1), ('b', 2), ('b', 3)]) - 
idx.names = ['outer', 'inner'] - df_multi = pd.DataFrame({"A": np.arange(6), - 'B': ['one', 'one', 'two', - 'two', 'one', 'one']}, - index=idx) - - df_single = df_multi.reset_index('outer') - - # Column and Index on MultiIndex - result = df_multi.groupby(['B', 'inner']).mean() - expected = df_multi.groupby(['B', pd.Grouper(level='inner')]).mean() - assert_frame_equal(result, expected) - - # Index and Column on MultiIndex - result = df_multi.groupby(['inner', 'B']).mean() - expected = df_multi.groupby([pd.Grouper(level='inner'), 'B']).mean() - assert_frame_equal(result, expected) - - # Column and Index on single Index - result = df_single.groupby(['B', 'inner']).mean() - expected = df_single.groupby(['B', pd.Grouper(level='inner')]).mean() - assert_frame_equal(result, expected) - - # Index and Column on single Index - result = df_single.groupby(['inner', 'B']).mean() - expected = df_single.groupby([pd.Grouper(level='inner'), 'B']).mean() - assert_frame_equal(result, expected) - - # Single element list of Index on MultiIndex - result = df_multi.groupby(['inner']).mean() - expected = df_multi.groupby(pd.Grouper(level='inner')).mean() - assert_frame_equal(result, expected) - - # Single element list of Index on single Index - result = df_single.groupby(['inner']).mean() - expected = df_single.groupby(pd.Grouper(level='inner')).mean() - assert_frame_equal(result, expected) - - # Index on MultiIndex - result = df_multi.groupby('inner').mean() - expected = df_multi.groupby(pd.Grouper(level='inner')).mean() - assert_frame_equal(result, expected) - - # Index on single Index - result = df_single.groupby('inner').mean() - expected = df_single.groupby(pd.Grouper(level='inner')).mean() - assert_frame_equal(result, expected) - - def test_grouper_column_index_level_precedence(self): - # GH 5677, when a string passed as the `by` parameter - # matches a column and an index level the column takes - # precedence - - idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 3), - 
('b', 1), ('b', 2), ('b', 3)]) - idx.names = ['outer', 'inner'] - df_multi_both = pd.DataFrame({"A": np.arange(6), - 'B': ['one', 'one', 'two', - 'two', 'one', 'one'], - 'inner': [1, 1, 1, 1, 1, 1]}, - index=idx) - - df_single_both = df_multi_both.reset_index('outer') - - # Group MultiIndex by single key - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df_multi_both.groupby('inner').mean() - - expected = df_multi_both.groupby([pd.Grouper(key='inner')]).mean() - assert_frame_equal(result, expected) - not_expected = df_multi_both.groupby(pd.Grouper(level='inner')).mean() - assert not result.index.equals(not_expected.index) - - # Group single Index by single key - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df_single_both.groupby('inner').mean() - - expected = df_single_both.groupby([pd.Grouper(key='inner')]).mean() - assert_frame_equal(result, expected) - not_expected = df_single_both.groupby(pd.Grouper(level='inner')).mean() - assert not result.index.equals(not_expected.index) - - # Group MultiIndex by single key list - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df_multi_both.groupby(['inner']).mean() - - expected = df_multi_both.groupby([pd.Grouper(key='inner')]).mean() - assert_frame_equal(result, expected) - not_expected = df_multi_both.groupby(pd.Grouper(level='inner')).mean() - assert not result.index.equals(not_expected.index) - - # Group single Index by single key list - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df_single_both.groupby(['inner']).mean() - - expected = df_single_both.groupby([pd.Grouper(key='inner')]).mean() - assert_frame_equal(result, expected) - not_expected = df_single_both.groupby(pd.Grouper(level='inner')).mean() - assert not result.index.equals(not_expected.index) - - # Group MultiIndex by two keys (1) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result 
= df_multi_both.groupby(['B', 'inner']).mean() - - expected = df_multi_both.groupby(['B', - pd.Grouper(key='inner')]).mean() - assert_frame_equal(result, expected) - not_expected = df_multi_both.groupby(['B', - pd.Grouper(level='inner') - ]).mean() - assert not result.index.equals(not_expected.index) - - # Group MultiIndex by two keys (2) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df_multi_both.groupby(['inner', 'B']).mean() - - expected = df_multi_both.groupby([pd.Grouper(key='inner'), - 'B']).mean() - assert_frame_equal(result, expected) - not_expected = df_multi_both.groupby([pd.Grouper(level='inner'), - 'B']).mean() - assert not result.index.equals(not_expected.index) - - # Group single Index by two keys (1) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df_single_both.groupby(['B', 'inner']).mean() - - expected = df_single_both.groupby(['B', - pd.Grouper(key='inner')]).mean() - assert_frame_equal(result, expected) - not_expected = df_single_both.groupby(['B', - pd.Grouper(level='inner') - ]).mean() - assert not result.index.equals(not_expected.index) - - # Group single Index by two keys (2) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df_single_both.groupby(['inner', 'B']).mean() - - expected = df_single_both.groupby([pd.Grouper(key='inner'), - 'B']).mean() - assert_frame_equal(result, expected) - not_expected = df_single_both.groupby([pd.Grouper(level='inner'), - 'B']).mean() - assert not result.index.equals(not_expected.index) - def test_grouper_getting_correct_binner(self): # GH 10063 diff --git a/pandas/tests/groupby/test_index_as_string.py b/pandas/tests/groupby/test_index_as_string.py new file mode 100644 index 0000000000000..3b6e15036cfe2 --- /dev/null +++ b/pandas/tests/groupby/test_index_as_string.py @@ -0,0 +1,116 @@ +import pytest +import pandas as pd +import numpy as np + +from pandas.util.testing import assert_frame_equal, 
assert_series_equal +import pandas.util.testing as tm + + +@pytest.fixture(params=[['inner'], ['inner', 'outer']]) +def frame(request): + levels = request.param + df = pd.DataFrame({'outer': ['a', 'a', 'a', 'b', 'b', 'b'], + 'inner': [1, 2, 3, 1, 2, 3], + 'A': np.arange(6), + 'B': ['one', 'one', 'two', 'two', 'one', 'one']}) + if levels: + df = df.set_index(levels) + + return df + + +@pytest.fixture() +def series(): + df = pd.DataFrame({'outer': ['a', 'a', 'a', 'b', 'b', 'b'], + 'inner': [1, 2, 3, 1, 2, 3], + 'A': np.arange(6), + 'B': ['one', 'one', 'two', 'two', 'one', 'one']}) + s = df.set_index(['outer', 'inner', 'B'])['A'] + + return s + + +@pytest.mark.parametrize('key_strs,groupers', [ + ('inner', # Index name + pd.Grouper(level='inner') + ), + (['inner'], # List of index name + [pd.Grouper(level='inner')] + ), + (['B', 'inner'], # Column and index + ['B', pd.Grouper(level='inner')] + ), + (['inner', 'B'], # Index and column + [pd.Grouper(level='inner'), 'B'])]) +def test_grouper_index_level_as_string(frame, key_strs, groupers): + result = frame.groupby(key_strs).mean() + expected = frame.groupby(groupers).mean() + assert_frame_equal(result, expected) + + +@pytest.mark.parametrize('levels', [ + 'inner', 'outer', 'B', + ['inner'], ['outer'], ['B'], + ['inner', 'outer'], ['outer', 'inner'], + ['inner', 'outer', 'B'], ['B', 'outer', 'inner'] +]) +def test_grouper_index_level_as_string_series(series, levels): + + # Compute expected result + if isinstance(levels, list): + groupers = [pd.Grouper(level=lv) for lv in levels] + else: + groupers = pd.Grouper(level=levels) + + expected = series.groupby(groupers).mean() + + # Compute and check result + result = series.groupby(levels).mean() + assert_series_equal(result, expected) + + +@pytest.mark.parametrize('key_strs,key_groupers,level_groupers', [ + ('inner', # Index name + pd.Grouper(key='inner'), + pd.Grouper(level='inner'), + ), + (['inner'], # List of index name + [pd.Grouper(key='inner')], + 
[pd.Grouper(level='inner')] + ), + (['B', 'inner'], # Column and index + ['B', pd.Grouper(key='inner')], + ['B', pd.Grouper(level='inner')] + ), + (['inner', 'B'], # Index and column + [pd.Grouper(key='inner'), 'B'], + [pd.Grouper(level='inner'), 'B'])]) +def test_grouper_column_index_level_precedence(frame, + key_strs, + key_groupers, + level_groupers): + + # GH 5677, when a string passed as the `by` parameter + # matches a column and an index level the column takes + # precedence and a FutureWarning is raised + + # Add 'inner' column to frame + # (frame already has an 'inner' index) + frame['inner'] = [1, 1, 1, 1, 1, 1] + + # Performing a groupby with strings should produce a warning + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = frame.groupby(key_strs).mean() + + # Grouping with key Grouper should produce the same result and no warning + with tm.assert_produces_warning(False): + expected = frame.groupby(key_groupers).mean() + + assert_frame_equal(result, expected) + + # Grouping with level Grouper should produce a different result but + # still no warning + with tm.assert_produces_warning(False): + not_expected = frame.groupby(level_groupers).mean() + + assert not result.index.equals(not_expected.index)