Skip to content

Commit e001500

Browse files
jonmmease authored and jreback committed
Refactor index-as-string groupby tests and fix spurious warning (Bug 17383) (#17843)
1 parent 7a2891b commit e001500

File tree

4 files changed

+118
-153
lines changed

4 files changed

+118
-153
lines changed

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -980,6 +980,7 @@ Groupby/Resample/Rolling
980980
- Bug in ``DataFrame.groupby`` where index and column keys were not recognized correctly when the number of keys equaled the number of elements on the groupby axis (:issue:`16859`)
981981
- Bug in ``groupby.nunique()`` with ``TimeGrouper`` which cannot handle ``NaT`` correctly (:issue:`17575`)
982982
- Bug in ``DataFrame.groupby`` where a single level selection from a ``MultiIndex`` unexpectedly sorts (:issue:`17537`)
983+
- Bug in ``DataFrame.groupby`` where a spurious warning is raised when a ``Grouper`` object is used to override an ambiguous column name (:issue:`17383`)
983984
- Bug in ``TimeGrouper`` where the result differs when it is passed as a list and as a scalar (:issue:`17530`)
984985

985986
Sparse

pandas/core/groupby.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2703,7 +2703,7 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
27032703

27042704
# a passed-in Grouper, directly convert
27052705
if isinstance(key, Grouper):
2706-
binner, grouper, obj = key._get_grouper(obj)
2706+
binner, grouper, obj = key._get_grouper(obj, validate=False)
27072707
if key.key is None:
27082708
return grouper, [], obj
27092709
else:

pandas/tests/groupby/test_groupby.py

-152
Original file line numberDiff line numberDiff line change
@@ -253,158 +253,6 @@ def test_grouper_column_and_index(self):
253253
expected = df_single.reset_index().groupby(['inner', 'B']).mean()
254254
assert_frame_equal(result, expected)
255255

256-
def test_grouper_index_level_as_string(self):
257-
# GH 5677, allow strings passed as the `by` parameter to reference
258-
# columns or index levels
259-
260-
idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 3),
261-
('b', 1), ('b', 2), ('b', 3)])
262-
idx.names = ['outer', 'inner']
263-
df_multi = pd.DataFrame({"A": np.arange(6),
264-
'B': ['one', 'one', 'two',
265-
'two', 'one', 'one']},
266-
index=idx)
267-
268-
df_single = df_multi.reset_index('outer')
269-
270-
# Column and Index on MultiIndex
271-
result = df_multi.groupby(['B', 'inner']).mean()
272-
expected = df_multi.groupby(['B', pd.Grouper(level='inner')]).mean()
273-
assert_frame_equal(result, expected)
274-
275-
# Index and Column on MultiIndex
276-
result = df_multi.groupby(['inner', 'B']).mean()
277-
expected = df_multi.groupby([pd.Grouper(level='inner'), 'B']).mean()
278-
assert_frame_equal(result, expected)
279-
280-
# Column and Index on single Index
281-
result = df_single.groupby(['B', 'inner']).mean()
282-
expected = df_single.groupby(['B', pd.Grouper(level='inner')]).mean()
283-
assert_frame_equal(result, expected)
284-
285-
# Index and Column on single Index
286-
result = df_single.groupby(['inner', 'B']).mean()
287-
expected = df_single.groupby([pd.Grouper(level='inner'), 'B']).mean()
288-
assert_frame_equal(result, expected)
289-
290-
# Single element list of Index on MultiIndex
291-
result = df_multi.groupby(['inner']).mean()
292-
expected = df_multi.groupby(pd.Grouper(level='inner')).mean()
293-
assert_frame_equal(result, expected)
294-
295-
# Single element list of Index on single Index
296-
result = df_single.groupby(['inner']).mean()
297-
expected = df_single.groupby(pd.Grouper(level='inner')).mean()
298-
assert_frame_equal(result, expected)
299-
300-
# Index on MultiIndex
301-
result = df_multi.groupby('inner').mean()
302-
expected = df_multi.groupby(pd.Grouper(level='inner')).mean()
303-
assert_frame_equal(result, expected)
304-
305-
# Index on single Index
306-
result = df_single.groupby('inner').mean()
307-
expected = df_single.groupby(pd.Grouper(level='inner')).mean()
308-
assert_frame_equal(result, expected)
309-
310-
def test_grouper_column_index_level_precedence(self):
311-
# GH 5677, when a string passed as the `by` parameter
312-
# matches a column and an index level the column takes
313-
# precedence
314-
315-
idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 3),
316-
('b', 1), ('b', 2), ('b', 3)])
317-
idx.names = ['outer', 'inner']
318-
df_multi_both = pd.DataFrame({"A": np.arange(6),
319-
'B': ['one', 'one', 'two',
320-
'two', 'one', 'one'],
321-
'inner': [1, 1, 1, 1, 1, 1]},
322-
index=idx)
323-
324-
df_single_both = df_multi_both.reset_index('outer')
325-
326-
# Group MultiIndex by single key
327-
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
328-
result = df_multi_both.groupby('inner').mean()
329-
330-
expected = df_multi_both.groupby([pd.Grouper(key='inner')]).mean()
331-
assert_frame_equal(result, expected)
332-
not_expected = df_multi_both.groupby(pd.Grouper(level='inner')).mean()
333-
assert not result.index.equals(not_expected.index)
334-
335-
# Group single Index by single key
336-
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
337-
result = df_single_both.groupby('inner').mean()
338-
339-
expected = df_single_both.groupby([pd.Grouper(key='inner')]).mean()
340-
assert_frame_equal(result, expected)
341-
not_expected = df_single_both.groupby(pd.Grouper(level='inner')).mean()
342-
assert not result.index.equals(not_expected.index)
343-
344-
# Group MultiIndex by single key list
345-
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
346-
result = df_multi_both.groupby(['inner']).mean()
347-
348-
expected = df_multi_both.groupby([pd.Grouper(key='inner')]).mean()
349-
assert_frame_equal(result, expected)
350-
not_expected = df_multi_both.groupby(pd.Grouper(level='inner')).mean()
351-
assert not result.index.equals(not_expected.index)
352-
353-
# Group single Index by single key list
354-
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
355-
result = df_single_both.groupby(['inner']).mean()
356-
357-
expected = df_single_both.groupby([pd.Grouper(key='inner')]).mean()
358-
assert_frame_equal(result, expected)
359-
not_expected = df_single_both.groupby(pd.Grouper(level='inner')).mean()
360-
assert not result.index.equals(not_expected.index)
361-
362-
# Group MultiIndex by two keys (1)
363-
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
364-
result = df_multi_both.groupby(['B', 'inner']).mean()
365-
366-
expected = df_multi_both.groupby(['B',
367-
pd.Grouper(key='inner')]).mean()
368-
assert_frame_equal(result, expected)
369-
not_expected = df_multi_both.groupby(['B',
370-
pd.Grouper(level='inner')
371-
]).mean()
372-
assert not result.index.equals(not_expected.index)
373-
374-
# Group MultiIndex by two keys (2)
375-
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
376-
result = df_multi_both.groupby(['inner', 'B']).mean()
377-
378-
expected = df_multi_both.groupby([pd.Grouper(key='inner'),
379-
'B']).mean()
380-
assert_frame_equal(result, expected)
381-
not_expected = df_multi_both.groupby([pd.Grouper(level='inner'),
382-
'B']).mean()
383-
assert not result.index.equals(not_expected.index)
384-
385-
# Group single Index by two keys (1)
386-
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
387-
result = df_single_both.groupby(['B', 'inner']).mean()
388-
389-
expected = df_single_both.groupby(['B',
390-
pd.Grouper(key='inner')]).mean()
391-
assert_frame_equal(result, expected)
392-
not_expected = df_single_both.groupby(['B',
393-
pd.Grouper(level='inner')
394-
]).mean()
395-
assert not result.index.equals(not_expected.index)
396-
397-
# Group single Index by two keys (2)
398-
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
399-
result = df_single_both.groupby(['inner', 'B']).mean()
400-
401-
expected = df_single_both.groupby([pd.Grouper(key='inner'),
402-
'B']).mean()
403-
assert_frame_equal(result, expected)
404-
not_expected = df_single_both.groupby([pd.Grouper(level='inner'),
405-
'B']).mean()
406-
assert not result.index.equals(not_expected.index)
407-
408256
def test_grouper_getting_correct_binner(self):
409257

410258
# GH 10063
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
import pytest
2+
import pandas as pd
3+
import numpy as np
4+
5+
from pandas.util.testing import assert_frame_equal, assert_series_equal
6+
import pandas.util.testing as tm
7+
8+
9+
@pytest.fixture(params=[['inner'], ['inner', 'outer']])
10+
def frame(request):
11+
levels = request.param
12+
df = pd.DataFrame({'outer': ['a', 'a', 'a', 'b', 'b', 'b'],
13+
'inner': [1, 2, 3, 1, 2, 3],
14+
'A': np.arange(6),
15+
'B': ['one', 'one', 'two', 'two', 'one', 'one']})
16+
if levels:
17+
df = df.set_index(levels)
18+
19+
return df
20+
21+
22+
@pytest.fixture()
23+
def series():
24+
df = pd.DataFrame({'outer': ['a', 'a', 'a', 'b', 'b', 'b'],
25+
'inner': [1, 2, 3, 1, 2, 3],
26+
'A': np.arange(6),
27+
'B': ['one', 'one', 'two', 'two', 'one', 'one']})
28+
s = df.set_index(['outer', 'inner', 'B'])['A']
29+
30+
return s
31+
32+
33+
@pytest.mark.parametrize('key_strs,groupers', [
34+
('inner', # Index name
35+
pd.Grouper(level='inner')
36+
),
37+
(['inner'], # List of index name
38+
[pd.Grouper(level='inner')]
39+
),
40+
(['B', 'inner'], # Column and index
41+
['B', pd.Grouper(level='inner')]
42+
),
43+
(['inner', 'B'], # Index and column
44+
[pd.Grouper(level='inner'), 'B'])])
45+
def test_grouper_index_level_as_string(frame, key_strs, groupers):
46+
result = frame.groupby(key_strs).mean()
47+
expected = frame.groupby(groupers).mean()
48+
assert_frame_equal(result, expected)
49+
50+
51+
@pytest.mark.parametrize('levels', [
52+
'inner', 'outer', 'B',
53+
['inner'], ['outer'], ['B'],
54+
['inner', 'outer'], ['outer', 'inner'],
55+
['inner', 'outer', 'B'], ['B', 'outer', 'inner']
56+
])
57+
def test_grouper_index_level_as_string_series(series, levels):
58+
59+
# Compute expected result
60+
if isinstance(levels, list):
61+
groupers = [pd.Grouper(level=lv) for lv in levels]
62+
else:
63+
groupers = pd.Grouper(level=levels)
64+
65+
expected = series.groupby(groupers).mean()
66+
67+
# Compute and check result
68+
result = series.groupby(levels).mean()
69+
assert_series_equal(result, expected)
70+
71+
72+
@pytest.mark.parametrize('key_strs,key_groupers,level_groupers', [
73+
('inner', # Index name
74+
pd.Grouper(key='inner'),
75+
pd.Grouper(level='inner'),
76+
),
77+
(['inner'], # List of index name
78+
[pd.Grouper(key='inner')],
79+
[pd.Grouper(level='inner')]
80+
),
81+
(['B', 'inner'], # Column and index
82+
['B', pd.Grouper(key='inner')],
83+
['B', pd.Grouper(level='inner')]
84+
),
85+
(['inner', 'B'], # Index and column
86+
[pd.Grouper(key='inner'), 'B'],
87+
[pd.Grouper(level='inner'), 'B'])])
88+
def test_grouper_column_index_level_precedence(frame,
89+
key_strs,
90+
key_groupers,
91+
level_groupers):
92+
93+
# GH 5677, when a string passed as the `by` parameter
94+
# matches a column and an index level the column takes
95+
# precedence and a FutureWarning is raised
96+
97+
# Add 'inner' column to frame
98+
# (frame already has an 'inner' index)
99+
frame['inner'] = [1, 1, 1, 1, 1, 1]
100+
101+
# Performing a groupby with strings should produce warning
102+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
103+
result = frame.groupby(key_strs).mean()
104+
105+
# Grouping with key Grouper should produce the same result and no warning
106+
with tm.assert_produces_warning(False):
107+
expected = frame.groupby(key_groupers).mean()
108+
109+
assert_frame_equal(result, expected)
110+
111+
# Grouping with level Grouper should produce a different result but
112+
# still no warning
113+
with tm.assert_produces_warning(False):
114+
not_expected = frame.groupby(level_groupers).mean()
115+
116+
assert not result.index.equals(not_expected.index)

0 commit comments

Comments
 (0)