Skip to content

Commit 5e44744

Browse files
committed
API: Disallow dict as agg parameter during groupby
Grouped, rolled, and resampled Series and DataFrames now disallow dicts and nested dicts, respectively, as parameters to aggregation (this was previously deprecated). xref pandas-dev/pandas gh-15931.
1 parent da9d851 commit 5e44744

File tree

8 files changed

+28
-303
lines changed

8 files changed

+28
-303
lines changed

doc/source/whatsnew/v0.24.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -941,6 +941,8 @@ Removal of prior version deprecations/changes
941941
- Several private functions were removed from the (non-public) module ``pandas.core.common`` (:issue:`22001`)
942942
- Removal of the previously deprecated module ``pandas.core.datetools`` (:issue:`14105`, :issue:`14094`)
943943
- Strings passed into :meth:`DataFrame.groupby` that refer to both column and index levels will raise a ``ValueError`` (:issue:`14432`)
944+
- Grouped, rolled, and resampled ``Series`` will now raise a ``ValueError`` when a dictionary is passed in during aggregation (:issue:`15931`)
945+
- Grouped, rolled, and resampled ``DataFrame`` will now raise a ``ValueError`` when a nested dictionary is passed in during aggregation (:issue:`15931`)
944946
- :meth:`Index.repeat` and :meth:`MultiIndex.repeat` have renamed the ``n`` argument to ``repeats`` (:issue:`14645`)
945947
- Removal of the previously deprecated ``as_indexer`` keyword completely from ``str.match()`` (:issue:`22356`, :issue:`6581`)
946948
- Removed the ``pandas.formats.style`` shim for :class:`pandas.io.formats.style.Styler` (:issue:`16059`)

pandas/core/base.py

+7-11
Original file line numberDiff line numberDiff line change
@@ -354,14 +354,10 @@ def _aggregate(self, arg, *args, **kwargs):
354354

355355
obj = self._selected_obj
356356

357-
def nested_renaming_depr(level=4):
358-
# deprecation of nested renaming
359-
# GH 15931
360-
warnings.warn(
361-
("using a dict with renaming "
362-
"is deprecated and will be removed in a future "
363-
"version"),
364-
FutureWarning, stacklevel=level)
357+
def raise_on_dict_renaming():
358+
# Originally deprecated in gh-15931, now enforcing.
359+
rename_msg_err = "Using a dict with renaming is not allowed"
360+
raise ValueError(rename_msg_err)
365361

366362
# if we have a dict of any non-scalars
367363
# eg. {'A' : ['mean']}, normalize all to
@@ -391,10 +387,10 @@ def nested_renaming_depr(level=4):
391387
msg = ('cannot perform renaming for {key} with a '
392388
'nested dictionary').format(key=k)
393389
raise SpecificationError(msg)
394-
nested_renaming_depr(4 + (_level or 0))
390+
raise_on_dict_renaming()
395391

396392
elif isinstance(obj, ABCSeries):
397-
nested_renaming_depr()
393+
raise_on_dict_renaming()
398394
elif (isinstance(obj, ABCDataFrame) and
399395
k not in obj.columns):
400396
raise KeyError(
@@ -408,7 +404,7 @@ def nested_renaming_depr(level=4):
408404
keys = list(compat.iterkeys(arg))
409405
if (isinstance(obj, ABCDataFrame) and
410406
len(obj.columns.intersection(keys)) != len(keys)):
411-
nested_renaming_depr()
407+
raise_on_dict_renaming()
412408

413409
from pandas.core.reshape.concat import concat
414410

pandas/core/groupby/generic.py

+3-10
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
import collections
1010
import copy
11-
import warnings
1211
from functools import partial
1312
from textwrap import dedent
1413

@@ -785,15 +784,9 @@ def aggregate(self, func_or_funcs, *args, **kwargs):
785784
def _aggregate_multiple_funcs(self, arg, _level):
786785
if isinstance(arg, dict):
787786

788-
# show the deprecation, but only if we
789-
# have not shown a higher level one
790-
# GH 15931
791-
if isinstance(self._selected_obj, Series) and _level <= 1:
792-
warnings.warn(
793-
("using a dict on a Series for aggregation\n"
794-
"is deprecated and will be removed in a future "
795-
"version"),
796-
FutureWarning, stacklevel=3)
787+
# Deprecated in gh-15931, now enforcing.
788+
if isinstance(self._selected_obj, Series):
789+
raise ValueError("Using a dict with renaming is not allowed")
797790

798791
columns = list(arg.keys())
799792
arg = list(arg.items())

pandas/tests/groupby/aggregate/test_aggregate.py

-46
Original file line numberDiff line numberDiff line change
@@ -241,49 +241,3 @@ def test_more_flexible_frame_multi_function(df):
241241
expected = grouped.aggregate(OrderedDict([['C', np.mean],
242242
['D', [np.mean, np.std]]]))
243243
tm.assert_frame_equal(result, expected)
244-
245-
def foo(x):
246-
return np.mean(x)
247-
248-
def bar(x):
249-
return np.std(x, ddof=1)
250-
251-
# this uses column selection & renaming
252-
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
253-
d = OrderedDict([['C', np.mean],
254-
['D', OrderedDict([['foo', np.mean],
255-
['bar', np.std]])]])
256-
result = grouped.aggregate(d)
257-
258-
d = OrderedDict([['C', [np.mean]], ['D', [foo, bar]]])
259-
expected = grouped.aggregate(d)
260-
261-
tm.assert_frame_equal(result, expected)
262-
263-
264-
def test_multi_function_flexible_mix(df):
265-
# GH #1268
266-
grouped = df.groupby('A')
267-
268-
# Expected
269-
d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])],
270-
['D', {'sum': 'sum'}]])
271-
# this uses column selection & renaming
272-
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
273-
expected = grouped.aggregate(d)
274-
275-
# Test 1
276-
d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])],
277-
['D', 'sum']])
278-
# this uses column selection & renaming
279-
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
280-
result = grouped.aggregate(d)
281-
tm.assert_frame_equal(result, expected)
282-
283-
# Test 2
284-
d = OrderedDict([['C', OrderedDict([['foo', 'mean'], ['bar', 'std']])],
285-
['D', ['sum']]])
286-
# this uses column selection & renaming
287-
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
288-
result = grouped.aggregate(d)
289-
tm.assert_frame_equal(result, expected)

pandas/tests/groupby/aggregate/test_other.py

+5-82
Original file line numberDiff line numberDiff line change
@@ -195,103 +195,26 @@ def test_aggregate_api_consistency():
195195
expected = pd.concat([d_sum, c_mean], axis=1)
196196
tm.assert_frame_equal(result, expected, check_like=True)
197197

198-
result = grouped.agg({'C': ['mean', 'sum'],
199-
'D': ['mean', 'sum']})
200-
expected = pd.concat([c_mean, c_sum, d_mean, d_sum], axis=1)
201-
expected.columns = MultiIndex.from_product([['C', 'D'],
202-
['mean', 'sum']])
203-
204-
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
205-
result = grouped[['D', 'C']].agg({'r': np.sum,
206-
'r2': np.mean})
207-
expected = pd.concat([d_sum, c_sum, d_mean, c_mean], axis=1)
208-
expected.columns = MultiIndex.from_product([['r', 'r2'],
209-
['D', 'C']])
210-
tm.assert_frame_equal(result, expected, check_like=True)
211-
212-
213-
def test_agg_dict_renaming_deprecation():
214-
# 15931
215-
df = pd.DataFrame({'A': [1, 1, 1, 2, 2],
216-
'B': range(5),
217-
'C': range(5)})
218-
219-
with tm.assert_produces_warning(FutureWarning,
220-
check_stacklevel=False) as w:
221-
df.groupby('A').agg({'B': {'foo': ['sum', 'max']},
222-
'C': {'bar': ['count', 'min']}})
223-
assert "using a dict with renaming" in str(w[0].message)
224-
225-
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
226-
df.groupby('A')[['B', 'C']].agg({'ma': 'max'})
227-
228-
with tm.assert_produces_warning(FutureWarning) as w:
229-
df.groupby('A').B.agg({'foo': 'count'})
230-
assert "using a dict on a Series for aggregation" in str(w[0].message)
231-
232-
233-
def test_agg_compat():
234-
# GH 12334
235-
df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
236-
'foo', 'bar', 'foo', 'foo'],
237-
'B': ['one', 'one', 'two', 'two',
238-
'two', 'two', 'one', 'two'],
239-
'C': np.random.randn(8) + 1.0,
240-
'D': np.arange(8)})
241-
242-
g = df.groupby(['A', 'B'])
243-
244-
expected = pd.concat([g['D'].sum(), g['D'].std()], axis=1)
245-
expected.columns = MultiIndex.from_tuples([('C', 'sum'),
246-
('C', 'std')])
247-
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
248-
result = g['D'].agg({'C': ['sum', 'std']})
249-
tm.assert_frame_equal(result, expected, check_like=True)
250-
251-
expected = pd.concat([g['D'].sum(), g['D'].std()], axis=1)
252-
expected.columns = ['C', 'D']
253-
254-
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
255-
result = g['D'].agg({'C': 'sum', 'D': 'std'})
256-
tm.assert_frame_equal(result, expected, check_like=True)
257-
258198

259199
def test_agg_nested_dicts():
260-
# API change for disallowing these types of nested dicts
200+
# API change for disallowing these types of nested dicts.
261201
df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
262202
'foo', 'bar', 'foo', 'foo'],
263203
'B': ['one', 'one', 'two', 'two',
264204
'two', 'two', 'one', 'two'],
265205
'C': np.random.randn(8) + 1.0,
266206
'D': np.arange(8)})
267-
268207
g = df.groupby(['A', 'B'])
269208

270209
msg = r'cannot perform renaming for r[1-2] with a nested dictionary'
271210
with tm.assert_raises_regex(SpecificationError, msg):
272211
g.aggregate({'r1': {'C': ['mean', 'sum']},
273212
'r2': {'D': ['mean', 'sum']}})
274213

275-
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
276-
result = g.agg({'C': {'ra': ['mean', 'std']},
277-
'D': {'rb': ['mean', 'std']}})
278-
expected = pd.concat([g['C'].mean(), g['C'].std(),
279-
g['D'].mean(), g['D'].std()],
280-
axis=1)
281-
expected.columns = pd.MultiIndex.from_tuples(
282-
[('ra', 'mean'), ('ra', 'std'),
283-
('rb', 'mean'), ('rb', 'std')])
284-
tm.assert_frame_equal(result, expected, check_like=True)
285-
286-
# same name as the original column
287-
# GH9052
288-
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
289-
expected = g['D'].agg({'result1': np.sum, 'result2': np.mean})
290-
expected = expected.rename(columns={'result1': 'D'})
291-
292-
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
293-
result = g['D'].agg({'D': np.sum, 'result2': np.mean})
294-
tm.assert_frame_equal(result, expected, check_like=True)
214+
msg = "Using a dict with renaming is not allowed"
215+
with tm.assert_raises_regex(ValueError, msg):
216+
g.agg({'C': {'ra': ['mean', 'std']},
217+
'D': {'rb': ['mean', 'std']}})
295218

296219

297220
def test_agg_item_by_item_raise_typeerror():

pandas/tests/groupby/test_groupby.py

+4-18
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,11 @@ def test_basic(dtype):
6161
check_index_type=False)
6262

6363
# complex agg
64-
agged = grouped.aggregate([np.mean, np.std])
64+
grouped.aggregate([np.mean, np.std])
6565

66-
with tm.assert_produces_warning(FutureWarning,
67-
check_stacklevel=False):
68-
agged = grouped.aggregate({'one': np.mean, 'two': np.std})
66+
msg = "Using a dict with renaming is not allowed"
67+
with tm.assert_raises_regex(ValueError, msg):
68+
grouped.aggregate({'one': np.mean, 'two': np.std})
6969

7070
group_constants = {0: 10, 1: 20, 2: 30}
7171
agged = grouped.agg(lambda x: group_constants[x.name] + x.mean())
@@ -444,11 +444,6 @@ def test_frame_set_name_single(df):
444444
result = grouped['C'].agg([np.mean, np.std])
445445
assert result.index.name == 'A'
446446

447-
with tm.assert_produces_warning(FutureWarning,
448-
check_stacklevel=False):
449-
result = grouped['C'].agg({'foo': np.mean, 'bar': np.std})
450-
assert result.index.name == 'A'
451-
452447

453448
def test_multi_func(df):
454449
col1 = df['A']
@@ -553,15 +548,6 @@ def test_groupby_as_index_agg(df):
553548
expected2['D'] = grouped.sum()['D']
554549
assert_frame_equal(result2, expected2)
555550

556-
grouped = df.groupby('A', as_index=True)
557-
expected3 = grouped['C'].sum()
558-
expected3 = DataFrame(expected3).rename(columns={'C': 'Q'})
559-
560-
with tm.assert_produces_warning(FutureWarning,
561-
check_stacklevel=False):
562-
result3 = grouped['C'].agg({'Q': np.sum})
563-
assert_frame_equal(result3, expected3)
564-
565551
# multi-key
566552

567553
grouped = df.groupby(['A', 'B'], as_index=False)

0 commit comments

Comments
 (0)