Skip to content

Commit 209ffce

Browse files
committed
COMPAT: Emit warning when groupby by a tuple
Closes pandas-dev#18314
1 parent 2aa4aa9 commit 209ffce

File tree

3 files changed

+29
-1
lines changed

3 files changed

+29
-1
lines changed

doc/source/whatsnew/v0.22.0.txt

+3
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,9 @@ Deprecations
202202
- ``Series.from_array`` and ``SparseSeries.from_array`` are deprecated. Use the normal constructor ``Series(..)`` and ``SparseSeries(..)`` instead (:issue:`18213`).
203203
- ``DataFrame.as_matrix`` is deprecated. Use ``DataFrame.values`` instead (:issue:`18458`).
204204
- ``Series.asobject``, ``DatetimeIndex.asobject``, ``PeriodIndex.asobject`` and ``TimedeltaIndex.asobject`` have been deprecated. Use ``.astype(object)`` instead (:issue:`18572`).
205+
- Grouping by a tuple of keys now emits a ``FutureWarning`` and is deprecated.
206+
In the future, a tuple passed to ``'by'`` will always refer to a single key
207+
that is the actual tuple, instead of treating the tuple as multiple keys (:issue:`18314`)
205208

206209
.. _whatsnew_0220.prior_deprecations:
207210

pandas/core/groupby.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -2850,7 +2850,15 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
28502850
elif isinstance(key, BaseGrouper):
28512851
return key, [], obj
28522852

2853-
# Everything which is not a list is a key (including tuples):
2853+
tuple_as_list = isinstance(key, tuple) and key not in obj
2854+
if tuple_as_list:
2855+
msg = ("Interpreting tuple 'by' as a list of keys, rather than "
2856+
"a single key. Use 'by={!r}' instead of 'by={!r}'. In the "
2857+
"future, a tuple will always mean a single key.".format(
2858+
list(key), key))
2859+
warnings.warn(msg, FutureWarning, stacklevel=5)
2860+
key = list(key)
2861+
28542862
if not isinstance(key, list):
28552863
keys = [key]
28562864
match_axis_length = False

pandas/tests/groupby/test_groupby.py

+17
Original file line numberDiff line numberDiff line change
@@ -2727,6 +2727,23 @@ def test_empty_dataframe_groupby(self):
27272727

27282728
assert_frame_equal(result, expected)
27292729

2730+
def test_tuple_warns(self):
2731+
# https://github.com/pandas-dev/pandas/issues/18314
2732+
df = pd.DataFrame({('a', 'b'): [1, 1, 2, 2], 'a': [1, 1, 1, 2],
2733+
'b': [1, 2, 2, 2], 'c': [1, 1, 1, 1]})
2734+
with tm.assert_produces_warning(FutureWarning) as w:
2735+
df[['a', 'b', 'c']].groupby(('a', 'b')).c.mean()
2736+
2737+
assert "Interpreting tuple 'by' as a list" in str(w[0].message)
2738+
2739+
with tm.assert_produces_warning(FutureWarning) as w:
2740+
df[['a', 'b', 'c']].groupby(('a', 'b')).c.mean()
2741+
2742+
assert "Interpreting tuple 'by' as a list" in str(w[0].message)
2743+
2744+
with tm.assert_produces_warning(None):
2745+
df.groupby(('a', 'b')).c.mean()
2746+
27302747

27312748
def _check_groupby(df, result, keys, field, f=lambda x: x.sum()):
27322749
tups = lmap(tuple, df[keys].values)

0 commit comments

Comments
 (0)