Skip to content

Commit ddff1a2

Browse files
WillAyd and proost
authored and committed
Remove Ambiguous Behavior of Tuple as Grouping (pandas-dev#29755)
1 parent 5b25e33 commit ddff1a2

File tree

4 files changed

+23
-46
lines changed

4 files changed

+23
-46
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
406406
- Removed the previously deprecated :meth:`Series.get_value`, :meth:`Series.set_value`, :meth:`DataFrame.get_value`, :meth:`DataFrame.set_value` (:issue:`17739`)
407407
- Changed the default value of `inplace` in :meth:`DataFrame.set_index` and :meth:`Series.set_axis`. It now defaults to False (:issue:`27600`)
408408
- Removed support for nested renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`DataFrameGroupBy.aggregate`, :meth:`SeriesGroupBy.aggregate`, :meth:`Rolling.aggregate` (:issue:`18529`)
409+
- A tuple passed to :meth:`DataFrame.groupby` is now exclusively treated as a single key (:issue:`18314`)
409410
- Removed :meth:`Series.from_array` (:issue:`18258`)
410411
- Removed :meth:`DataFrame.from_items` (:issue:`18458`)
411412
- Removed :meth:`DataFrame.as_matrix`, :meth:`Series.as_matrix` (:issue:`18458`)

pandas/core/groupby/groupby.py

+13-2
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,10 @@ class providing the base-class of operations.
1414
import re
1515
import types
1616
from typing import (
17+
Callable,
1718
Dict,
1819
FrozenSet,
20+
Hashable,
1921
Iterable,
2022
List,
2123
Mapping,
@@ -343,14 +345,23 @@ def _group_selection_context(groupby):
343345
groupby._reset_group_selection()
344346

345347

348+
_KeysArgType = Union[
349+
Hashable,
350+
List[Hashable],
351+
Callable[[Hashable], Hashable],
352+
List[Callable[[Hashable], Hashable]],
353+
Mapping[Hashable, Hashable],
354+
]
355+
356+
346357
class _GroupBy(PandasObject, SelectionMixin):
347358
_group_selection = None
348359
_apply_whitelist: FrozenSet[str] = frozenset()
349360

350361
def __init__(
351362
self,
352363
obj: NDFrame,
353-
keys=None,
364+
keys: Optional[_KeysArgType] = None,
354365
axis: int = 0,
355366
level=None,
356367
grouper: "Optional[ops.BaseGrouper]" = None,
@@ -2504,7 +2515,7 @@ def _reindex_output(
25042515
@Appender(GroupBy.__doc__)
25052516
def get_groupby(
25062517
obj: NDFrame,
2507-
by=None,
2518+
by: Optional[_KeysArgType] = None,
25082519
axis: int = 0,
25092520
level=None,
25102521
grouper: "Optional[ops.BaseGrouper]" = None,

pandas/core/groupby/grouper.py

-24
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
"""
55

66
from typing import Hashable, List, Optional, Tuple
7-
import warnings
87

98
import numpy as np
109

@@ -14,7 +13,6 @@
1413
ensure_categorical,
1514
is_categorical_dtype,
1615
is_datetime64_dtype,
17-
is_hashable,
1816
is_list_like,
1917
is_scalar,
2018
is_timedelta64_dtype,
@@ -515,28 +513,6 @@ def get_grouper(
515513
elif isinstance(key, ops.BaseGrouper):
516514
return key, [], obj
517515

518-
# In the future, a tuple key will always mean an actual key,
519-
# not an iterable of keys. In the meantime, we attempt to provide
520-
# a warning. We can assume that the user wanted a list of keys when
521-
# the key is not in the index. We just have to be careful with
522-
# unhashable elements of `key`. Any unhashable elements implies that
523-
# they wanted a list of keys.
524-
# https://github.com/pandas-dev/pandas/issues/18314
525-
if isinstance(key, tuple):
526-
all_hashable = is_hashable(key)
527-
if (
528-
all_hashable and key not in obj and set(key).issubset(obj)
529-
) or not all_hashable:
530-
# column names ('a', 'b') -> ['a', 'b']
531-
# arrays like (a, b) -> [a, b]
532-
msg = (
533-
"Interpreting tuple 'by' as a list of keys, rather than "
534-
"a single key. Use 'by=[...]' instead of 'by=(...)'. In "
535-
"the future, a tuple will always mean a single key."
536-
)
537-
warnings.warn(msg, FutureWarning, stacklevel=5)
538-
key = list(key)
539-
540516
if not isinstance(key, list):
541517
keys = [key]
542518
match_axis_length = False

pandas/tests/groupby/test_groupby.py

+9-20
Original file line numberDiff line numberDiff line change
@@ -1734,34 +1734,23 @@ def test_empty_dataframe_groupby():
17341734
tm.assert_frame_equal(result, expected)
17351735

17361736

1737-
def test_tuple_warns():
1737+
def test_tuple_as_grouping():
17381738
# https://github.com/pandas-dev/pandas/issues/18314
17391739
df = pd.DataFrame(
17401740
{
1741-
("a", "b"): [1, 1, 2, 2],
1742-
"a": [1, 1, 1, 2],
1743-
"b": [1, 2, 2, 2],
1741+
("a", "b"): [1, 1, 1, 1],
1742+
"a": [2, 2, 2, 2],
1743+
"b": [2, 2, 2, 2],
17441744
"c": [1, 1, 1, 1],
17451745
}
17461746
)
1747-
with tm.assert_produces_warning(FutureWarning) as w:
1748-
df[["a", "b", "c"]].groupby(("a", "b")).c.mean()
17491747

1750-
assert "Interpreting tuple 'by' as a list" in str(w[0].message)
1748+
with pytest.raises(KeyError):
1749+
df[["a", "b", "c"]].groupby(("a", "b"))
17511750

1752-
with tm.assert_produces_warning(None):
1753-
df.groupby(("a", "b")).c.mean()
1754-
1755-
1756-
def test_tuple_warns_unhashable():
1757-
# https://github.com/pandas-dev/pandas/issues/18314
1758-
business_dates = date_range(start="4/1/2014", end="6/30/2014", freq="B")
1759-
df = DataFrame(1, index=business_dates, columns=["a", "b"])
1760-
1761-
with tm.assert_produces_warning(FutureWarning) as w:
1762-
df.groupby((df.index.year, df.index.month)).nth([0, 3, -1])
1763-
1764-
assert "Interpreting tuple 'by' as a list" in str(w[0].message)
1751+
result = df.groupby(("a", "b"))["c"].sum()
1752+
expected = pd.Series([4], name="c", index=pd.Index([1], name=("a", "b")))
1753+
tm.assert_series_equal(result, expected)
17651754

17661755

17671756
def test_tuple_correct_keyerror():

0 commit comments

Comments
 (0)