From 209ffcefabf41886ea25afd7a9ff38a65b129b6c Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 11 Dec 2017 13:04:50 -0600
Subject: [PATCH 01/10] COMPAT: Emit warning when groupby by a tuple

Closes https://github.com/pandas-dev/pandas/issues/18314
---
 doc/source/whatsnew/v0.22.0.txt      |  3 +++
 pandas/core/groupby.py               | 10 +++++++++-
 pandas/tests/groupby/test_groupby.py | 17 +++++++++++++++++
 3 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt
index 2ea44722d343d..2eef25c0a8c5b 100644
--- a/doc/source/whatsnew/v0.22.0.txt
+++ b/doc/source/whatsnew/v0.22.0.txt
@@ -202,6 +202,9 @@ Deprecations
 - ``Series.from_array`` and ``SparseSeries.from_array`` are deprecated. Use the normal constructor ``Series(..)`` and ``SparseSeries(..)`` instead (:issue:`18213`).
 - ``DataFrame.as_matrix`` is deprecated. Use ``DataFrame.values`` instead (:issue:`18458`).
 - ``Series.asobject``, ``DatetimeIndex.asobject``, ``PeriodIndex.asobject`` and ``TimeDeltaIndex.asobject`` have been deprecated. Use ``.astype(object)`` instead (:issue:`18572`)
+- Grouping by a tuple of keys now emits a ``FutureWarning`` and is deprecated.
+  In the future, a tuple passed to ``'by'`` will always refer to a single key
+  that is the actual tuple, instead of treating the tuple as multiple keys (:issue:`18314`)
 
 .. _whatsnew_0220.prior_deprecations:
 
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index a5d8cc254cd93..664db6b66a3ed 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -2850,7 +2850,15 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
     elif isinstance(key, BaseGrouper):
         return key, [], obj
 
-    # Everything which is not a list is a key (including tuples):
+    tuple_as_list = isinstance(key, tuple) and key not in obj
+    if tuple_as_list:
+        msg = ("Interpreting tuple 'by' as a list of keys, rather than "
+               "a single key. Use 'by={!r}' instead of 'by={!r}'. In the "
+               "future, a tuple will always mean a single key.".format(
+                   list(key), key))
+        warnings.warn(msg, FutureWarning, stacklevel=5)
+        key = list(key)
+
     if not isinstance(key, list):
         keys = [key]
         match_axis_length = False
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 3436dd9169081..e74f73768e376 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2727,6 +2727,23 @@ def test_empty_dataframe_groupby(self):
 
         assert_frame_equal(result, expected)
 
+    def test_tuple_warns(self):
+        # https://github.com/pandas-dev/pandas/issues/18314
+        df = pd.DataFrame({('a', 'b'): [1, 1, 2, 2], 'a': [1, 1, 1, 2],
+                           'b': [1, 2, 2, 2], 'c': [1, 1, 1, 1]})
+        with tm.assert_produces_warning(FutureWarning) as w:
+            df[['a', 'b', 'c']].groupby(('a', 'b')).c.mean()
+
+        assert "Interpreting tuple 'by' as a list" in str(w[0].message)
+
+        with tm.assert_produces_warning(FutureWarning) as w:
+            df[['a', 'b', 'c']].groupby(('a', 'b')).c.mean()
+
+        assert "Interpreting tuple 'by' as a list" in str(w[0].message)
+
+        with tm.assert_produces_warning(None):
+            df.groupby(('a', 'b')).c.mean()
+
 
 def _check_groupby(df, result, keys, field, f=lambda x: x.sum()):
     tups = lmap(tuple, df[keys].values)

From ad09ade25426edf47dfb1e92e39b1b5a3fef694a Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 11 Dec 2017 13:34:07 -0600
Subject: [PATCH 02/10] DOC: avoid future warning

---
 doc/source/groupby.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst
index 316244b583aa2..552ddabb7359a 100644
--- a/doc/source/groupby.rst
+++ b/doc/source/groupby.rst
@@ -1091,7 +1091,7 @@ You can also select multiple rows from each group by specifying multiple nth val
    business_dates = pd.date_range(start='4/1/2014', end='6/30/2014', freq='B')
    df = pd.DataFrame(1, index=business_dates, columns=['a', 'b'])
    # get the first, 4th, and last date index for each month
-   df.groupby((df.index.year, df.index.month)).nth([0, 3, -1])
+   df.groupby([df.index.year, df.index.month]).nth([0, 3, -1])
 
 Enumerate group items
 ~~~~~~~~~~~~~~~~~~~~~

From ade2b2b488dffad717bfbcff91031c7a7feab548 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 15 Dec 2017 14:10:06 -0600
Subject: [PATCH 03/10] Cleanup, test unhashable

---
 doc/source/whatsnew/v0.22.0.txt      |  3 ++-
 pandas/core/groupby.py               | 27 +++++++++++++++++++--------
 pandas/tests/groupby/test_groupby.py |  9 +++++++++
 3 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt
index a004e24899cbf..8c222352a4c58 100644
--- a/doc/source/whatsnew/v0.22.0.txt
+++ b/doc/source/whatsnew/v0.22.0.txt
@@ -205,7 +205,8 @@ Deprecations
 - ``Series.asobject``, ``DatetimeIndex.asobject``, ``PeriodIndex.asobject`` and ``TimeDeltaIndex.asobject`` have been deprecated. Use ``.astype(object)`` instead (:issue:`18572`)
 - Grouping by a tuple of keys now emits a ``FutureWarning`` and is deprecated.
   In the future, a tuple passed to ``'by'`` will always refer to a single key
-  that is the actual tuple, instead of treating the tuple as multiple keys (:issue:`18314`)
+  that is the actual tuple, instead of treating the tuple as multiple keys. To
+  retain the previous behavior, use a list instead of a tuple (:issue:`18314`)
 
 .. _whatsnew_0220.prior_deprecations:
 
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 664db6b66a3ed..41b8e46d8940f 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -28,6 +28,7 @@
     is_bool_dtype,
     is_scalar,
     is_list_like,
+    is_hashable,
     needs_i8_conversion,
     _ensure_float64,
     _ensure_platform_int,
@@ -2850,14 +2851,24 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
     elif isinstance(key, BaseGrouper):
         return key, [], obj
 
-    tuple_as_list = isinstance(key, tuple) and key not in obj
-    if tuple_as_list:
-        msg = ("Interpreting tuple 'by' as a list of keys, rather than "
-               "a single key. Use 'by={!r}' instead of 'by={!r}'. In the "
-               "future, a tuple will always mean a single key.".format(
-                   list(key), key))
-        warnings.warn(msg, FutureWarning, stacklevel=5)
-        key = list(key)
+    # In the future, a tuple key will always mean an actual key,
+    # not an iterable of keys. In the meantime, we attempt to provide
+    # a warning. We can assume that the user wanted a list of keys when
+    # the key is not in the index. We just have to be careful with
+    # unhashble elements of `key`. Any unhashable elements implies that
+    # they wanted a list of keys.
+    # https://github.com/pandas-dev/pandas/issues/18314
+    is_tuple = isinstance(key, tuple)
+    all_hashable = is_tuple and all(is_hashable(x) for x in key)
+
+    if is_tuple:
+        if not all_hashable or key not in obj:
+            msg = ("Interpreting tuple 'by' as a list of keys, rather than "
+                   "a single key. Use 'by={!r}' instead of 'by={!r}'. In the "
+                   "future, a tuple will always mean a single key.".format(
+                    list(key), key))
+            warnings.warn(msg, FutureWarning, stacklevel=5)
+            key = list(key)
 
     if not isinstance(key, list):
         keys = [key]
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index e74f73768e376..8152c269e0ba0 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2743,7 +2743,16 @@ def test_tuple_warns(self):
 
         with tm.assert_produces_warning(None):
             df.groupby(('a', 'b')).c.mean()
+    
+    def test_tuple_warns_unhashable(self):
+        # https://github.com/pandas-dev/pandas/issues/18314
+        business_dates = date_range(start='4/1/2014', end='6/30/2014', freq='B')
+        df = DataFrame(1, index=business_dates, columns=['a', 'b'])
+
+        with tm.assert_produces_warning(FutureWarning) as w:
+            df.groupby((df.index.year, df.index.month)).nth([0, 3, -1])
 
+        assert "Interpreting tuple 'by' as a list" in str(w[0].message)
 
 def _check_groupby(df, result, keys, field, f=lambda x: x.sum()):
     tups = lmap(tuple, df[keys].values)

From 60502263bf344463cba6d1cd02fa8e2d17534963 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 15 Dec 2017 14:16:03 -0600
Subject: [PATCH 04/10] PEP8

---
 pandas/core/groupby.py               | 2 +-
 pandas/tests/groupby/test_groupby.py | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 41b8e46d8940f..92b92b0ee3947 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -2866,7 +2866,7 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
             msg = ("Interpreting tuple 'by' as a list of keys, rather than "
                    "a single key. Use 'by={!r}' instead of 'by={!r}'. In the "
                    "future, a tuple will always mean a single key.".format(
-                    list(key), key))
+                       list(key), key))
             warnings.warn(msg, FutureWarning, stacklevel=5)
             key = list(key)
 
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 8152c269e0ba0..53612a0534ae8 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2743,10 +2743,11 @@ def test_tuple_warns(self):
 
         with tm.assert_produces_warning(None):
             df.groupby(('a', 'b')).c.mean()
-    
+
     def test_tuple_warns_unhashable(self):
         # https://github.com/pandas-dev/pandas/issues/18314
-        business_dates = date_range(start='4/1/2014', end='6/30/2014', freq='B')
+        business_dates = date_range(start='4/1/2014', end='6/30/2014',
+                                    freq='B')
         df = DataFrame(1, index=business_dates, columns=['a', 'b'])
 
         with tm.assert_produces_warning(FutureWarning) as w:
@@ -2754,6 +2755,7 @@ def test_tuple_warns_unhashable(self):
 
         assert "Interpreting tuple 'by' as a list" in str(w[0].message)
 
+
 def _check_groupby(df, result, keys, field, f=lambda x: x.sum()):
     tups = lmap(tuple, df[keys].values)
     tups = com._asarray_tuplesafe(tups)

From d8c20e85ea9363bbdbe58c16b51fe9b76eb4e24d Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 15 Dec 2017 15:01:17 -0600
Subject: [PATCH 05/10] Correct KeyError

---
 pandas/core/groupby.py               | 12 +++++++++---
 pandas/tests/groupby/test_groupby.py |  8 ++++++++
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 92b92b0ee3947..abce57d69d1c0 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -2861,13 +2861,14 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
     is_tuple = isinstance(key, tuple)
     all_hashable = is_tuple and all(is_hashable(x) for x in key)
 
+    original_grouper = None
     if is_tuple:
         if not all_hashable or key not in obj:
             msg = ("Interpreting tuple 'by' as a list of keys, rather than "
-                   "a single key. Use 'by={!r}' instead of 'by={!r}'. In the "
-                   "future, a tuple will always mean a single key.".format(
-                       list(key), key))
+                   "a single key. Use 'by=[...]' instead of 'by=(...)'. In "
+                   "the future, a tuple will always mean a single key.")
             warnings.warn(msg, FutureWarning, stacklevel=5)
+            original_grouper = key
             key = list(key)
 
     if not isinstance(key, list):
@@ -2939,6 +2940,11 @@ def is_in_obj(gpr):
             elif obj._is_level_reference(gpr):
                 in_axis, name, level, gpr = False, None, gpr, None
             else:
+                # Want to raise with the correct KeyError here
+                # The deprecation in #18731 means we may have
+                # the wrong error message here.
+                if original_grouper:
+                    gpr = original_grouper
                 raise KeyError(gpr)
         elif isinstance(gpr, Grouper) and gpr.key is not None:
             # Add key to exclusions
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 53612a0534ae8..b014fc31d8582 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2755,6 +2755,14 @@ def test_tuple_warns_unhashable(self):
 
         assert "Interpreting tuple 'by' as a list" in str(w[0].message)
 
+    def test_tuple_correct_keyerror(self):
+        df = pd.DataFrame(1, index=range(3),
+                          columns=pd.MultiIndex.from_product([[1, 2],
+                                                              [3, 4]]))
+        with tm.assert_produces_warning(FutureWarning):  # just silence
+            with tm.assert_raises_regex(KeyError, "(7, 8)"):
+                df.groupby((7, 8)).mean()
+
 
 def _check_groupby(df, result, keys, field, f=lambda x: x.sum()):
     tups = lmap(tuple, df[keys].values)

From d2a2372bf85a22a26ce422becfd108650fe67c6d Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 15 Dec 2017 15:08:58 -0600
Subject: [PATCH 06/10] update

---
 pandas/core/groupby.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index abce57d69d1c0..dcfe0764654d7 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -2861,14 +2861,14 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
     is_tuple = isinstance(key, tuple)
     all_hashable = is_tuple and all(is_hashable(x) for x in key)
 
-    original_grouper = None
     if is_tuple:
-        if not all_hashable or key not in obj:
+        if (all_hashable and key not in obj and set(key).issubset(obj)) or not all_hashable:
+            # column names ('a', 'b') -> ['a', 'b']
+            # arrays like (a, b) -> [a, b]
             msg = ("Interpreting tuple 'by' as a list of keys, rather than "
                    "a single key. Use 'by=[...]' instead of 'by=(...)'. In "
                    "the future, a tuple will always mean a single key.")
             warnings.warn(msg, FutureWarning, stacklevel=5)
-            original_grouper = key
             key = list(key)
 
     if not isinstance(key, list):
@@ -2943,8 +2943,6 @@ def is_in_obj(gpr):
                 # Want to raise with the correct KeyError here
                 # The deprecation in #18731 means we may have
                 # the wrong error message here.
-                if original_grouper:
-                    gpr = original_grouper
                 raise KeyError(gpr)
         elif isinstance(gpr, Grouper) and gpr.key is not None:
             # Add key to exclusions

From 38ef818bc446049fb748cdef64dde222bfd70078 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 15 Dec 2017 15:17:07 -0600
Subject: [PATCH 07/10] xfail

---
 pandas/tests/groupby/test_groupby.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index b014fc31d8582..9ce61e2ffb10d 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2755,13 +2755,14 @@ def test_tuple_warns_unhashable(self):
 
         assert "Interpreting tuple 'by' as a list" in str(w[0].message)
 
+    @pytest.mark.xfail(reason="GH-18798")
     def test_tuple_correct_keyerror(self):
+        # https://github.com/pandas-dev/pandas/issues/18798
         df = pd.DataFrame(1, index=range(3),
                           columns=pd.MultiIndex.from_product([[1, 2],
                                                               [3, 4]]))
-        with tm.assert_produces_warning(FutureWarning):  # just silence
-            with tm.assert_raises_regex(KeyError, "(7, 8)"):
-                df.groupby((7, 8)).mean()
+        with tm.assert_raises_regex(KeyError, "(7, 8)"):
+            df.groupby((7, 8)).mean()
 
 
 def _check_groupby(df, result, keys, field, f=lambda x: x.sum()):

From a27f4498b987c4b4e02f870ce76d498c37f2bb83 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 15 Dec 2017 15:17:49 -0600
Subject: [PATCH 08/10] remove old comments

---
 pandas/core/groupby.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index dcfe0764654d7..5e9bb01c77e3f 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -2940,9 +2940,6 @@ def is_in_obj(gpr):
             elif obj._is_level_reference(gpr):
                 in_axis, name, level, gpr = False, None, gpr, None
             else:
-                # Want to raise with the correct KeyError here
-                # The deprecation in #18731 means we may have
-                # the wrong error message here.
                 raise KeyError(gpr)
         elif isinstance(gpr, Grouper) and gpr.key is not None:
             # Add key to exclusions

From 4e5ae9fff513828d69275a7b49826346a375273a Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 15 Dec 2017 15:18:46 -0600
Subject: [PATCH 09/10] pep8

---
 pandas/core/groupby.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 5e9bb01c77e3f..974f039eb01a7 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -2862,7 +2862,8 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
     all_hashable = is_tuple and all(is_hashable(x) for x in key)
 
     if is_tuple:
-        if (all_hashable and key not in obj and set(key).issubset(obj)) or not all_hashable:
+        if ((all_hashable and key not in obj and set(key).issubset(obj))
+                or not all_hashable):
             # column names ('a', 'b') -> ['a', 'b']
             # arrays like (a, b) -> [a, b]
             msg = ("Interpreting tuple 'by' as a list of keys, rather than "

From a8b438323a4e0dc320bea1e1dfdbd72934de70a6 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 15 Dec 2017 17:09:00 -0600
Subject: [PATCH 10/10] Fixups

---
 pandas/core/groupby.py               | 2 +-
 pandas/tests/groupby/test_groupby.py | 5 -----
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 974f039eb01a7..b4223ac0a177a 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -2859,7 +2859,7 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
     # they wanted a list of keys.
     # https://github.com/pandas-dev/pandas/issues/18314
     is_tuple = isinstance(key, tuple)
-    all_hashable = is_tuple and all(is_hashable(x) for x in key)
+    all_hashable = is_tuple and is_hashable(key)
 
     if is_tuple:
         if ((all_hashable and key not in obj and set(key).issubset(obj))
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 9ce61e2ffb10d..3327612b016f4 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2736,11 +2736,6 @@ def test_tuple_warns(self):
 
         assert "Interpreting tuple 'by' as a list" in str(w[0].message)
 
-        with tm.assert_produces_warning(FutureWarning) as w:
-            df[['a', 'b', 'c']].groupby(('a', 'b')).c.mean()
-
-        assert "Interpreting tuple 'by' as a list" in str(w[0].message)
-
         with tm.assert_produces_warning(None):
             df.groupby(('a', 'b')).c.mean()