pandas-dev · rhshadrach · Jul 23, 2024 · Jul 4, 2024 · Jul 8, 2024 · Jul 10, 2024
@@ -634,7 +634,7 @@ def groups(self) -> dict[Hashable, Index]:
         0  1  2  3
         1  1  5  6
         2  7  8  9
-        >>> df.groupby(by=["a"]).groups
+        >>> df.groupby(by="a").groups
         {1: [0, 1], 7: [2]}
 
         For Resampler:

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
@@ -9,12 +9,14 @@
     TYPE_CHECKING,
     final,
 )
+import warnings
 
 import numpy as np
 
 from pandas._libs.tslibs import OutOfBoundsDatetime
 from pandas.errors import InvalidIndexError
 from pandas.util._decorators import cache_readonly
+from pandas.util._exceptions import find_stack_level
 
 from pandas.core.dtypes.common import (
     is_list_like,
@@ -441,6 +443,7 @@ def __init__(
         in_axis: bool = False,
         dropna: bool = True,
         uniques: ArrayLike | None = None,
+        key_dtype_str: bool = False,
     ) -> None:
         self.level = level
         self._orig_grouper = grouper
@@ -453,6 +456,7 @@ def __init__(
         self.in_axis = in_axis
         self._dropna = dropna
         self._uniques = uniques
+        self.key_dtype_str = key_dtype_str
 
         # we have a single grouper which may be a myriad of things,
         # some of which are dependent on the passing in level
@@ -667,6 +671,15 @@ def groups(self) -> dict[Hashable, Index]:
         codes, uniques = self._codes_and_uniques
         uniques = Index._with_infer(uniques, name=self.name)
         cats = Categorical.from_codes(codes, uniques, validate=False)
+        if not self.key_dtype_str:
+            warnings.warn(
+                "`groups` by one element list returns scalar is deprecated "
+                "and will be removed. In a future version `groups` by one element "
+                "list will return tuple. Use ``df.groupby(by='a').groups`` "
+                "instead of ``df.groupby(by=['a']).groups`` to avoid this warning",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
         return self._index.groupby(cats)
 
     @property
@@ -781,7 +794,9 @@ def get_grouper(
     elif isinstance(key, ops.BaseGrouper):
         return key, frozenset(), obj
 
+    key_dtype_str = False
     if not isinstance(key, list):
+        key_dtype_str = True
         keys = [key]
         match_axis_length = False
     else:
@@ -892,6 +907,7 @@ def is_in_obj(gpr) -> bool:
                 observed=observed,
                 in_axis=in_axis,
                 dropna=dropna,
+                key_dtype_str=key_dtype_str,
             )
             if not isinstance(gpr, Grouping)
             else gpr

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -2996,3 +2996,16 @@ def test_groupby_multi_index_codes():
 
     index = df_grouped.index
     tm.assert_index_equal(index, MultiIndex.from_frame(index.to_frame()))
+
+
+def test_groupby_keys_1length_list():
+    # GH#59179: `.groups` on a groupby keyed by a one-element list is
+    # deprecated; it must warn while still returning scalar keys for now.
+    msg = "`groups` by one element list returns scalar is deprecated"
+
+    df = DataFrame({"x": [10, 20, 30], "y": ["a", "b", "c"]})
+    expected = {10: [0], 20: [1], 30: [2]}
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.groupby(["x"]).groups
+    # Keys are still the scalar column values, not 1-tuples.
+    tm.assert_dict_equal(result, expected)
diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
@@ -545,31 +545,38 @@ def test_multiindex_columns_empty_level(self):
 
         df = DataFrame([[1, "A"]], columns=midx)
 
+        msg = "`groups` by one element list returns scalar is deprecated"
         grouped = df.groupby("to filter").groups
         assert grouped["A"] == [0]
 
-        grouped = df.groupby([("to filter", "")]).groups
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            grouped = df.groupby([("to filter", "")]).groups
         assert grouped["A"] == [0]
 
         df = DataFrame([[1, "A"], [2, "B"]], columns=midx)
 
         expected = df.groupby("to filter").groups
-        result = df.groupby([("to filter", "")]).groups
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = df.groupby([("to filter", "")]).groups
         assert result == expected
 
         df = DataFrame([[1, "A"], [2, "A"]], columns=midx)
 
         expected = df.groupby("to filter").groups
-        result = df.groupby([("to filter", "")]).groups
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = df.groupby([("to filter", "")]).groups
         tm.assert_dict_equal(result, expected)
 
     def test_groupby_multiindex_tuple(self):
-        # GH 17979
+        # GH 17979, GH#59179
         df = DataFrame(
             [[1, 2, 3, 4], [3, 4, 5, 6], [1, 4, 2, 3]],
             columns=MultiIndex.from_arrays([["a", "b", "b", "c"], [1, 1, 2, 2]]),
         )
-        expected = df.groupby([("b", 1)]).groups
+
+        msg = "`groups` by one element list returns scalar is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            expected = df.groupby([("b", 1)]).groups
         result = df.groupby(("b", 1)).groups
         tm.assert_dict_equal(expected, result)
 
@@ -579,17 +586,21 @@ def test_groupby_multiindex_tuple(self):
                 [["a", "b", "b", "c"], ["d", "d", "e", "e"]]
             ),
         )
-        expected = df2.groupby([("b", "d")]).groups
+
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            expected = df2.groupby([("b", "d")]).groups
         result = df.groupby(("b", 1)).groups
         tm.assert_dict_equal(expected, result)
 
         df3 = DataFrame(df.values, columns=[("a", "d"), ("b", "d"), ("b", "e"), "c"])
-        expected = df3.groupby([("b", "d")]).groups
+
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            expected = df3.groupby([("b", "d")]).groups
         result = df.groupby(("b", 1)).groups
         tm.assert_dict_equal(expected, result)
 
     def test_groupby_multiindex_partial_indexing_equivalence(self):
-        # GH 17977
+        # GH 17977, GH#59179
         df = DataFrame(
             [[1, 2, 3, 4], [3, 4, 5, 6], [1, 4, 2, 3]],
             columns=MultiIndex.from_arrays([["a", "b", "b", "c"], [1, 1, 2, 2]]),
@@ -615,8 +626,10 @@ def test_groupby_multiindex_partial_indexing_equivalence(self):
         result_max = df.groupby([("a", 1)])["b"].max()
         tm.assert_frame_equal(expected_max, result_max)
 
-        expected_groups = df.groupby([("a", 1)])[[("b", 1), ("b", 2)]].groups
-        result_groups = df.groupby([("a", 1)])["b"].groups
+        msg = "`groups` by one element list returns scalar is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            expected_groups = df.groupby([("a", 1)])[[("b", 1), ("b", 2)]].groups
+            result_groups = df.groupby([("a", 1)])["b"].groups
         tm.assert_dict_equal(expected_groups, result_groups)
 
     def test_groupby_level(self, sort, multiindex_dataframe_random_data, df):
@@ -719,15 +732,18 @@ def test_grouping_labels(self, multiindex_dataframe_random_data):
         tm.assert_almost_equal(grouped._grouper.codes[0], exp_labels)
 
     def test_list_grouper_with_nat(self):
-        # GH 14715
+        # GH 14715, GH#59179
         df = DataFrame({"date": date_range("1/1/2011", periods=365, freq="D")})
         df.iloc[-1] = pd.NaT
         grouper = Grouper(key="date", freq="YS")
+        msg = "`groups` by one element list returns scalar is deprecated"
 
         # Grouper in a list grouping
         result = df.groupby([grouper])
         expected = {Timestamp("2011-01-01"): Index(list(range(364)))}
-        tm.assert_dict_equal(result.groups, expected)
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = result.groups
+        tm.assert_dict_equal(result, expected)
 
         # Test case without a list
         result = df.groupby(grouper)
@@ -994,7 +1010,10 @@ def test_gb_key_len_equal_axis_len(self):
 class TestIteration:
     def test_groups(self, df):
         grouped = df.groupby(["A"])
-        groups = grouped.groups
+        msg = "`groups` by one element list returns scalar is deprecated"
+
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            groups = grouped.groups
         assert groups is grouped.groups  # caching works
 
         for k, v in grouped.groups.items():

diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py
@@ -534,6 +534,9 @@ def test_groupby_raises_category_np(
     _call_and_check(klass, msg, how, gb, groupby_func_np, ())
 
 
+@pytest.mark.filterwarnings(
+    "ignore:`groups` by one element list returns scalar is deprecated"
+)
 @pytest.mark.parametrize("how", ["method", "agg", "transform"])
 def test_groupby_raises_category_on_category(
     how,