diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index e5917c9176c54..ac093828d1ec1 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -280,6 +280,7 @@ Other Deprecations - Deprecated allowing non-keyword arguments in :meth:`DataFrame.all`, :meth:`DataFrame.min`, :meth:`DataFrame.max`, :meth:`DataFrame.sum`, :meth:`DataFrame.prod`, :meth:`DataFrame.mean`, :meth:`DataFrame.median`, :meth:`DataFrame.sem`, :meth:`DataFrame.var`, :meth:`DataFrame.std`, :meth:`DataFrame.skew`, :meth:`DataFrame.kurt`, :meth:`Series.all`, :meth:`Series.min`, :meth:`Series.max`, :meth:`Series.sum`, :meth:`Series.prod`, :meth:`Series.mean`, :meth:`Series.median`, :meth:`Series.sem`, :meth:`Series.var`, :meth:`Series.std`, :meth:`Series.skew`, and :meth:`Series.kurt`. (:issue:`57087`) - Deprecated allowing non-keyword arguments in :meth:`Series.to_markdown` except ``buf``. (:issue:`57280`) - Deprecated allowing non-keyword arguments in :meth:`Series.to_string` except ``buf``. (:issue:`57280`) +- Deprecated behavior of :meth:`.DataFrameGroupBy.groups` and :meth:`.SeriesGroupBy.groups` when grouping by a list with a single element; in a future version the keys of ``groups`` will be tuples of length one instead of scalars. (:issue:`58858`) - Deprecated behavior of :meth:`Series.dt.to_pytimedelta`, in a future version this will return a :class:`Series` containing python ``datetime.timedelta`` objects instead of an ``ndarray`` of timedelta; this matches the behavior of other :meth:`Series.dt` properties. (:issue:`57463`) - Deprecated lowercase strings ``d``, ``b`` and ``c`` denoting frequencies in :class:`Day`, :class:`BusinessDay` and :class:`CustomBusinessDay` in favour of ``D``, ``B`` and ``C`` (:issue:`58998`) - Deprecated lowercase strings ``w``, ``w-mon``, ``w-tue``, etc. denoting frequencies in :class:`Week` in favour of ``W``, ``W-MON``, ``W-TUE``, etc. 
(:issue:`58998`) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index bf71bb80b3623..945173bc48fe9 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -634,7 +634,7 @@ def groups(self) -> dict[Hashable, Index]: 0 1 2 3 1 1 5 6 2 7 8 9 - >>> df.groupby(by=["a"]).groups + >>> df.groupby(by="a").groups {1: [0, 1], 7: [2]} For Resampler: @@ -654,6 +654,15 @@ def groups(self) -> dict[Hashable, Index]: >>> ser.resample("MS").groups {Timestamp('2023-01-01 00:00:00'): 2, Timestamp('2023-02-01 00:00:00'): 4} """ + if isinstance(self.keys, list) and len(self.keys) == 1: + warnings.warn( + "`groups` by one element list returns scalar is deprecated " + "and will be removed. In a future version `groups` by one element " + "list will return tuple. Use ``df.groupby(by='a').groups`` " + "instead of ``df.groupby(by=['a']).groups`` to avoid this warning", + FutureWarning, + stacklevel=find_stack_level(), + ) return self._grouper.groups @final diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 39eadd32f300d..814b35ad577f1 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -545,31 +545,38 @@ def test_multiindex_columns_empty_level(self): df = DataFrame([[1, "A"]], columns=midx) + msg = "`groups` by one element list returns scalar is deprecated" grouped = df.groupby("to filter").groups assert grouped["A"] == [0] - grouped = df.groupby([("to filter", "")]).groups + with tm.assert_produces_warning(FutureWarning, match=msg): + grouped = df.groupby([("to filter", "")]).groups assert grouped["A"] == [0] df = DataFrame([[1, "A"], [2, "B"]], columns=midx) expected = df.groupby("to filter").groups - result = df.groupby([("to filter", "")]).groups + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby([("to filter", "")]).groups assert result == expected df = DataFrame([[1, "A"], [2, "A"]], columns=midx) expected = 
df.groupby("to filter").groups - result = df.groupby([("to filter", "")]).groups + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby([("to filter", "")]).groups tm.assert_dict_equal(result, expected) def test_groupby_multiindex_tuple(self): - # GH 17979 + # GH 17979, GH#59179 df = DataFrame( [[1, 2, 3, 4], [3, 4, 5, 6], [1, 4, 2, 3]], columns=MultiIndex.from_arrays([["a", "b", "b", "c"], [1, 1, 2, 2]]), ) - expected = df.groupby([("b", 1)]).groups + + msg = "`groups` by one element list returns scalar is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = df.groupby([("b", 1)]).groups result = df.groupby(("b", 1)).groups tm.assert_dict_equal(expected, result) @@ -579,17 +586,21 @@ def test_groupby_multiindex_tuple(self): [["a", "b", "b", "c"], ["d", "d", "e", "e"]] ), ) - expected = df2.groupby([("b", "d")]).groups + + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = df2.groupby([("b", "d")]).groups result = df.groupby(("b", 1)).groups tm.assert_dict_equal(expected, result) df3 = DataFrame(df.values, columns=[("a", "d"), ("b", "d"), ("b", "e"), "c"]) - expected = df3.groupby([("b", "d")]).groups + + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = df3.groupby([("b", "d")]).groups result = df.groupby(("b", 1)).groups tm.assert_dict_equal(expected, result) def test_groupby_multiindex_partial_indexing_equivalence(self): - # GH 17977 + # GH 17977, GH#59179 df = DataFrame( [[1, 2, 3, 4], [3, 4, 5, 6], [1, 4, 2, 3]], columns=MultiIndex.from_arrays([["a", "b", "b", "c"], [1, 1, 2, 2]]), @@ -615,8 +626,10 @@ def test_groupby_multiindex_partial_indexing_equivalence(self): result_max = df.groupby([("a", 1)])["b"].max() tm.assert_frame_equal(expected_max, result_max) - expected_groups = df.groupby([("a", 1)])[[("b", 1), ("b", 2)]].groups - result_groups = df.groupby([("a", 1)])["b"].groups + msg = "`groups` by one element list returns scalar is deprecated" + with 
tm.assert_produces_warning(FutureWarning, match=msg): + expected_groups = df.groupby([("a", 1)])[[("b", 1), ("b", 2)]].groups + result_groups = df.groupby([("a", 1)])["b"].groups tm.assert_dict_equal(expected_groups, result_groups) def test_groupby_level(self, sort, multiindex_dataframe_random_data, df): @@ -719,15 +732,18 @@ def test_grouping_labels(self, multiindex_dataframe_random_data): tm.assert_almost_equal(grouped._grouper.codes[0], exp_labels) def test_list_grouper_with_nat(self): - # GH 14715 + # GH 14715, GH#59179 df = DataFrame({"date": date_range("1/1/2011", periods=365, freq="D")}) df.iloc[-1] = pd.NaT grouper = Grouper(key="date", freq="YS") + msg = "`groups` by one element list returns scalar is deprecated" # Grouper in a list grouping - result = df.groupby([grouper]) + gb = df.groupby([grouper]) expected = {Timestamp("2011-01-01"): Index(list(range(364)))} - tm.assert_dict_equal(result.groups, expected) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = gb.groups + tm.assert_dict_equal(result, expected) # Test case without a list result = df.groupby(grouper) @@ -994,17 +1010,20 @@ def test_gb_key_len_equal_axis_len(self): class TestIteration: def test_groups(self, df): grouped = df.groupby(["A"]) - groups = grouped.groups - assert groups is grouped.groups # caching works + msg = "`groups` by one element list returns scalar is deprecated" + + with tm.assert_produces_warning(FutureWarning, match=msg): + groups = grouped.groups + assert groups is grouped.groups # caching works - for k, v in grouped.groups.items(): + for k, v in groups.items(): assert (df.loc[v]["A"] == k).all() grouped = df.groupby(["A", "B"]) groups = grouped.groups assert groups is grouped.groups # caching works - for k, v in grouped.groups.items(): + for k, v in groups.items(): assert (df.loc[v]["A"] == k[0]).all() assert (df.loc[v]["B"] == k[1]).all() diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py index 
5a8192a9ffe02..9f3e620ca9872 100644 --- a/pandas/tests/groupby/test_raises.py +++ b/pandas/tests/groupby/test_raises.py @@ -534,6 +534,9 @@ def test_groupby_raises_category_np( _call_and_check(klass, msg, how, gb, groupby_func_np, ()) +@pytest.mark.filterwarnings( + "ignore:`groups` by one element list returns scalar is deprecated" +) @pytest.mark.parametrize("how", ["method", "agg", "transform"]) def test_groupby_raises_category_on_category( how,