Skip to content

DEPR: group by one element list gets scalar keys #59179

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,7 @@ Other Deprecations
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.all`, :meth:`DataFrame.min`, :meth:`DataFrame.max`, :meth:`DataFrame.sum`, :meth:`DataFrame.prod`, :meth:`DataFrame.mean`, :meth:`DataFrame.median`, :meth:`DataFrame.sem`, :meth:`DataFrame.var`, :meth:`DataFrame.std`, :meth:`DataFrame.skew`, :meth:`DataFrame.kurt`, :meth:`Series.all`, :meth:`Series.min`, :meth:`Series.max`, :meth:`Series.sum`, :meth:`Series.prod`, :meth:`Series.mean`, :meth:`Series.median`, :meth:`Series.sem`, :meth:`Series.var`, :meth:`Series.std`, :meth:`Series.skew`, and :meth:`Series.kurt`. (:issue:`57087`)
- Deprecated allowing non-keyword arguments in :meth:`Series.to_markdown` except ``buf``. (:issue:`57280`)
- Deprecated allowing non-keyword arguments in :meth:`Series.to_string` except ``buf``. (:issue:`57280`)
- Deprecated behavior of :meth:`.DataFrameGroupBy.groups` and :meth:`.SeriesGroupBy.groups`; in a future version, the keys of ``groups`` will be tuples instead of scalars when grouping by a one-element list. (:issue:`58858`)
- Deprecated behavior of :meth:`Series.dt.to_pytimedelta`, in a future version this will return a :class:`Series` containing Python ``datetime.timedelta`` objects instead of an ``ndarray`` of timedelta; this matches the behavior of other :meth:`Series.dt` properties. (:issue:`57463`)
- Deprecated lowercase strings ``d``, ``b`` and ``c`` denoting frequencies in :class:`Day`, :class:`BusinessDay` and :class:`CustomBusinessDay` in favour of ``D``, ``B`` and ``C`` (:issue:`58998`)
- Deprecated lowercase strings ``w``, ``w-mon``, ``w-tue``, etc. denoting frequencies in :class:`Week` in favour of ``W``, ``W-MON``, ``W-TUE``, etc. (:issue:`58998`)
Expand Down
11 changes: 10 additions & 1 deletion pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -634,7 +634,7 @@ def groups(self) -> dict[Hashable, Index]:
0 1 2 3
1 1 5 6
2 7 8 9
>>> df.groupby(by=["a"]).groups
>>> df.groupby(by="a").groups
{1: [0, 1], 7: [2]}

For Resampler:
Expand All @@ -654,6 +654,15 @@ def groups(self) -> dict[Hashable, Index]:
>>> ser.resample("MS").groups
{Timestamp('2023-01-01 00:00:00'): 2, Timestamp('2023-02-01 00:00:00'): 4}
"""
if isinstance(self.keys, list) and len(self.keys) == 1:
warnings.warn(
"`groups` by one element list returns scalar is deprecated "
"and will be removed. In a future version `groups` by one element "
"list will return tuple. Use ``df.groupby(by='a').groups`` "
"instead of ``df.groupby(by=['a']).groups`` to avoid this warning",
FutureWarning,
stacklevel=find_stack_level(),
)
return self._grouper.groups

@final
Expand Down
53 changes: 36 additions & 17 deletions pandas/tests/groupby/test_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,31 +545,38 @@ def test_multiindex_columns_empty_level(self):

df = DataFrame([[1, "A"]], columns=midx)

msg = "`groups` by one element list returns scalar is deprecated"
grouped = df.groupby("to filter").groups
assert grouped["A"] == [0]

grouped = df.groupby([("to filter", "")]).groups
with tm.assert_produces_warning(FutureWarning, match=msg):
grouped = df.groupby([("to filter", "")]).groups
assert grouped["A"] == [0]

df = DataFrame([[1, "A"], [2, "B"]], columns=midx)

expected = df.groupby("to filter").groups
result = df.groupby([("to filter", "")]).groups
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.groupby([("to filter", "")]).groups
assert result == expected

df = DataFrame([[1, "A"], [2, "A"]], columns=midx)

expected = df.groupby("to filter").groups
result = df.groupby([("to filter", "")]).groups
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.groupby([("to filter", "")]).groups
tm.assert_dict_equal(result, expected)

def test_groupby_multiindex_tuple(self):
# GH 17979
# GH 17979, GH#59179
df = DataFrame(
[[1, 2, 3, 4], [3, 4, 5, 6], [1, 4, 2, 3]],
columns=MultiIndex.from_arrays([["a", "b", "b", "c"], [1, 1, 2, 2]]),
)
expected = df.groupby([("b", 1)]).groups

msg = "`groups` by one element list returns scalar is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
expected = df.groupby([("b", 1)]).groups
result = df.groupby(("b", 1)).groups
tm.assert_dict_equal(expected, result)

Expand All @@ -579,17 +586,21 @@ def test_groupby_multiindex_tuple(self):
[["a", "b", "b", "c"], ["d", "d", "e", "e"]]
),
)
expected = df2.groupby([("b", "d")]).groups

with tm.assert_produces_warning(FutureWarning, match=msg):
expected = df2.groupby([("b", "d")]).groups
result = df.groupby(("b", 1)).groups
tm.assert_dict_equal(expected, result)

df3 = DataFrame(df.values, columns=[("a", "d"), ("b", "d"), ("b", "e"), "c"])
expected = df3.groupby([("b", "d")]).groups

with tm.assert_produces_warning(FutureWarning, match=msg):
expected = df3.groupby([("b", "d")]).groups
result = df.groupby(("b", 1)).groups
tm.assert_dict_equal(expected, result)

def test_groupby_multiindex_partial_indexing_equivalence(self):
# GH 17977
# GH 17977, GH#59179
df = DataFrame(
[[1, 2, 3, 4], [3, 4, 5, 6], [1, 4, 2, 3]],
columns=MultiIndex.from_arrays([["a", "b", "b", "c"], [1, 1, 2, 2]]),
Expand All @@ -615,8 +626,10 @@ def test_groupby_multiindex_partial_indexing_equivalence(self):
result_max = df.groupby([("a", 1)])["b"].max()
tm.assert_frame_equal(expected_max, result_max)

expected_groups = df.groupby([("a", 1)])[[("b", 1), ("b", 2)]].groups
result_groups = df.groupby([("a", 1)])["b"].groups
msg = "`groups` by one element list returns scalar is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
expected_groups = df.groupby([("a", 1)])[[("b", 1), ("b", 2)]].groups
result_groups = df.groupby([("a", 1)])["b"].groups
tm.assert_dict_equal(expected_groups, result_groups)

def test_groupby_level(self, sort, multiindex_dataframe_random_data, df):
Expand Down Expand Up @@ -719,15 +732,18 @@ def test_grouping_labels(self, multiindex_dataframe_random_data):
tm.assert_almost_equal(grouped._grouper.codes[0], exp_labels)

def test_list_grouper_with_nat(self):
# GH 14715
# GH 14715, GH#59179
df = DataFrame({"date": date_range("1/1/2011", periods=365, freq="D")})
df.iloc[-1] = pd.NaT
grouper = Grouper(key="date", freq="YS")
msg = "`groups` by one element list returns scalar is deprecated"

# Grouper in a list grouping
result = df.groupby([grouper])
gb = df.groupby([grouper])
expected = {Timestamp("2011-01-01"): Index(list(range(364)))}
tm.assert_dict_equal(result.groups, expected)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = gb.groups
tm.assert_dict_equal(result, expected)

# Test case without a list
result = df.groupby(grouper)
Expand Down Expand Up @@ -994,17 +1010,20 @@ def test_gb_key_len_equal_axis_len(self):
class TestIteration:
def test_groups(self, df):
grouped = df.groupby(["A"])
groups = grouped.groups
assert groups is grouped.groups # caching works
msg = "`groups` by one element list returns scalar is deprecated"

with tm.assert_produces_warning(FutureWarning, match=msg):
groups = grouped.groups
assert groups is grouped.groups # caching works

for k, v in grouped.groups.items():
for k, v in groups.items():
assert (df.loc[v]["A"] == k).all()

grouped = df.groupby(["A", "B"])
groups = grouped.groups
assert groups is grouped.groups # caching works

for k, v in grouped.groups.items():
for k, v in groups.items():
assert (df.loc[v]["A"] == k[0]).all()
assert (df.loc[v]["B"] == k[1]).all()

Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/groupby/test_raises.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,9 @@ def test_groupby_raises_category_np(
_call_and_check(klass, msg, how, gb, groupby_func_np, ())


@pytest.mark.filterwarnings(
"ignore:`groups` by one element list returns scalar is deprecated"
)
@pytest.mark.parametrize("how", ["method", "agg", "transform"])
def test_groupby_raises_category_on_category(
how,
Expand Down