Skip to content

DEPR: group by one element list gets scalar keys #59179

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
2 changes: 1 addition & 1 deletion pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -634,7 +634,7 @@ def groups(self) -> dict[Hashable, Index]:
0 1 2 3
1 1 5 6
2 7 8 9
>>> df.groupby(by=["a"]).groups
>>> df.groupby(by="a").groups
{1: [0, 1], 7: [2]}

For Resampler:
Expand Down
16 changes: 16 additions & 0 deletions pandas/core/groupby/grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@
TYPE_CHECKING,
final,
)
import warnings

import numpy as np

from pandas._libs.tslibs import OutOfBoundsDatetime
from pandas.errors import InvalidIndexError
from pandas.util._decorators import cache_readonly
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.common import (
is_list_like,
Expand Down Expand Up @@ -441,6 +443,7 @@ def __init__(
in_axis: bool = False,
dropna: bool = True,
uniques: ArrayLike | None = None,
key_dtype_str: bool = False,
) -> None:
self.level = level
self._orig_grouper = grouper
Expand All @@ -453,6 +456,7 @@ def __init__(
self.in_axis = in_axis
self._dropna = dropna
self._uniques = uniques
self.key_dtype_str = key_dtype_str

# we have a single grouper which may be a myriad of things,
# some of which are dependent on the passing in level
Expand Down Expand Up @@ -667,6 +671,15 @@ def groups(self) -> dict[Hashable, Index]:
codes, uniques = self._codes_and_uniques
uniques = Index._with_infer(uniques, name=self.name)
cats = Categorical.from_codes(codes, uniques, validate=False)
if not self.key_dtype_str:
warnings.warn(
"`groups` by one element list returns scalar is deprecated "
"and will be removed. In a future version `groups` by one element "
"list will return tuple. Use ``df.groupby(by='a').groups`` "
"instead of ``df.groupby(by=['a']).groups`` to avoid this warning",
FutureWarning,
stacklevel=find_stack_level(),
)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you move this to GroupBy.groups? Then you won't need to modify Grouping (e.g. include extra arguments) at all.

return self._index.groupby(cats)

@property
Expand Down Expand Up @@ -781,7 +794,9 @@ def get_grouper(
elif isinstance(key, ops.BaseGrouper):
return key, frozenset(), obj

key_dtype_str = False
if not isinstance(key, list):
key_dtype_str = True
keys = [key]
match_axis_length = False
else:
Expand Down Expand Up @@ -892,6 +907,7 @@ def is_in_obj(gpr) -> bool:
observed=observed,
in_axis=in_axis,
dropna=dropna,
key_dtype_str=key_dtype_str,
)
if not isinstance(gpr, Grouping)
else gpr
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2996,3 +2996,15 @@ def test_groupby_multi_index_codes():

index = df_grouped.index
tm.assert_index_equal(index, MultiIndex.from_frame(index.to_frame()))


def test_groupby_keys_1length_list():
    # GH#59179
    # Accessing ``.groups`` after grouping by a one-element list must emit a
    # FutureWarning, while the returned mapping is still keyed by scalars.
    msg = "`groups` by one element list returns scalar is deprecated"

    df = DataFrame({"x": [10, 20, 30], "y": ["a", "b", "c"]})
    expected = {10: [0], 20: [1], 30: [2]}

    # Only the ``.groups`` access is wrapped so that a warning raised anywhere
    # else (e.g. by the comparison below) is not mistaken for the deprecation.
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = df.groupby(["x"]).groups
    tm.assert_dict_equal(result, expected)
45 changes: 32 additions & 13 deletions pandas/tests/groupby/test_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,31 +545,38 @@ def test_multiindex_columns_empty_level(self):

df = DataFrame([[1, "A"]], columns=midx)

msg = "`groups` by one element list returns scalar is deprecated"
grouped = df.groupby("to filter").groups
assert grouped["A"] == [0]

grouped = df.groupby([("to filter", "")]).groups
with tm.assert_produces_warning(FutureWarning, match=msg):
grouped = df.groupby([("to filter", "")]).groups
assert grouped["A"] == [0]

df = DataFrame([[1, "A"], [2, "B"]], columns=midx)

expected = df.groupby("to filter").groups
result = df.groupby([("to filter", "")]).groups
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.groupby([("to filter", "")]).groups
assert result == expected

df = DataFrame([[1, "A"], [2, "A"]], columns=midx)

expected = df.groupby("to filter").groups
result = df.groupby([("to filter", "")]).groups
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.groupby([("to filter", "")]).groups
tm.assert_dict_equal(result, expected)

def test_groupby_multiindex_tuple(self):
# GH 17979
# GH 17979, GH#59179
df = DataFrame(
[[1, 2, 3, 4], [3, 4, 5, 6], [1, 4, 2, 3]],
columns=MultiIndex.from_arrays([["a", "b", "b", "c"], [1, 1, 2, 2]]),
)
expected = df.groupby([("b", 1)]).groups

msg = "`groups` by one element list returns scalar is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
expected = df.groupby([("b", 1)]).groups
result = df.groupby(("b", 1)).groups
tm.assert_dict_equal(expected, result)

Expand All @@ -579,17 +586,21 @@ def test_groupby_multiindex_tuple(self):
[["a", "b", "b", "c"], ["d", "d", "e", "e"]]
),
)
expected = df2.groupby([("b", "d")]).groups

with tm.assert_produces_warning(FutureWarning, match=msg):
expected = df2.groupby([("b", "d")]).groups
result = df.groupby(("b", 1)).groups
tm.assert_dict_equal(expected, result)

df3 = DataFrame(df.values, columns=[("a", "d"), ("b", "d"), ("b", "e"), "c"])
expected = df3.groupby([("b", "d")]).groups

with tm.assert_produces_warning(FutureWarning, match=msg):
expected = df3.groupby([("b", "d")]).groups
result = df.groupby(("b", 1)).groups
tm.assert_dict_equal(expected, result)

def test_groupby_multiindex_partial_indexing_equivalence(self):
# GH 17977
# GH 17977, GH#59179
df = DataFrame(
[[1, 2, 3, 4], [3, 4, 5, 6], [1, 4, 2, 3]],
columns=MultiIndex.from_arrays([["a", "b", "b", "c"], [1, 1, 2, 2]]),
Expand All @@ -615,8 +626,10 @@ def test_groupby_multiindex_partial_indexing_equivalence(self):
result_max = df.groupby([("a", 1)])["b"].max()
tm.assert_frame_equal(expected_max, result_max)

expected_groups = df.groupby([("a", 1)])[[("b", 1), ("b", 2)]].groups
result_groups = df.groupby([("a", 1)])["b"].groups
msg = "`groups` by one element list returns scalar is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
expected_groups = df.groupby([("a", 1)])[[("b", 1), ("b", 2)]].groups
result_groups = df.groupby([("a", 1)])["b"].groups
tm.assert_dict_equal(expected_groups, result_groups)

def test_groupby_level(self, sort, multiindex_dataframe_random_data, df):
Expand Down Expand Up @@ -719,15 +732,18 @@ def test_grouping_labels(self, multiindex_dataframe_random_data):
tm.assert_almost_equal(grouped._grouper.codes[0], exp_labels)

def test_list_grouper_with_nat(self):
# GH 14715
# GH 14715, GH#59179
df = DataFrame({"date": date_range("1/1/2011", periods=365, freq="D")})
df.iloc[-1] = pd.NaT
grouper = Grouper(key="date", freq="YS")
msg = "`groups` by one element list returns scalar is deprecated"

# Grouper in a list grouping
result = df.groupby([grouper])
expected = {Timestamp("2011-01-01"): Index(list(range(364)))}
tm.assert_dict_equal(result.groups, expected)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = result.groups
tm.assert_dict_equal(result, expected)

# Test case without a list
result = df.groupby(grouper)
Expand Down Expand Up @@ -994,7 +1010,10 @@ def test_gb_key_len_equal_axis_len(self):
class TestIteration:
def test_groups(self, df):
grouped = df.groupby(["A"])
groups = grouped.groups
msg = "`groups` by one element list returns scalar is deprecated"

with tm.assert_produces_warning(FutureWarning, match=msg):
groups = grouped.groups
assert groups is grouped.groups # caching works

for k, v in grouped.groups.items():
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/groupby/test_raises.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,9 @@ def test_groupby_raises_category_np(
_call_and_check(klass, msg, how, gb, groupby_func_np, ())


@pytest.mark.filterwarnings(
"ignore:`groups` by one element list returns scalar is deprecated"
)
@pytest.mark.parametrize("how", ["method", "agg", "transform"])
def test_groupby_raises_category_on_category(
how,
Expand Down
Loading