Skip to content

Commit e191a06

Browse files
authored
DEPR: group by one element list gets scalar keys (#59179)
1 parent 31a1df1 commit e191a06

File tree

4 files changed

+50
-18
lines changed

4 files changed

+50
-18
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,7 @@ Other Deprecations
283283
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.all`, :meth:`DataFrame.min`, :meth:`DataFrame.max`, :meth:`DataFrame.sum`, :meth:`DataFrame.prod`, :meth:`DataFrame.mean`, :meth:`DataFrame.median`, :meth:`DataFrame.sem`, :meth:`DataFrame.var`, :meth:`DataFrame.std`, :meth:`DataFrame.skew`, :meth:`DataFrame.kurt`, :meth:`Series.all`, :meth:`Series.min`, :meth:`Series.max`, :meth:`Series.sum`, :meth:`Series.prod`, :meth:`Series.mean`, :meth:`Series.median`, :meth:`Series.sem`, :meth:`Series.var`, :meth:`Series.std`, :meth:`Series.skew`, and :meth:`Series.kurt`. (:issue:`57087`)
284284
- Deprecated allowing non-keyword arguments in :meth:`Series.to_markdown` except ``buf``. (:issue:`57280`)
285285
- Deprecated allowing non-keyword arguments in :meth:`Series.to_string` except ``buf``. (:issue:`57280`)
286+
- Deprecated behavior of :meth:`.DataFrameGroupBy.groups` and :meth:`.SeriesGroupBy.groups` when grouping by a list with a single element; in a future version the keys of ``groups`` will be tuples instead of scalars. (:issue:`58858`)
286287
- Deprecated behavior of :meth:`Series.dt.to_pytimedelta`, in a future version this will return a :class:`Series` containing python ``datetime.timedelta`` objects instead of an ``ndarray`` of timedelta; this matches the behavior of other :meth:`Series.dt` properties. (:issue:`57463`)
287288
- Deprecated lowercase strings ``d``, ``b`` and ``c`` denoting frequencies in :class:`Day`, :class:`BusinessDay` and :class:`CustomBusinessDay` in favour of ``D``, ``B`` and ``C`` (:issue:`58998`)
288289
- Deprecated lowercase strings ``w``, ``w-mon``, ``w-tue``, etc. denoting frequencies in :class:`Week` in favour of ``W``, ``W-MON``, ``W-TUE``, etc. (:issue:`58998`)

pandas/core/groupby/groupby.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -634,7 +634,7 @@ def groups(self) -> dict[Hashable, Index]:
634634
0 1 2 3
635635
1 1 5 6
636636
2 7 8 9
637-
>>> df.groupby(by=["a"]).groups
637+
>>> df.groupby(by="a").groups
638638
{1: [0, 1], 7: [2]}
639639
640640
For Resampler:
@@ -654,6 +654,15 @@ def groups(self) -> dict[Hashable, Index]:
654654
>>> ser.resample("MS").groups
655655
{Timestamp('2023-01-01 00:00:00'): 2, Timestamp('2023-02-01 00:00:00'): 4}
656656
"""
657+
if isinstance(self.keys, list) and len(self.keys) == 1:
658+
warnings.warn(
659+
"`groups` by one element list returns scalar is deprecated "
660+
"and will be removed. In a future version `groups` by one element "
661+
"list will return tuple. Use ``df.groupby(by='a').groups`` "
662+
"instead of ``df.groupby(by=['a']).groups`` to avoid this warning",
663+
FutureWarning,
664+
stacklevel=find_stack_level(),
665+
)
657666
return self._grouper.groups
658667

659668
@final

pandas/tests/groupby/test_grouping.py

+36-17
Original file line numberDiff line numberDiff line change
@@ -545,31 +545,38 @@ def test_multiindex_columns_empty_level(self):
545545

546546
df = DataFrame([[1, "A"]], columns=midx)
547547

548+
msg = "`groups` by one element list returns scalar is deprecated"
548549
grouped = df.groupby("to filter").groups
549550
assert grouped["A"] == [0]
550551

551-
grouped = df.groupby([("to filter", "")]).groups
552+
with tm.assert_produces_warning(FutureWarning, match=msg):
553+
grouped = df.groupby([("to filter", "")]).groups
552554
assert grouped["A"] == [0]
553555

554556
df = DataFrame([[1, "A"], [2, "B"]], columns=midx)
555557

556558
expected = df.groupby("to filter").groups
557-
result = df.groupby([("to filter", "")]).groups
559+
with tm.assert_produces_warning(FutureWarning, match=msg):
560+
result = df.groupby([("to filter", "")]).groups
558561
assert result == expected
559562

560563
df = DataFrame([[1, "A"], [2, "A"]], columns=midx)
561564

562565
expected = df.groupby("to filter").groups
563-
result = df.groupby([("to filter", "")]).groups
566+
with tm.assert_produces_warning(FutureWarning, match=msg):
567+
result = df.groupby([("to filter", "")]).groups
564568
tm.assert_dict_equal(result, expected)
565569

566570
def test_groupby_multiindex_tuple(self):
567-
# GH 17979
571+
# GH 17979, GH#59179
568572
df = DataFrame(
569573
[[1, 2, 3, 4], [3, 4, 5, 6], [1, 4, 2, 3]],
570574
columns=MultiIndex.from_arrays([["a", "b", "b", "c"], [1, 1, 2, 2]]),
571575
)
572-
expected = df.groupby([("b", 1)]).groups
576+
577+
msg = "`groups` by one element list returns scalar is deprecated"
578+
with tm.assert_produces_warning(FutureWarning, match=msg):
579+
expected = df.groupby([("b", 1)]).groups
573580
result = df.groupby(("b", 1)).groups
574581
tm.assert_dict_equal(expected, result)
575582

@@ -579,17 +586,21 @@ def test_groupby_multiindex_tuple(self):
579586
[["a", "b", "b", "c"], ["d", "d", "e", "e"]]
580587
),
581588
)
582-
expected = df2.groupby([("b", "d")]).groups
589+
590+
with tm.assert_produces_warning(FutureWarning, match=msg):
591+
expected = df2.groupby([("b", "d")]).groups
583592
result = df.groupby(("b", 1)).groups
584593
tm.assert_dict_equal(expected, result)
585594

586595
df3 = DataFrame(df.values, columns=[("a", "d"), ("b", "d"), ("b", "e"), "c"])
587-
expected = df3.groupby([("b", "d")]).groups
596+
597+
with tm.assert_produces_warning(FutureWarning, match=msg):
598+
expected = df3.groupby([("b", "d")]).groups
588599
result = df.groupby(("b", 1)).groups
589600
tm.assert_dict_equal(expected, result)
590601

591602
def test_groupby_multiindex_partial_indexing_equivalence(self):
592-
# GH 17977
603+
# GH 17977, GH#59179
593604
df = DataFrame(
594605
[[1, 2, 3, 4], [3, 4, 5, 6], [1, 4, 2, 3]],
595606
columns=MultiIndex.from_arrays([["a", "b", "b", "c"], [1, 1, 2, 2]]),
@@ -615,8 +626,10 @@ def test_groupby_multiindex_partial_indexing_equivalence(self):
615626
result_max = df.groupby([("a", 1)])["b"].max()
616627
tm.assert_frame_equal(expected_max, result_max)
617628

618-
expected_groups = df.groupby([("a", 1)])[[("b", 1), ("b", 2)]].groups
619-
result_groups = df.groupby([("a", 1)])["b"].groups
629+
msg = "`groups` by one element list returns scalar is deprecated"
630+
with tm.assert_produces_warning(FutureWarning, match=msg):
631+
expected_groups = df.groupby([("a", 1)])[[("b", 1), ("b", 2)]].groups
632+
result_groups = df.groupby([("a", 1)])["b"].groups
620633
tm.assert_dict_equal(expected_groups, result_groups)
621634

622635
def test_groupby_level(self, sort, multiindex_dataframe_random_data, df):
@@ -719,15 +732,18 @@ def test_grouping_labels(self, multiindex_dataframe_random_data):
719732
tm.assert_almost_equal(grouped._grouper.codes[0], exp_labels)
720733

721734
def test_list_grouper_with_nat(self):
722-
# GH 14715
735+
# GH 14715, GH#59179
723736
df = DataFrame({"date": date_range("1/1/2011", periods=365, freq="D")})
724737
df.iloc[-1] = pd.NaT
725738
grouper = Grouper(key="date", freq="YS")
739+
msg = "`groups` by one element list returns scalar is deprecated"
726740

727741
# Grouper in a list grouping
728-
result = df.groupby([grouper])
742+
gb = df.groupby([grouper])
729743
expected = {Timestamp("2011-01-01"): Index(list(range(364)))}
730-
tm.assert_dict_equal(result.groups, expected)
744+
with tm.assert_produces_warning(FutureWarning, match=msg):
745+
result = gb.groups
746+
tm.assert_dict_equal(result, expected)
731747

732748
# Test case without a list
733749
result = df.groupby(grouper)
@@ -994,17 +1010,20 @@ def test_gb_key_len_equal_axis_len(self):
9941010
class TestIteration:
9951011
def test_groups(self, df):
9961012
grouped = df.groupby(["A"])
997-
groups = grouped.groups
998-
assert groups is grouped.groups # caching works
1013+
msg = "`groups` by one element list returns scalar is deprecated"
1014+
1015+
with tm.assert_produces_warning(FutureWarning, match=msg):
1016+
groups = grouped.groups
1017+
assert groups is grouped.groups # caching works
9991018

1000-
for k, v in grouped.groups.items():
1019+
for k, v in groups.items():
10011020
assert (df.loc[v]["A"] == k).all()
10021021

10031022
grouped = df.groupby(["A", "B"])
10041023
groups = grouped.groups
10051024
assert groups is grouped.groups # caching works
10061025

1007-
for k, v in grouped.groups.items():
1026+
for k, v in groups.items():
10081027
assert (df.loc[v]["A"] == k[0]).all()
10091028
assert (df.loc[v]["B"] == k[1]).all()
10101029

pandas/tests/groupby/test_raises.py

+3
Original file line numberDiff line numberDiff line change
@@ -534,6 +534,9 @@ def test_groupby_raises_category_np(
534534
_call_and_check(klass, msg, how, gb, groupby_func_np, ())
535535

536536

537+
@pytest.mark.filterwarnings(
538+
"ignore:`groups` by one element list returns scalar is deprecated"
539+
)
537540
@pytest.mark.parametrize("how", ["method", "agg", "transform"])
538541
def test_groupby_raises_category_on_category(
539542
how,

0 commit comments

Comments
 (0)