From c1ed8899af221cdcf9d6a055dfbe195aae9f518a Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 16 Jul 2023 10:26:51 -0400 Subject: [PATCH 1/5] DEPR: Not passing tuple to get_group when grouping on length-1 list-likes --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/core/groupby/groupby.py | 19 ++++++++++++++++ pandas/tests/groupby/test_categorical.py | 5 ++++- pandas/tests/groupby/test_groupby.py | 28 ++++++++++++++++++++++++ 4 files changed, 52 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 44e091e12bfa6..3d6b6a605bea7 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -356,6 +356,7 @@ Deprecations - Deprecated literal json input to :func:`read_json`. Wrap literal json string input in ``io.StringIO`` instead. (:issue:`53409`) - Deprecated literal string input to :func:`read_xml`. Wrap literal string/bytes input in ``io.StringIO`` / ``io.BytesIO`` instead. (:issue:`53767`) - Deprecated literal string/bytes input to :func:`read_html`. Wrap literal string/bytes input in ``io.StringIO`` / ``io.BytesIO`` instead. 
(:issue:`53767`) +- Deprecated not passing a tuple to :class:`DataFrameGroupBy.get_group` or :class:`SeriesGroupBy.get_group` when grouping by a length-1 list-like (:issue:`25971`) - Deprecated option "mode.use_inf_as_na", convert inf entries to ``NaN`` before instead (:issue:`51684`) - Deprecated parameter ``obj`` in :meth:`GroupBy.get_group` (:issue:`53545`) - Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 85ec8c1b86374..297d146729bce 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1010,6 +1010,25 @@ def get_group(self, name, obj=None) -> DataFrame | Series: 2023-01-15 2 dtype: int64 """ + keys = self.keys + level = self.level + if (is_list_like(level) and len(level) == 1) or ( + is_list_like(keys) and len(keys) == 1 + ): + # GH#25971 + if isinstance(name, tuple) and len(name) == 1: + # Allow users to pass tuples of length 1 to silence warning + name = name[0] + elif not isinstance(name, tuple): + warnings.warn( + "When grouping with a length-1 list-like, " + "you will need to pass a length-1 tuple to get_group in a future " + "version of pandas. 
Pass `(name,)` instead of `name` to silence " + "this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + inds = self._get_index(name) if not len(inds): raise KeyError(name) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 3ab62bb7656b7..8398fda16158f 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -264,7 +264,10 @@ def test_level_get_group(observed): names=["Index1", "Index2"], ), ) - result = g.get_group("a") + msg = "you will need to pass a length-1 tuple" + with tm.assert_produces_warning(FutureWarning, match=msg): + # GH#25971 - warn when not passing a length-1 tuple + result = g.get_group("a") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 635416f0cb1d6..1d7f42decb6bb 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3122,3 +3122,31 @@ def test_groupby_with_Time_Grouper(): df = test_data.groupby(Grouper(key="time2", freq="1T")).count().reset_index() tm.assert_frame_equal(df, expected_output) + + +@pytest.mark.parametrize("test_series", [True, False]) +@pytest.mark.parametrize( + "kwarg, value, name, warn", + [ + ("by", "a", 1, None), + ("by", ["a"], 1, FutureWarning), + ("by", ["a"], (1,), None), + ("level", 0, 1, None), + ("level", [0], 1, FutureWarning), + ("level", [0], (1,), None), + ], +) +def test_depr_get_group_len_1_list_likes(test_series, kwarg, value, name, warn): + # GH#25971 + obj = DataFrame({"b": [3, 4, 5]}, index=Index([1, 1, 2], name="a")) + if test_series: + obj = obj["b"] + gb = obj.groupby(**{kwarg: value}) + msg = "you will need to pass a length-1 tuple" + with tm.assert_produces_warning(warn, match=msg): + result = gb.get_group(name) + if test_series: + expected = Series([3, 4], index=Index([1, 1], name="a"), name="b") + else: + expected = DataFrame({"b": [3, 4]}, index=Index([1, 1], 
name="a")) + tm.assert_equal(result, expected) From 783d6f0947fde8706e41c456ee4b97b7108ad3a7 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Wed, 19 Jul 2023 16:45:06 -0400 Subject: [PATCH 2/5] test fixup --- pandas/tests/reshape/merge/test_join.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 179748f0506b5..c5adc52113af3 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -753,9 +753,7 @@ def _check_join(left, right, result, join_col, how="left", lsuffix="_x", rsuffix left_grouped = left.groupby(join_col) right_grouped = right.groupby(join_col) - for group_key, group in result.groupby( - join_col if len(join_col) > 1 else join_col[0] - ): + for group_key, group in result.groupby(join_col): l_joined = _restrict_to_columns(group, left.columns, lsuffix) r_joined = _restrict_to_columns(group, right.columns, rsuffix) From 02da9ad00ce1a36a83efbf8cf2525cd6446e7886 Mon Sep 17 00:00:00 2001 From: richard Date: Fri, 18 Aug 2023 16:13:33 -0400 Subject: [PATCH 3/5] Move whatsnew note; fixup in cookbook --- doc/source/user_guide/cookbook.rst | 2 +- doc/source/whatsnew/v2.1.0.rst | 1 - doc/source/whatsnew/v2.2.0.rst | 2 ++ 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index 66ee571d6b5a5..c0d2a14507383 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -466,7 +466,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to .. 
ipython:: python - gb = df.groupby(["animal"]) + gb = df.groupby("animal") gb.get_group("cat") `Apply to different items in a group diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index f38ec93299901..43a64a79e691b 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -570,7 +570,6 @@ Other Deprecations - Deprecated literal json input to :func:`read_json`. Wrap literal json string input in ``io.StringIO`` instead (:issue:`53409`) - Deprecated literal string input to :func:`read_xml`. Wrap literal string/bytes input in ``io.StringIO`` / ``io.BytesIO`` instead (:issue:`53767`) - Deprecated literal string/bytes input to :func:`read_html`. Wrap literal string/bytes input in ``io.StringIO`` / ``io.BytesIO`` instead (:issue:`53767`) -- Deprecated not passing a tuple to :class:`DataFrameGroupBy.get_group` or :class:`SeriesGroupBy.get_group` when grouping by a length-1 list-like (:issue:`25971`) - Deprecated option ``mode.use_inf_as_na``, convert inf entries to ``NaN`` before instead (:issue:`51684`) - Deprecated parameter ``obj`` in :meth:`.DataFrameGroupBy.get_group` (:issue:`53545`) - Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index b90563ba43d83..1abd538c214ad 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -98,6 +98,8 @@ Deprecations - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_markdown` except ``buf``. (:issue:`54229`) - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_pickle` except ``path``. (:issue:`54229`) - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_string` except ``buf``. 
(:issue:`54229`) +- Deprecated not passing a tuple to :meth:`.DataFrameGroupBy.get_group` or :meth:`.SeriesGroupBy.get_group` when grouping by a length-1 list-like (:issue:`25971`) + - .. --------------------------------------------------------------------------- From dabf143622a4feab5b1b8531587ba8d68a810e6b Mon Sep 17 00:00:00 2001 From: richard Date: Fri, 18 Aug 2023 17:13:13 -0400 Subject: [PATCH 4/5] fixups --- doc/source/user_guide/groupby.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index 75c816f66d5e4..c28123cec4491 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -211,9 +211,9 @@ For example, the groups created by ``groupby()`` below are in the order they app .. ipython:: python df3 = pd.DataFrame({"X": ["A", "B", "A", "B"], "Y": [1, 4, 3, 2]}) - df3.groupby(["X"]).get_group("A") + df3.groupby("X").get_group("A") - df3.groupby(["X"]).get_group("B") + df3.groupby(["X"]).get_group(("B",)) .. 
_groupby.dropna: From 5c506d737955da73f2f5f653f28a0f9453265533 Mon Sep 17 00:00:00 2001 From: richard Date: Sat, 19 Aug 2023 06:07:28 -0400 Subject: [PATCH 5/5] fixup --- pandas/core/groupby/groupby.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 05bf1b86dc062..49b47545d6297 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1033,7 +1033,7 @@ def get_group(self, name, obj=None) -> DataFrame | Series: owl 1 2 3 toucan 1 5 6 eagle 7 8 9 - >>> df.groupby(by=["a"]).get_group(1) + >>> df.groupby(by=["a"]).get_group((1,)) a b c owl 1 2 3 toucan 1 5 6 @@ -1055,8 +1055,9 @@ def get_group(self, name, obj=None) -> DataFrame | Series: """ keys = self.keys level = self.level - if (is_list_like(level) and len(level) == 1) or ( - is_list_like(keys) and len(keys) == 1 + # mypy doesn't recognize level/keys as being sized when passed to len + if (is_list_like(level) and len(level) == 1) or ( # type: ignore[arg-type] + is_list_like(keys) and len(keys) == 1 # type: ignore[arg-type] ): # GH#25971 if isinstance(name, tuple) and len(name) == 1: