From 6d47d3e80c80661d45f07446d3a99d744c987170 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Fri, 9 Jun 2023 01:27:18 +0200 Subject: [PATCH 1/6] DEPR: deprecate obj argument in GroupBy.get_group --- pandas/core/groupby/groupby.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 5d15be19f34f7..5be97d9ea612c 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -906,7 +906,7 @@ def pipe( return com.pipe(self, func, *args, **kwargs) @final - def get_group(self, name, obj=None) -> DataFrame | Series: + def get_group(self, name) -> DataFrame | Series: """ Construct DataFrame from group with provided name. @@ -914,14 +914,10 @@ def get_group(self, name, obj=None) -> DataFrame | Series: ---------- name : object The name of the group to get as a DataFrame. - obj : DataFrame, default None - The DataFrame to take the DataFrame out of. If - it is None, the object groupby was called on will - be used. Returns ------- - same type as obj + DataFrame Examples -------- @@ -955,14 +951,11 @@ def get_group(self, name, obj=None) -> DataFrame | Series: owl 1 2 3 toucan 1 5 6 """ - if obj is None: - obj = self._selected_obj - inds = self._get_index(name) if not len(inds): raise KeyError(name) - return obj._take_with_is_copy(inds, axis=self.axis) + return self._selected_obj.iloc[inds] @final def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: From 808f0f5f756d19a1eb1ca03af3d5d708ace3374f Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Sat, 10 Jun 2023 20:37:27 +0200 Subject: [PATCH 2/6] DEPR: deprecate param obj in GroupBy.get_group --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/core/groupby/groupby.py | 25 ++++++++++++++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index b9ad494172bdf..37e89ed434d94 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -278,6 +278,7 @@ Deprecations - Deprecated behavior of :func:`assert_series_equal` and :func:`assert_frame_equal` considering NA-like values (e.g. ``NaN`` vs ``None`` as equivalent) (:issue:`52081`) - Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`) - Deprecated option "mode.use_inf_as_na", convert inf entries to ``NaN`` before instead (:issue:`51684`) +- Deprecated parameter ``obj`` in :meth:`GroupBy.get_group` (:issue:`53545`) - Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 5be97d9ea612c..faccf0a79c857 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -906,7 +906,7 @@ def pipe( return com.pipe(self, func, *args, **kwargs) @final - def get_group(self, name) -> DataFrame | Series: + def get_group(self, name, obj=None) -> DataFrame | Series: """ Construct DataFrame from group with provided name. @@ -914,10 +914,19 @@ def get_group(self, name) -> DataFrame | Series: ---------- name : object The name of the group to get as a DataFrame. + obj : DataFrame, default None + The DataFrame to take the DataFrame out of. If + it is None, the object groupby was called on will + be used. + + .. deprecated:: 2.1.0 + The convert_dtype has been deprecated. + Do ``df.iloc[gb.indices.get(name)]`` + instead of ``gb.get_group(name, obj=df)``. Returns ------- - DataFrame + same type as obj Examples -------- @@ -951,11 +960,21 @@ def get_group(self, name) -> DataFrame | Series: owl 1 2 3 toucan 1 5 6 """ + warnings.warn( + "obj is deprecated and will be removed in a future version. " + "Do ``df.iloc[gb.indices.get(name)]`` " + "instead of ``gb.get_group(name, obj=df)``.", + FutureWarning, + stacklevel=find_stack_level(), + ) + if obj is None: + obj = self._selected_obj + inds = self._get_index(name) if not len(inds): raise KeyError(name) - return self._selected_obj.iloc[inds] + return obj._take_with_is_copy(inds, axis=self.axis) @final def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: From d2d0147da9df179aab09664e436527b545128a55 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Sat, 10 Jun 2023 22:32:07 +0200 Subject: [PATCH 3/6] correct get_group docs --- pandas/core/groupby/groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index faccf0a79c857..361b445efc273 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -920,7 +920,7 @@ def get_group(self, name, obj=None) -> DataFrame | Series: be used. .. deprecated:: 2.1.0 - The convert_dtype has been deprecated. + The obj is deprecated and will be removed in a future version. Do ``df.iloc[gb.indices.get(name)]`` instead of ``gb.get_group(name, obj=df)``. From f13101c638e1b52e61287d85980907c133035bd8 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Sat, 10 Jun 2023 23:47:47 +0200 Subject: [PATCH 4/6] make warning conditional --- pandas/core/groupby/groupby.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 361b445efc273..0169a698fee72 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -919,7 +919,7 @@ def get_group(self, name, obj=None) -> DataFrame | Series: it is None, the object groupby was called on will be used. - .. deprecated:: 2.1.0 + .. deprecated:: 2.1.0 The obj is deprecated and will be removed in a future version. Do ``df.iloc[gb.indices.get(name)]`` instead of ``gb.get_group(name, obj=df)``. @@ -960,21 +960,22 @@ def get_group(self, name, obj=None) -> DataFrame | Series: owl 1 2 3 toucan 1 5 6 """ - warnings.warn( - "obj is deprecated and will be removed in a future version. " - "Do ``df.iloc[gb.indices.get(name)]`` " - "instead of ``gb.get_group(name, obj=df)``.", - FutureWarning, - stacklevel=find_stack_level(), - ) - if obj is None: - obj = self._selected_obj - inds = self._get_index(name) if not len(inds): raise KeyError(name) - return obj._take_with_is_copy(inds, axis=self.axis) + if obj is None: + obj = self._selected_obj + return self._selected_obj.iloc[inds] + else: + warnings.warn( + "obj is deprecated and will be removed in a future version. " + "Do ``df.iloc[gb.indices.get(name)]`` " + "instead of ``gb.get_group(name, obj=df)``.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return obj._take_with_is_copy(inds, axis=self.axis) @final def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: From 0076eb4ea417d59b4482e20610ef71af9146b28c Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Sun, 11 Jun 2023 00:21:25 +0200 Subject: [PATCH 5/6] remove redundant line --- pandas/core/groupby/groupby.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 0169a698fee72..e5558fd720efb 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -965,7 +965,6 @@ def get_group(self, name, obj=None) -> DataFrame | Series: raise KeyError(name) if obj is None: - obj = self._selected_obj return self._selected_obj.iloc[inds] else: warnings.warn( From ef0b562b0fae96ee626cbdbfc02efa53e3f59f3f Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Sun, 18 Jun 2023 21:35:19 +0200 Subject: [PATCH 6/6] add a test for the warning --- pandas/tests/groupby/test_groupby.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index bf0b646847ed6..775016c673f4d 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -695,6 +695,16 @@ def test_as_index_select_column(): tm.assert_series_equal(result, expected) +def test_obj_arg_get_group_deprecated(): + depr_msg = "obj is deprecated" + + df = DataFrame({"a": [1, 1, 2], "b": [3, 4, 5]}) + expected = df.iloc[df.groupby("b").indices.get(4)] + with tm.assert_produces_warning(FutureWarning, match=depr_msg): + result = df.groupby("b").get_group(4, obj=df) + tm.assert_frame_equal(result, expected) + + def test_groupby_as_index_select_column_sum_empty_df(): # GH 35246 df = DataFrame(columns=Index(["A", "B", "C"], name="alpha"))