From 56e3b8544e973d418f5ccd069f090d7f5977b809 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 7 Mar 2023 12:02:50 +0100 Subject: [PATCH 1/7] DEPR: scalar index for length-1-list level groupby --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/core/groupby/groupby.py | 9 ++++++++- pandas/tests/groupby/test_groupby.py | 16 ++++++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 15e3d66ecc551..af3c095d94216 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -816,6 +816,7 @@ Deprecations - Deprecated :meth:`DataFrame.pad` in favor of :meth:`DataFrame.ffill` (:issue:`33396`) - Deprecated :meth:`DataFrame.backfill` in favor of :meth:`DataFrame.bfill` (:issue:`33396`) - Deprecated :meth:`~pandas.io.stata.StataReader.close`. Use :class:`~pandas.io.stata.StataReader` as a context manager instead (:issue:`49228`) +- Deprecated producing a single element when iterating over a :class:`DataFrameGroupBy` or a :class:`SeriesGroupBy` that has been grouped by a ``level`` parameter that is a list of length 1; A tuple of length one will be returned instead (:issue:`51583`) .. --------------------------------------------------------------------------- .. _whatsnew_200.prior_deprecations: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 457352564f255..b8b144c05b4cd 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -810,9 +810,16 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: for each group """ keys = self.keys + level = self.level result = self.grouper.get_iterator(self._selected_obj, axis=self.axis) - if isinstance(keys, list) and len(keys) == 1: + if ( + isinstance(keys, list) + and len(keys) == 1 + or isinstance(level, list) + and len(level, list) + ): # GH#42795 - when keys is a list, return tuples even when length is 1 + # GH#51583 - when level is a list, return tuples even when length is 1 result = (((key,), group) for key, group in result) return result diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index ea4bb42fb7ee1..0045adc653225 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2709,6 +2709,22 @@ def test_single_element_list_grouping(): assert result == expected +def test_single_element_list_level_grouping(): + # GH 51583 + df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}, index=["x", "y"]) + result = [key for key, _ in df.groupby(level=[0])] + expected = [("x",), ("y",)] + assert result == expected + + +def test_single_element_list_multiindex_level_grouping(): + # GH 51583 + df = MultiIndex.from_product([[1, 2], [3, 4]], names=["x", "y"]).to_frame() + result = [key for key, _ in df.groupby(level=[0])] + expected = [(1,), (2,)] + assert result == expected + + @pytest.mark.parametrize("func", ["sum", "cumsum", "cumprod", "prod"]) def test_groupby_avoid_casting_to_float(func): # GH#37493 From ffe33d29c68078ea3cd0d48cd5b980769fc5fcf2 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 7 Mar 2023 13:13:04 +0100 Subject: [PATCH 2/7] fixed typo --- pandas/core/groupby/groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index b8b144c05b4cd..b5e21715dd6b0 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -816,7 +816,7 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: isinstance(keys, list) and len(keys) == 1 or isinstance(level, list) - and len(level, list) + and len(level) == 1 ): # GH#42795 - when keys is a list, return tuples even when length is 1 # GH#51583 - when level is a list, return tuples even when length is 1 From eb5794a3b89f775a43454049a1f368b4d90e229c Mon Sep 17 00:00:00 2001 From: root Date: Tue, 7 Mar 2023 15:01:18 +0100 Subject: [PATCH 3/7] added level field in BaseGroupBy --- pandas/core/groupby/groupby.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index b5e21715dd6b0..84c227ae4a59c 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -626,6 +626,7 @@ class BaseGroupBy(PandasObject, SelectionMixin[NDFrameT], GroupByIndexingMixin): axis: AxisInt grouper: ops.BaseGrouper keys: _KeysArgType | None = None + level: IndexLabel | None = None group_keys: bool | lib.NoDefault @final From d9b33dc1aeae9d909563eac2ee858ad38f30ef8f Mon Sep 17 00:00:00 2001 From: root Date: Mon, 13 Mar 2023 18:14:30 +0100 Subject: [PATCH 4/7] Changed PR to deprecate, not change, behaviour --- doc/source/whatsnew/v2.0.0.rst | 2 +- pandas/core/groupby/groupby.py | 19 +++++++++++------- pandas/tests/groupby/test_groupby.py | 30 ++++++++++++++++++---------- 3 files changed, 33 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index c1ba87d451b7b..8dc559374c5cb 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -815,7 +815,7 @@ Deprecations - Deprecated :meth:`DataFrame.pad` in favor of :meth:`DataFrame.ffill` (:issue:`33396`) - Deprecated :meth:`DataFrame.backfill` in favor of :meth:`DataFrame.bfill` (:issue:`33396`) - Deprecated :meth:`~pandas.io.stata.StataReader.close`. Use :class:`~pandas.io.stata.StataReader` as a context manager instead (:issue:`49228`) -- Deprecated producing a single element when iterating over a :class:`DataFrameGroupBy` or a :class:`SeriesGroupBy` that has been grouped by a ``level`` parameter that is a list of length 1; A tuple of length one will be returned instead (:issue:`51583`) +- Deprecated producing a scalar when iterating over a :class:`.DataFrameGroupBy` or a :class:`.SeriesGroupBy` that has been grouped by a ``level`` parameter that is a list of length 1; a tuple of length one will be returned instead (:issue:`51583`) .. --------------------------------------------------------------------------- .. _whatsnew_200.prior_deprecations: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 5ad24cef9e28e..e03e96985061a 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -68,6 +68,7 @@ class providing the base-class of operations. cache_readonly, doc, ) +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.cast import ensure_dtype_can_hold_na from pandas.core.dtypes.common import ( @@ -813,14 +814,18 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: keys = self.keys level = self.level result = self.grouper.get_iterator(self._selected_obj, axis=self.axis) - if ( - isinstance(keys, list) - and len(keys) == 1 - or isinstance(level, list) - and len(level) == 1 - ): + if isinstance(level, list) and len(level) == 1: + # GH 51583 + warnings.warn( + "Initializing a Groupby object with a length-1 list " + "level parameter will yield indexes as tuples in a future version. " + "To keep indexes as scalars, initialize Groupby objects with " + "a scalar level parameter instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + if isinstance(keys, list) and len(keys) == 1: # GH#42795 - when keys is a list, return tuples even when length is 1 - # GH#51583 - when level is a list, return tuples even when length is 1 result = (((key,), group) for key, group in result) return result diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 0045adc653225..f3f590f71f3e2 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2709,20 +2709,30 @@ def test_single_element_list_grouping(): assert result == expected -def test_single_element_list_level_grouping(): +def test_single_element_list_level_grouping_deprecation(): # GH 51583 - df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}, index=["x", "y"]) - result = [key for key, _ in df.groupby(level=[0])] - expected = [("x",), ("y",)] - assert result == expected + depr_msg = ( + "Initializing a Groupby object with a length-1 list " + "level parameter will yield indexes as tuples in a future version. " + "To keep indexes as scalars, initialize Groupby objects with " + "a scalar level parameter instead." + ) + with tm.assert_produces_warning(FutureWarning, match=depr_msg): + df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}, index=["x", "y"]) + [key for key, _ in df.groupby(level=[0])] -def test_single_element_list_multiindex_level_grouping(): +def test_multiindex_single_element_list_level_grouping_deprecation(): # GH 51583 - df = MultiIndex.from_product([[1, 2], [3, 4]], names=["x", "y"]).to_frame() - result = [key for key, _ in df.groupby(level=[0])] - expected = [(1,), (2,)] - assert result == expected + depr_msg = ( + "Initializing a Groupby object with a length-1 list " + "level parameter will yield indexes as tuples in a future version. " + "To keep indexes as scalars, initialize Groupby objects with " + "a scalar level parameter instead." + ) + with tm.assert_produces_warning(FutureWarning, match=depr_msg): + df = MultiIndex.from_product([[1, 2], [3, 4]], names=["x", "y"]).to_frame() + [key for key, _ in df.groupby(level=[0])] @pytest.mark.parametrize("func", ["sum", "cumsum", "cumprod", "prod"]) From 6071acd56626037003646d0e05ca1b06142642eb Mon Sep 17 00:00:00 2001 From: root Date: Wed, 15 Mar 2023 16:45:09 +0100 Subject: [PATCH 5/7] Altered deprecation to include any 'list-like' level parameter --- pandas/core/groupby/groupby.py | 7 ++++--- pandas/tests/groupby/test_groupby.py | 25 +++++++++++++++++++------ 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index e03e96985061a..d9219295e9246 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -77,6 +77,7 @@ class providing the base-class of operations. is_hashable, is_integer, is_integer_dtype, + is_list_like, is_numeric_dtype, is_object_dtype, is_scalar, @@ -814,12 +815,12 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: keys = self.keys level = self.level result = self.grouper.get_iterator(self._selected_obj, axis=self.axis) - if isinstance(level, list) and len(level) == 1: + if is_list_like(level) and len(level) == 1: # GH 51583 warnings.warn( - "Initializing a Groupby object with a length-1 list " + "Creating a Groupby object with a length-1 list " "level parameter will yield indexes as tuples in a future version. " - "To keep indexes as scalars, initialize Groupby objects with " + "To keep indexes as scalars, create Groupby objects with " "a scalar level parameter instead.", FutureWarning, stacklevel=find_stack_level(), diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index f3f590f71f3e2..083f4103e6e2a 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2711,27 +2711,40 @@ def test_single_element_list_grouping(): def test_single_element_list_level_grouping_deprecation(): # GH 51583 + df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}, index=["x", "y"]) depr_msg = ( - "Initializing a Groupby object with a length-1 list " + "Creating a Groupby object with a length-1 list " "level parameter will yield indexes as tuples in a future version. " - "To keep indexes as scalars, initialize Groupby objects with " + "To keep indexes as scalars, create Groupby objects with " "a scalar level parameter instead." ) with tm.assert_produces_warning(FutureWarning, match=depr_msg): - df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}, index=["x", "y"]) [key for key, _ in df.groupby(level=[0])] +def test_single_element_tuple_level_grouping_deprecation(): + # GH 51583 + df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}, index=["x", "y"]) + depr_msg = ( + "Creating a Groupby object with a length-1 list " + "level parameter will yield indexes as tuples in a future version. " + "To keep indexes as scalars, create Groupby objects with " + "a scalar level parameter instead." + ) + with tm.assert_produces_warning(FutureWarning, match=depr_msg): + [key for key, _ in df.groupby(level=(0,))] + + def test_multiindex_single_element_list_level_grouping_deprecation(): # GH 51583 + df = MultiIndex.from_product([[1, 2], [3, 4]], names=["x", "y"]).to_frame() depr_msg = ( - "Initializing a Groupby object with a length-1 list " + "Creating a Groupby object with a length-1 list " "level parameter will yield indexes as tuples in a future version. " - "To keep indexes as scalars, initialize Groupby objects with " + "To keep indexes as scalars, create Groupby objects with " "a scalar level parameter instead." ) with tm.assert_produces_warning(FutureWarning, match=depr_msg): - df = MultiIndex.from_product([[1, 2], [3, 4]], names=["x", "y"]).to_frame() [key for key, _ in df.groupby(level=[0])] From 0b743ce643b110c0c1c4bba29eca5baf87919bbc Mon Sep 17 00:00:00 2001 From: root Date: Thu, 16 Mar 2023 13:26:24 +0100 Subject: [PATCH 6/7] Added type ignore --- pandas/core/groupby/groupby.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index d9219295e9246..99f462a1a46e5 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -815,7 +815,8 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: keys = self.keys level = self.level result = self.grouper.get_iterator(self._selected_obj, axis=self.axis) - if is_list_like(level) and len(level) == 1: + # error: Argument 1 to "len" has incompatible type "Hashable"; expected "Sized" + if is_list_like(level) and len(level) == 1: # type: ignore[arg-type] # GH 51583 warnings.warn( "Creating a Groupby object with a length-1 list " From 8d4eb0dbce647c121ce386ce0cc700a5a704918a Mon Sep 17 00:00:00 2001 From: root Date: Fri, 17 Mar 2023 12:21:36 +0100 Subject: [PATCH 7/7] Parametrized tests --- pandas/core/groupby/groupby.py | 2 +- pandas/tests/groupby/test_groupby.py | 37 ++++++---------------------- 2 files changed, 9 insertions(+), 30 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 99f462a1a46e5..f0cf9abc5bbaf 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -819,7 +819,7 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: if is_list_like(level) and len(level) == 1: # type: ignore[arg-type] # GH 51583 warnings.warn( - "Creating a Groupby object with a length-1 list " + "Creating a Groupby object with a length-1 list-like " "level parameter will yield indexes as tuples in a future version. " "To keep indexes as scalars, create Groupby objects with " "a scalar level parameter instead.", diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 083f4103e6e2a..2441be4528c99 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2709,43 +2709,22 @@ def test_single_element_list_grouping(): assert result == expected -def test_single_element_list_level_grouping_deprecation(): - # GH 51583 - df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}, index=["x", "y"]) - depr_msg = ( - "Creating a Groupby object with a length-1 list " - "level parameter will yield indexes as tuples in a future version. " - "To keep indexes as scalars, create Groupby objects with " - "a scalar level parameter instead." - ) - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - [key for key, _ in df.groupby(level=[0])] - - -def test_single_element_tuple_level_grouping_deprecation(): +@pytest.mark.parametrize( + "level_arg, multiindex", [([0], False), ((0,), False), ([0], True), ((0,), True)] +) +def test_single_element_listlike_level_grouping_deprecation(level_arg, multiindex): # GH 51583 df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}, index=["x", "y"]) + if multiindex: + df = df.set_index(["a", "b"]) depr_msg = ( - "Creating a Groupby object with a length-1 list " - "level parameter will yield indexes as tuples in a future version. " - "To keep indexes as scalars, create Groupby objects with " - "a scalar level parameter instead." - ) - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - [key for key, _ in df.groupby(level=(0,))] - - -def test_multiindex_single_element_list_level_grouping_deprecation(): - # GH 51583 - df = MultiIndex.from_product([[1, 2], [3, 4]], names=["x", "y"]).to_frame() - depr_msg = ( - "Creating a Groupby object with a length-1 list " + "Creating a Groupby object with a length-1 list-like " "level parameter will yield indexes as tuples in a future version. " "To keep indexes as scalars, create Groupby objects with " "a scalar level parameter instead." ) with tm.assert_produces_warning(FutureWarning, match=depr_msg): - [key for key, _ in df.groupby(level=[0])] + [key for key, _ in df.groupby(level=level_arg)] @pytest.mark.parametrize("func", ["sum", "cumsum", "cumprod", "prod"])