From 3d07daf1d4c19060401bc3842e0195be3e4c59da Mon Sep 17 00:00:00 2001 From: richard Date: Sun, 4 Dec 2022 22:54:33 -0500 Subject: [PATCH] DEPR: Enforce DataFrameGroupBy.__iter__ returning tuples of length 1 for keys --- doc/source/whatsnew/v2.0.0.rst | 2 +- pandas/core/groupby/groupby.py | 17 ++++------------- pandas/tests/groupby/test_groupby.py | 16 ++++------------ 3 files changed, 9 insertions(+), 26 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index b70dcb0ae99fa..b1400be59b3a1 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -596,7 +596,7 @@ Removal of prior version deprecations/changes - Enforced deprecation of silently dropping nuisance columns in groupby and resample operations when ``numeric_only=False`` (:issue:`41475`) - Changed default of ``numeric_only`` in various :class:`.DataFrameGroupBy` methods; all methods now default to ``numeric_only=False`` (:issue:`46072`) - Changed default of ``numeric_only`` to ``False`` in :class:`.Resampler` methods (:issue:`47177`) -- +- When providing a list of columns of length one to :meth:`DataFrame.groupby`, the keys that are returned by iterating over the resulting :class:`.DataFrameGroupBy` object will now be tuples of length one (:issue:`47761`) .. --------------------------------------------------------------------------- .. _whatsnew_200.performance: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 659ca228bdcb0..c7fb40e855ef7 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -70,7 +70,6 @@ class providing the base-class of operations. 
cache_readonly, doc, ) -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.cast import ensure_dtype_can_hold_na from pandas.core.dtypes.common import ( @@ -832,19 +831,11 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: for each group """ keys = self.keys + result = self.grouper.get_iterator(self._selected_obj, axis=self.axis) if isinstance(keys, list) and len(keys) == 1: - warnings.warn( - ( - "In a future version of pandas, a length 1 " - "tuple will be returned when iterating over a " - "groupby with a grouper equal to a list of " - "length 1. Don't supply a list with a single grouper " - "to avoid this warning." - ), - FutureWarning, - stacklevel=find_stack_level(), - ) - return self.grouper.get_iterator(self._selected_obj, axis=self.axis) + # GH#42795 - when keys is a list, return tuples even when length is 1 + result = (((key,), group) for key, group in result) + return result # To track operations that expand dimensions, like ohlc diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index a7104c2e21049..667656cb4de02 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2743,18 +2743,10 @@ def test_groupby_none_column_name(): def test_single_element_list_grouping(): # GH 42795 - df = DataFrame( - {"a": [np.nan, 1], "b": [np.nan, 5], "c": [np.nan, 2]}, index=["x", "y"] - ) - msg = ( - "In a future version of pandas, a length 1 " - "tuple will be returned when iterating over " - "a groupby with a grouper equal to a list of " - "length 1. Don't supply a list with a single grouper " - "to avoid this warning." 
- ) - with tm.assert_produces_warning(FutureWarning, match=msg): - values, _ = next(iter(df.groupby(["a"]))) + df = DataFrame({"a": [1, 2], "b": [np.nan, 5], "c": [np.nan, 2]}, index=["x", "y"]) + result = [key for key, _ in df.groupby(["a"])] + expected = [(1,), (2,)] + assert result == expected @pytest.mark.parametrize("func", ["sum", "cumsum", "cumprod", "prod"])