DEP: Enforce deprecation of squeeze argument in groupby (#49082)

phofl · web-flow · commit fe93a8395081 · 2022-10-20T12:03:28.000+01:00
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -144,6 +144,7 @@ Deprecations
 
 Removal of prior version deprecations/changes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+- Removed argument ``squeeze`` from :meth:`DataFrame.groupby` and :meth:`Series.groupby` (:issue:`32380`)
 - Removed ``keep_tz`` argument in :meth:`DatetimeIndex.to_series` (:issue:`29731`)
 - Remove arguments ``names`` and ``dtype`` from :meth:`Index.copy` and ``levels`` and ``codes`` from :meth:`MultiIndex.copy` (:issue:`35853`, :issue:`36685`)
 - Removed argument ``try_cast`` from :meth:`DataFrame.mask`, :meth:`DataFrame.where`, :meth:`Series.mask` and :meth:`Series.where` (:issue:`38836`)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -8384,24 +8384,11 @@ def groupby(
         as_index: bool = True,
         sort: bool = True,
         group_keys: bool | lib.NoDefault = no_default,
-        squeeze: bool | lib.NoDefault = no_default,
         observed: bool = False,
         dropna: bool = True,
     ) -> DataFrameGroupBy:
         from pandas.core.groupby.generic import DataFrameGroupBy
 
-        if squeeze is not no_default:
-            warnings.warn(
-                (
-                    "The `squeeze` parameter is deprecated and "
-                    "will be removed in a future version."
-                ),
-                FutureWarning,
-                stacklevel=find_stack_level(),
-            )
-        else:
-            squeeze = False
-
         if level is None and by is None:
             raise TypeError("You have to supply one of 'by' and 'level'")
         axis = self._get_axis_number(axis)
@@ -8414,7 +8401,6 @@ def groupby(
             as_index=as_index,
             sort=sort,
             group_keys=group_keys,
-            squeeze=squeeze,
             observed=observed,
             dropna=dropna,
         )
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -1317,33 +1317,6 @@ def _wrap_applied_output_series(
 
         all_indexed_same = all_indexes_same(x.index for x in values)
 
-        # GH3596
-        # provide a reduction (Frame -> Series) if groups are
-        # unique
-        if self.squeeze:
-            applied_index = self._selected_obj._get_axis(self.axis)
-            singular_series = len(values) == 1 and applied_index.nlevels == 1
-
-            if singular_series:
-                # GH2893
-                # we have series in the values array, we want to
-                # produce a series:
-                # if any of the sub-series are not indexed the same
-                # OR we don't have a multi-index and we have only a
-                # single values
-                return self._concat_objects(
-                    values,
-                    not_indexed_same=not_indexed_same,
-                    override_group_keys=override_group_keys,
-                )
-
-            # still a series
-            # path added as of GH 5545
-            elif all_indexed_same:
-                from pandas.core.reshape.concat import concat
-
-                return concat(values)
-
         if not all_indexed_same:
             # GH 8467
             return self._concat_objects(
@@ -1673,7 +1646,6 @@ def _gotitem(self, key, ndim: int, subset=None):
                 as_index=self.as_index,
                 sort=self.sort,
                 group_keys=self.group_keys,
-                squeeze=self.squeeze,
                 observed=self.observed,
                 mutated=self.mutated,
                 dropna=self.dropna,
@@ -1688,7 +1660,6 @@ def _gotitem(self, key, ndim: int, subset=None):
                 selection=key,
                 sort=self.sort,
                 group_keys=self.group_keys,
-                squeeze=self.squeeze,
                 observed=self.observed,
                 dropna=self.dropna,
             )
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -645,7 +645,6 @@ class BaseGroupBy(PandasObject, SelectionMixin[NDFrameT], GroupByIndexingMixin):
         "obj",
         "observed",
         "sort",
-        "squeeze",
     }
 
     axis: AxisInt
@@ -929,7 +928,6 @@ def __init__(
         as_index: bool = True,
         sort: bool = True,
         group_keys: bool | lib.NoDefault = True,
-        squeeze: bool = False,
         observed: bool = False,
         mutated: bool = False,
         dropna: bool = True,
@@ -951,7 +949,6 @@ def __init__(
         self.keys = keys
         self.sort = sort
         self.group_keys = group_keys
-        self.squeeze = squeeze
         self.observed = observed
         self.mutated = mutated
         self.dropna = dropna
@@ -4328,7 +4325,6 @@ def get_groupby(
     as_index: bool = True,
     sort: bool = True,
     group_keys: bool | lib.NoDefault = True,
-    squeeze: bool = False,
     observed: bool = False,
     mutated: bool = False,
     dropna: bool = True,
@@ -4357,7 +4353,6 @@ def get_groupby(
         as_index=as_index,
         sort=sort,
         group_keys=group_keys,
-        squeeze=squeeze,
         observed=observed,
         mutated=mutated,
         dropna=dropna,
diff --git a/pandas/core/resample.py b/pandas/core/resample.py
@@ -164,7 +164,6 @@ def __init__(
         # [int, Literal['index', 'columns', 'rows']]", variable has type "int")
         self.axis = axis  # type: ignore[assignment]
         self.kind = kind
-        self.squeeze = False
         self.group_keys = group_keys
         self.as_index = True
 
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -2026,24 +2026,11 @@ def groupby(
         as_index: bool = True,
         sort: bool = True,
         group_keys: bool | lib.NoDefault = no_default,
-        squeeze: bool | lib.NoDefault = no_default,
         observed: bool = False,
         dropna: bool = True,
     ) -> SeriesGroupBy:
         from pandas.core.groupby.generic import SeriesGroupBy
 
-        if squeeze is not no_default:
-            warnings.warn(
-                (
-                    "The `squeeze` parameter is deprecated and "
-                    "will be removed in a future version."
-                ),
-                FutureWarning,
-                stacklevel=find_stack_level(),
-            )
-        else:
-            squeeze = False
-
         if level is None and by is None:
             raise TypeError("You have to supply one of 'by' and 'level'")
         axis = self._get_axis_number(axis)
@@ -2056,7 +2043,6 @@ def groupby(
             as_index=as_index,
             sort=sort,
             group_keys=group_keys,
-            squeeze=squeeze,
             observed=observed,
             dropna=dropna,
         )
diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py
@@ -133,11 +133,6 @@
        result from ``apply`` is a like-indexed Series or DataFrame.
        Specify ``group_keys`` explicitly to include the group keys or
        not.
-squeeze : bool, default False
-    Reduce the dimensionality of the return type if possible,
-    otherwise return a consistent type.
-
-    .. deprecated:: 1.1.0
 
 observed : bool, default False
     This only applies if any of the groupers are Categoricals.
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -109,46 +109,6 @@ def max_value(group):
     tm.assert_series_equal(result, expected)
 
 
-def test_groupby_return_type():
-
-    # GH2893, return a reduced type
-
-    def func(dataf):
-        return dataf["val2"] - dataf["val2"].mean()
-
-    df1 = DataFrame(
-        [
-            {"val1": 1, "val2": 20},
-            {"val1": 1, "val2": 19},
-            {"val1": 2, "val2": 27},
-            {"val1": 2, "val2": 12},
-        ]
-    )
-
-    with tm.assert_produces_warning(FutureWarning):
-        result = df1.groupby("val1", squeeze=True).apply(func)
-    assert isinstance(result, Series)
-
-    df2 = DataFrame(
-        [
-            {"val1": 1, "val2": 20},
-            {"val1": 1, "val2": 19},
-            {"val1": 1, "val2": 27},
-            {"val1": 1, "val2": 12},
-        ]
-    )
-
-    with tm.assert_produces_warning(FutureWarning):
-        result = df2.groupby("val1", squeeze=True).apply(func)
-    assert isinstance(result, Series)
-
-    # GH3596, return a consistent type (regression in 0.11 from 0.10.1)
-    df = DataFrame([[1, 1], [1, 1]], columns=["X", "Y"])
-    with tm.assert_produces_warning(FutureWarning):
-        result = df.groupby("X", squeeze=False).count()
-    assert isinstance(result, DataFrame)
-
-
 def test_inconsistent_return_type():
     # GH5592
     # inconsistent return type
@@ -2498,7 +2458,6 @@ def test_group_on_two_row_multiindex_returns_one_tuple_key():
         (DataFrame, "as_index", False),
         (DataFrame, "sort", False),
         (DataFrame, "group_keys", False),
-        (DataFrame, "squeeze", True),
         (DataFrame, "observed", True),
         (DataFrame, "dropna", False),
         pytest.param(
@@ -2513,14 +2472,10 @@ def test_group_on_two_row_multiindex_returns_one_tuple_key():
         (Series, "as_index", False),
         (Series, "sort", False),
         (Series, "group_keys", False),
-        (Series, "squeeze", True),
         (Series, "observed", True),
         (Series, "dropna", False),
     ],
 )
-@pytest.mark.filterwarnings(
-    "ignore:The `squeeze` parameter is deprecated:FutureWarning"
-)
 def test_subsetting_columns_keeps_attrs(klass, attr, value):
     # GH 9959 - When subsetting columns, don't drop attributes
     df = DataFrame({"a": [1], "b": [2], "c": [3]})
diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py
@@ -887,9 +887,7 @@ def test_groupby_apply_timegrouper_with_nat_apply_squeeze(
         # We need to create a GroupBy object with only one non-NaT group,
         #  so use a huge freq so that all non-NaT dates will be grouped together
         tdg = Grouper(key="Date", freq="100Y")
-
-        with tm.assert_produces_warning(FutureWarning, match="`squeeze` parameter"):
-            gb = df.groupby(tdg, squeeze=True)
+        gb = df.groupby(tdg)
 
         # check that we will go through the singular_series path
         #  in _wrap_applied_output_series
@@ -899,13 +897,12 @@ def test_groupby_apply_timegrouper_with_nat_apply_squeeze(
         # function that returns a Series
         res = gb.apply(lambda x: x["Quantity"] * 2)
 
-        key = Timestamp("2013-12-31")
-        ordering = df["Date"].sort_values().dropna().index
-        mi = MultiIndex.from_product([[key], ordering], names=["Date", None])
-
-        ex_values = df["Quantity"].take(ordering).values * 2
-        expected = Series(ex_values, index=mi, name="Quantity")
-        tm.assert_series_equal(res, expected)
+        expected = DataFrame(
+            [[36, 6, 6, 10, 2]],
+            index=Index([Timestamp("2013-12-31")], name="Date"),
+            columns=Index([0, 1, 5, 2, 3], name="Quantity"),
+        )
+        tm.assert_frame_equal(res, expected)
 
     @td.skip_if_no("numba")
     def test_groupby_agg_numba_timegrouper_with_nat(