diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index db7562e409fd1..9f867415a6cd3 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -144,6 +144,7 @@ Deprecations Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +- Removed argument ``squeeze`` from :meth:`DataFrame.groupby` and :meth:`Series.groupby` (:issue:`32380`) - Removed ``keep_tz`` argument in :meth:`DatetimeIndex.to_series` (:issue:`29731`) - Remove arguments ``names`` and ``dtype`` from :meth:`Index.copy` and ``levels`` and ``codes`` from :meth:`MultiIndex.copy` (:issue:`35853`, :issue:`36685`) - Removed argument ``try_cast`` from :meth:`DataFrame.mask`, :meth:`DataFrame.where`, :meth:`Series.mask` and :meth:`Series.where` (:issue:`38836`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 396a794d26c64..0737836bf412f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8384,24 +8384,11 @@ def groupby( as_index: bool = True, sort: bool = True, group_keys: bool | lib.NoDefault = no_default, - squeeze: bool | lib.NoDefault = no_default, observed: bool = False, dropna: bool = True, ) -> DataFrameGroupBy: from pandas.core.groupby.generic import DataFrameGroupBy - if squeeze is not no_default: - warnings.warn( - ( - "The `squeeze` parameter is deprecated and " - "will be removed in a future version." 
- ), - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - squeeze = False - if level is None and by is None: raise TypeError("You have to supply one of 'by' and 'level'") axis = self._get_axis_number(axis) @@ -8414,7 +8401,6 @@ def groupby( as_index=as_index, sort=sort, group_keys=group_keys, - squeeze=squeeze, observed=observed, dropna=dropna, ) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 7dbd0a609642e..2bac228ef74a0 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1317,33 +1317,6 @@ def _wrap_applied_output_series( all_indexed_same = all_indexes_same(x.index for x in values) - # GH3596 - # provide a reduction (Frame -> Series) if groups are - # unique - if self.squeeze: - applied_index = self._selected_obj._get_axis(self.axis) - singular_series = len(values) == 1 and applied_index.nlevels == 1 - - if singular_series: - # GH2893 - # we have series in the values array, we want to - # produce a series: - # if any of the sub-series are not indexed the same - # OR we don't have a multi-index and we have only a - # single values - return self._concat_objects( - values, - not_indexed_same=not_indexed_same, - override_group_keys=override_group_keys, - ) - - # still a series - # path added as of GH 5545 - elif all_indexed_same: - from pandas.core.reshape.concat import concat - - return concat(values) - if not all_indexed_same: # GH 8467 return self._concat_objects( @@ -1673,7 +1646,6 @@ def _gotitem(self, key, ndim: int, subset=None): as_index=self.as_index, sort=self.sort, group_keys=self.group_keys, - squeeze=self.squeeze, observed=self.observed, mutated=self.mutated, dropna=self.dropna, @@ -1688,7 +1660,6 @@ def _gotitem(self, key, ndim: int, subset=None): selection=key, sort=self.sort, group_keys=self.group_keys, - squeeze=self.squeeze, observed=self.observed, dropna=self.dropna, ) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 
8c637901e720f..c02adfcd678e5 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -645,7 +645,6 @@ class BaseGroupBy(PandasObject, SelectionMixin[NDFrameT], GroupByIndexingMixin): "obj", "observed", "sort", - "squeeze", } axis: AxisInt @@ -929,7 +928,6 @@ def __init__( as_index: bool = True, sort: bool = True, group_keys: bool | lib.NoDefault = True, - squeeze: bool = False, observed: bool = False, mutated: bool = False, dropna: bool = True, @@ -951,7 +949,6 @@ def __init__( self.keys = keys self.sort = sort self.group_keys = group_keys - self.squeeze = squeeze self.observed = observed self.mutated = mutated self.dropna = dropna @@ -4328,7 +4325,6 @@ def get_groupby( as_index: bool = True, sort: bool = True, group_keys: bool | lib.NoDefault = True, - squeeze: bool = False, observed: bool = False, mutated: bool = False, dropna: bool = True, @@ -4357,7 +4353,6 @@ def get_groupby( as_index=as_index, sort=sort, group_keys=group_keys, - squeeze=squeeze, observed=observed, mutated=mutated, dropna=dropna, diff --git a/pandas/core/resample.py b/pandas/core/resample.py index f64ca90537b3f..9f9fdef089353 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -164,7 +164,6 @@ def __init__( # [int, Literal['index', 'columns', 'rows']]", variable has type "int") self.axis = axis # type: ignore[assignment] self.kind = kind - self.squeeze = False self.group_keys = group_keys self.as_index = True diff --git a/pandas/core/series.py b/pandas/core/series.py index 98fc797806d11..721995fef1a8e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2026,24 +2026,11 @@ def groupby( as_index: bool = True, sort: bool = True, group_keys: bool | lib.NoDefault = no_default, - squeeze: bool | lib.NoDefault = no_default, observed: bool = False, dropna: bool = True, ) -> SeriesGroupBy: from pandas.core.groupby.generic import SeriesGroupBy - if squeeze is not no_default: - warnings.warn( - ( - "The `squeeze` parameter is deprecated and " - 
"will be removed in a future version." - ), - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - squeeze = False - if level is None and by is None: raise TypeError("You have to supply one of 'by' and 'level'") axis = self._get_axis_number(axis) @@ -2056,7 +2043,6 @@ def groupby( as_index=as_index, sort=sort, group_keys=group_keys, - squeeze=squeeze, observed=observed, dropna=dropna, ) diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index 210d7d79bfe15..cfabe05ec9e3b 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -133,11 +133,6 @@ result from ``apply`` is a like-indexed Series or DataFrame. Specify ``group_keys`` explicitly to include the group keys or not. -squeeze : bool, default False - Reduce the dimensionality of the return type if possible, - otherwise return a consistent type. - - .. deprecated:: 1.1.0 observed : bool, default False This only applies if any of the groupers are Categoricals. diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 8439e4b8c9a00..74b4d5dc19ca1 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -109,46 +109,6 @@ def max_value(group): tm.assert_series_equal(result, expected) -def test_groupby_return_type(): - - # GH2893, return a reduced type - - def func(dataf): - return dataf["val2"] - dataf["val2"].mean() - - df1 = DataFrame( - [ - {"val1": 1, "val2": 20}, - {"val1": 1, "val2": 19}, - {"val1": 2, "val2": 27}, - {"val1": 2, "val2": 12}, - ] - ) - - with tm.assert_produces_warning(FutureWarning): - result = df1.groupby("val1", squeeze=True).apply(func) - assert isinstance(result, Series) - - df2 = DataFrame( - [ - {"val1": 1, "val2": 20}, - {"val1": 1, "val2": 19}, - {"val1": 1, "val2": 27}, - {"val1": 1, "val2": 12}, - ] - ) - - with tm.assert_produces_warning(FutureWarning): - result = df2.groupby("val1", squeeze=True).apply(func) - assert isinstance(result, Series) - - # GH3596, 
return a consistent type (regression in 0.11 from 0.10.1) - df = DataFrame([[1, 1], [1, 1]], columns=["X", "Y"]) - with tm.assert_produces_warning(FutureWarning): - result = df.groupby("X", squeeze=False).count() - assert isinstance(result, DataFrame) - - def test_inconsistent_return_type(): # GH5592 # inconsistent return type @@ -2498,7 +2458,6 @@ def test_group_on_two_row_multiindex_returns_one_tuple_key(): (DataFrame, "as_index", False), (DataFrame, "sort", False), (DataFrame, "group_keys", False), - (DataFrame, "squeeze", True), (DataFrame, "observed", True), (DataFrame, "dropna", False), pytest.param( @@ -2513,14 +2472,10 @@ def test_group_on_two_row_multiindex_returns_one_tuple_key(): (Series, "as_index", False), (Series, "sort", False), (Series, "group_keys", False), - (Series, "squeeze", True), (Series, "observed", True), (Series, "dropna", False), ], ) -@pytest.mark.filterwarnings( - "ignore:The `squeeze` parameter is deprecated:FutureWarning" -) def test_subsetting_columns_keeps_attrs(klass, attr, value): # GH 9959 - When subsetting columns, don't drop attributes df = DataFrame({"a": [1], "b": [2], "c": [3]}) diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index ae725cbb2b588..a71719f3da4f7 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -887,9 +887,7 @@ def test_groupby_apply_timegrouper_with_nat_apply_squeeze( # We need to create a GroupBy object with only one non-NaT group, # so use a huge freq so that all non-NaT dates will be grouped together tdg = Grouper(key="Date", freq="100Y") - - with tm.assert_produces_warning(FutureWarning, match="`squeeze` parameter"): - gb = df.groupby(tdg, squeeze=True) + gb = df.groupby(tdg) # check that we will go through the singular_series path # in _wrap_applied_output_series @@ -899,13 +897,12 @@ def test_groupby_apply_timegrouper_with_nat_apply_squeeze( # function that returns a Series res = gb.apply(lambda x: 
x["Quantity"] * 2) - key = Timestamp("2013-12-31") - ordering = df["Date"].sort_values().dropna().index - mi = MultiIndex.from_product([[key], ordering], names=["Date", None]) - - ex_values = df["Quantity"].take(ordering).values * 2 - expected = Series(ex_values, index=mi, name="Quantity") - tm.assert_series_equal(res, expected) + expected = DataFrame( + [[36, 6, 6, 10, 2]], + index=Index([Timestamp("2013-12-31")], name="Date"), + columns=Index([0, 1, 5, 2, 3], name="Quantity"), + ) + tm.assert_frame_equal(res, expected) @td.skip_if_no("numba") def test_groupby_agg_numba_timegrouper_with_nat(