Skip to content

DEP: Enforce deprecation of squeeze argument in groupby #49082

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Oct 20, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 0 additions & 14 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -8384,24 +8384,11 @@ def groupby(
as_index: bool = True,
sort: bool = True,
group_keys: bool | lib.NoDefault = no_default,
squeeze: bool | lib.NoDefault = no_default,
observed: bool = False,
dropna: bool = True,
) -> DataFrameGroupBy:
from pandas.core.groupby.generic import DataFrameGroupBy

if squeeze is not no_default:
warnings.warn(
(
"The `squeeze` parameter is deprecated and "
"will be removed in a future version."
),
FutureWarning,
stacklevel=find_stack_level(),
)
else:
squeeze = False

if level is None and by is None:
raise TypeError("You have to supply one of 'by' and 'level'")
axis = self._get_axis_number(axis)
Expand All @@ -8414,7 +8401,6 @@ def groupby(
as_index=as_index,
sort=sort,
group_keys=group_keys,
squeeze=squeeze,
observed=observed,
dropna=dropna,
)
Expand Down
29 changes: 0 additions & 29 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1317,33 +1317,6 @@ def _wrap_applied_output_series(

all_indexed_same = all_indexes_same(x.index for x in values)

# GH3596
# provide a reduction (Frame -> Series) if groups are
# unique
if self.squeeze:
applied_index = self._selected_obj._get_axis(self.axis)
singular_series = len(values) == 1 and applied_index.nlevels == 1

if singular_series:
# GH2893
# we have series in the values array, we want to
# produce a series:
# if any of the sub-series are not indexed the same
# OR we don't have a multi-index and we have only a
# single values
return self._concat_objects(
values,
not_indexed_same=not_indexed_same,
override_group_keys=override_group_keys,
)

# still a series
# path added as of GH 5545
elif all_indexed_same:
from pandas.core.reshape.concat import concat

return concat(values)

if not all_indexed_same:
# GH 8467
return self._concat_objects(
Expand Down Expand Up @@ -1673,7 +1646,6 @@ def _gotitem(self, key, ndim: int, subset=None):
as_index=self.as_index,
sort=self.sort,
group_keys=self.group_keys,
squeeze=self.squeeze,
observed=self.observed,
mutated=self.mutated,
dropna=self.dropna,
Expand All @@ -1688,7 +1660,6 @@ def _gotitem(self, key, ndim: int, subset=None):
selection=key,
sort=self.sort,
group_keys=self.group_keys,
squeeze=self.squeeze,
observed=self.observed,
dropna=self.dropna,
)
Expand Down
5 changes: 0 additions & 5 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,6 @@ class BaseGroupBy(PandasObject, SelectionMixin[NDFrameT], GroupByIndexingMixin):
"obj",
"observed",
"sort",
"squeeze",
}

axis: AxisInt
Expand Down Expand Up @@ -929,7 +928,6 @@ def __init__(
as_index: bool = True,
sort: bool = True,
group_keys: bool | lib.NoDefault = True,
squeeze: bool = False,
observed: bool = False,
mutated: bool = False,
dropna: bool = True,
Expand All @@ -951,7 +949,6 @@ def __init__(
self.keys = keys
self.sort = sort
self.group_keys = group_keys
self.squeeze = squeeze
self.observed = observed
self.mutated = mutated
self.dropna = dropna
Expand Down Expand Up @@ -4378,7 +4375,6 @@ def get_groupby(
as_index: bool = True,
sort: bool = True,
group_keys: bool | lib.NoDefault = True,
squeeze: bool = False,
observed: bool = False,
mutated: bool = False,
dropna: bool = True,
Expand Down Expand Up @@ -4407,7 +4403,6 @@ def get_groupby(
as_index=as_index,
sort=sort,
group_keys=group_keys,
squeeze=squeeze,
observed=observed,
mutated=mutated,
dropna=dropna,
Expand Down
1 change: 0 additions & 1 deletion pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,6 @@ def __init__(
# [int, Literal['index', 'columns', 'rows']]", variable has type "int")
self.axis = axis # type: ignore[assignment]
self.kind = kind
self.squeeze = False
self.group_keys = group_keys
self.as_index = True

Expand Down
14 changes: 0 additions & 14 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2059,24 +2059,11 @@ def groupby(
as_index: bool = True,
sort: bool = True,
group_keys: bool | lib.NoDefault = no_default,
squeeze: bool | lib.NoDefault = no_default,
observed: bool = False,
dropna: bool = True,
) -> SeriesGroupBy:
from pandas.core.groupby.generic import SeriesGroupBy

if squeeze is not no_default:
warnings.warn(
(
"The `squeeze` parameter is deprecated and "
"will be removed in a future version."
),
FutureWarning,
stacklevel=find_stack_level(),
)
else:
squeeze = False

if level is None and by is None:
raise TypeError("You have to supply one of 'by' and 'level'")
axis = self._get_axis_number(axis)
Expand All @@ -2089,7 +2076,6 @@ def groupby(
as_index=as_index,
sort=sort,
group_keys=group_keys,
squeeze=squeeze,
observed=observed,
dropna=dropna,
)
Expand Down
5 changes: 0 additions & 5 deletions pandas/core/shared_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,11 +133,6 @@
result from ``apply`` is a like-indexed Series or DataFrame.
Specify ``group_keys`` explicitly to include the group keys or
not.
squeeze : bool, default False
Reduce the dimensionality of the return type if possible,
otherwise return a consistent type.

.. deprecated:: 1.1.0

observed : bool, default False
This only applies if any of the groupers are Categoricals.
Expand Down
45 changes: 0 additions & 45 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,46 +109,6 @@ def max_value(group):
tm.assert_series_equal(result, expected)


def test_groupby_return_type():
# Exercises the deprecated `squeeze` keyword of DataFrame.groupby: every call
# below passes `squeeze` explicitly and is expected to emit a FutureWarning.

# GH2893, return a reduced type

def func(dataf):
# Offset of every "val2" entry from the mean of "val2" within `dataf`.
return dataf["val2"] - dataf["val2"].mean()

# Frame with two distinct "val1" keys (1 and 2).
df1 = DataFrame(
[
{"val1": 1, "val2": 20},
{"val1": 1, "val2": 19},
{"val1": 2, "val2": 27},
{"val1": 2, "val2": 12},
]
)

with tm.assert_produces_warning(FutureWarning):
result = df1.groupby("val1", squeeze=True).apply(func)
assert isinstance(result, Series)

# Frame in which every row shares the same "val1" key.
df2 = DataFrame(
[
{"val1": 1, "val2": 20},
{"val1": 1, "val2": 19},
{"val1": 1, "val2": 27},
{"val1": 1, "val2": 12},
]
)

with tm.assert_produces_warning(FutureWarning):
result = df2.groupby("val1", squeeze=True).apply(func)
assert isinstance(result, Series)

# GH3596, return a consistent type (regression in 0.11 from 0.10.1)
# An explicit squeeze=False is also expected to warn; count() must give a DataFrame.
df = DataFrame([[1, 1], [1, 1]], columns=["X", "Y"])
with tm.assert_produces_warning(FutureWarning):
result = df.groupby("X", squeeze=False).count()
assert isinstance(result, DataFrame)


def test_inconsistent_return_type():
# GH5592
# inconsistent return type
Expand Down Expand Up @@ -2498,7 +2458,6 @@ def test_group_on_two_row_multiindex_returns_one_tuple_key():
(DataFrame, "as_index", False),
(DataFrame, "sort", False),
(DataFrame, "group_keys", False),
(DataFrame, "squeeze", True),
(DataFrame, "observed", True),
(DataFrame, "dropna", False),
pytest.param(
Expand All @@ -2513,14 +2472,10 @@ def test_group_on_two_row_multiindex_returns_one_tuple_key():
(Series, "as_index", False),
(Series, "sort", False),
(Series, "group_keys", False),
(Series, "squeeze", True),
(Series, "observed", True),
(Series, "dropna", False),
],
)
@pytest.mark.filterwarnings(
"ignore:The `squeeze` parameter is deprecated:FutureWarning"
)
def test_subsetting_columns_keeps_attrs(klass, attr, value):
# GH 9959 - When subsetting columns, don't drop attributes
df = DataFrame({"a": [1], "b": [2], "c": [3]})
Expand Down
17 changes: 7 additions & 10 deletions pandas/tests/groupby/test_timegrouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -887,9 +887,7 @@ def test_groupby_apply_timegrouper_with_nat_apply_squeeze(
# We need to create a GroupBy object with only one non-NaT group,
# so use a huge freq so that all non-NaT dates will be grouped together
tdg = Grouper(key="Date", freq="100Y")

with tm.assert_produces_warning(FutureWarning, match="`squeeze` parameter"):
gb = df.groupby(tdg, squeeze=True)
gb = df.groupby(tdg)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you can just remove this entire test as it was intentionally testing the squeeze functionality

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, true, but this shows nicely how the result changes in cases where squeeze had an actual effect. I think it would be nice to keep this in our history in case someone wants to go back and have a look.


# check the conditions that used to select the singular_series path
# in _wrap_applied_output_series (removed together with `squeeze`)
Expand All @@ -899,13 +897,12 @@ def test_groupby_apply_timegrouper_with_nat_apply_squeeze(
# function that returns a Series
res = gb.apply(lambda x: x["Quantity"] * 2)

key = Timestamp("2013-12-31")
ordering = df["Date"].sort_values().dropna().index
mi = MultiIndex.from_product([[key], ordering], names=["Date", None])

ex_values = df["Quantity"].take(ordering).values * 2
expected = Series(ex_values, index=mi, name="Quantity")
tm.assert_series_equal(res, expected)
expected = DataFrame(
[[36, 6, 6, 10, 2]],
index=Index([Timestamp("2013-12-31")], name="Date"),
columns=Index([0, 1, 5, 2, 3], name="Quantity"),
)
tm.assert_frame_equal(res, expected)
Comment on lines -902 to +905
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

niiice


@td.skip_if_no("numba")
def test_groupby_agg_numba_timegrouper_with_nat(
Expand Down