Skip to content

DEPR: groupby with as_index=False not including out-of-axis groupings #52333

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Apr 17, 2023
Merged
9 changes: 7 additions & 2 deletions doc/source/whatsnew/v0.15.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,14 @@ API changes

current behavior:

.. ipython:: python
.. code-block:: ipython

df.groupby(ts, as_index=False).max()
In [4]: df.groupby(ts, as_index=False).max()
Out[4]:
jim joe
0 72 83
1 77 84
2 96 65

- ``groupby`` will not erroneously exclude columns if the column name conflicts
with the grouper name (:issue:`8112`):
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ Deprecations
- Deprecated the default of ``observed=False`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby`; this will default to ``True`` in a future version (:issue:`43999`)
- Deprecated pinning ``group.name`` to each group in :meth:`.SeriesGroupBy.aggregate` aggregations; if your operation requires utilizing the groupby keys, iterate over the groupby object instead (:issue:`41090`)
- Deprecated the 'axis' keyword in :meth:`.GroupBy.idxmax`, :meth:`.GroupBy.idxmin`, :meth:`.GroupBy.fillna`, :meth:`.GroupBy.take`, :meth:`.GroupBy.skew`, :meth:`.GroupBy.rank`, :meth:`.GroupBy.cumprod`, :meth:`.GroupBy.cumsum`, :meth:`.GroupBy.cummax`, :meth:`.GroupBy.cummin`, :meth:`.GroupBy.pct_change`, :meth:`.GroupBy.diff`, :meth:`.GroupBy.shift`, and :meth:`.DataFrameGroupBy.corrwith`; for ``axis=1`` operate on the underlying :class:`DataFrame` instead (:issue:`50405`, :issue:`51046`)
- Deprecated :class:`.DataFrameGroupBy` with ``as_index=False`` not including groupings in the result when they are not columns of the DataFrame (:issue:`49519`)
- Deprecated :func:`is_categorical_dtype`, use ``isinstance(obj.dtype, pd.CategoricalDtype)`` instead (:issue:`52527`)
- Deprecated :func:`is_datetime64tz_dtype`, check ``isinstance(dtype, pd.DatetimeTZDtype)`` instead (:issue:`52607`)
- Deprecated :func:`is_int64_dtype`, check ``dtype == np.dtype(np.int64)`` instead (:issue:`52564`)
Expand Down
17 changes: 15 additions & 2 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1244,8 +1244,21 @@ def _insert_inaxis_grouper(self, result: Series | DataFrame) -> DataFrame:
):
# GH #28549
# When using .apply(-), name will be in columns already
if in_axis and name not in columns:
result.insert(0, name, lev)
if name not in columns:
if in_axis:
result.insert(0, name, lev)
else:
msg = (
"A grouping was used that is not in the columns of the "
"DataFrame and so was excluded from the result. This grouping "
"will be included in a future version of pandas. Add the "
"grouping as a column of the DataFrame to silence this warning."
)
warnings.warn(
message=msg,
category=FutureWarning,
stacklevel=find_stack_level(),
)

return result

Expand Down
8 changes: 6 additions & 2 deletions pandas/tests/groupby/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -771,7 +771,9 @@ def test_as_index():

# function grouper
f = lambda r: df.loc[r, "A"]
result = df.groupby(["cat", f], as_index=False, observed=True).sum()
msg = "A grouping .* was excluded from the result"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.groupby(["cat", f], as_index=False, observed=True).sum()
expected = DataFrame(
{
"cat": Categorical([1, 2], categories=df.cat.cat.categories),
Expand All @@ -784,7 +786,9 @@ def test_as_index():

# another not in-axis grouper (conflicting names in index)
s = Series(["a", "b", "b"], name="cat")
result = df.groupby(["cat", s], as_index=False, observed=True).sum()
msg = "A grouping .* was excluded from the result"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.groupby(["cat", s], as_index=False, observed=True).sum()
tm.assert_frame_equal(result, expected)

# is original index dropped?
Expand Down
25 changes: 18 additions & 7 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,18 +244,26 @@ def f(x, q=None, axis=0):
# DataFrame
for as_index in [True, False]:
df_grouped = tsframe.groupby(lambda x: x.month, as_index=as_index)
agg_result = df_grouped.agg(np.percentile, 80, axis=0)
apply_result = df_grouped.apply(DataFrame.quantile, 0.8)
expected = df_grouped.quantile(0.8)
warn = None if as_index else FutureWarning
msg = "A grouping .* was excluded from the result"
with tm.assert_produces_warning(warn, match=msg):
agg_result = df_grouped.agg(np.percentile, 80, axis=0)
with tm.assert_produces_warning(warn, match=msg):
apply_result = df_grouped.apply(DataFrame.quantile, 0.8)
with tm.assert_produces_warning(warn, match=msg):
expected = df_grouped.quantile(0.8)
tm.assert_frame_equal(apply_result, expected, check_names=False)
tm.assert_frame_equal(agg_result, expected)

apply_result = df_grouped.apply(DataFrame.quantile, [0.4, 0.8])
expected_seq = df_grouped.quantile([0.4, 0.8])
with tm.assert_produces_warning(warn, match=msg):
expected_seq = df_grouped.quantile([0.4, 0.8])
tm.assert_frame_equal(apply_result, expected_seq, check_names=False)

agg_result = df_grouped.agg(f, q=80)
apply_result = df_grouped.apply(DataFrame.quantile, q=0.8)
with tm.assert_produces_warning(warn, match=msg):
agg_result = df_grouped.agg(f, q=80)
with tm.assert_produces_warning(warn, match=msg):
apply_result = df_grouped.apply(DataFrame.quantile, q=0.8)
tm.assert_frame_equal(agg_result, expected)
tm.assert_frame_equal(apply_result, expected, check_names=False)

Expand All @@ -266,7 +274,10 @@ def test_pass_args_kwargs_duplicate_columns(tsframe, as_index):
tsframe.columns = ["A", "B", "A", "C"]
gb = tsframe.groupby(lambda x: x.month, as_index=as_index)

res = gb.agg(np.percentile, 80, axis=0)
warn = None if as_index else FutureWarning
msg = "A grouping .* was excluded from the result"
with tm.assert_produces_warning(warn, match=msg):
res = gb.agg(np.percentile, 80, axis=0)

ex_data = {
1: tsframe[tsframe.index.month == 1].quantile(0.8),
Expand Down
8 changes: 7 additions & 1 deletion pandas/tests/groupby/test_groupby_dropna.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,7 +576,13 @@ def test_categorical_reducers(
gb_keepna = df.groupby(
keys, dropna=False, observed=observed, sort=sort, as_index=as_index
)
result = getattr(gb_keepna, reduction_func)(*args)
if as_index or index_kind == "range" or reduction_func == "size":
warn = None
else:
warn = FutureWarning
msg = "A grouping .* was excluded from the result"
with tm.assert_produces_warning(warn, match=msg):
result = getattr(gb_keepna, reduction_func)(*args)

# size will return a Series, others are DataFrame
tm.assert_equal(result, expected)
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/groupby/test_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -1085,7 +1085,9 @@ def test_grouping_by_key_is_in_axis():

# Currently only in-axis groupings are included in the result when as_index=False;
# This is likely to change in the future.
result = gb.sum()
msg = "A grouping .* was excluded from the result"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = gb.sum()
expected = DataFrame({"b": [1, 2], "c": [7, 5]})
tm.assert_frame_equal(result, expected)

Expand Down