Skip to content

Commit 3fffb6d

Browse files
authored
DEPR: Enforce numeric_only=False in groupby sum/mean (#49829)
* DEPR: Enforce numeric_only=False in groupby sum/mean
* cleanup
* Refinements
* whatsnew fixup
1 parent 0a4440d commit 3fffb6d

File tree

15 files changed

+283
-208
lines changed

15 files changed

+283
-208
lines changed

asv_bench/benchmarks/reshape.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -210,7 +210,7 @@ def time_pivot_table_categorical_observed(self):
210210
)
211211

212212
def time_pivot_table_margins_only_column(self):
213-
self.df.pivot_table(columns=["key2", "key3"], margins=True)
213+
self.df.pivot_table(columns=["key1", "key2", "key3"], margins=True)
214214

215215

216216
class Crosstab:

doc/source/whatsnew/v2.0.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -572,6 +572,7 @@ Removal of prior version deprecations/changes
572572
- Changed default of ``numeric_only`` to ``False`` in all DataFrame methods with that argument (:issue:`46096`, :issue:`46906`)
573573
- Changed default of ``numeric_only`` to ``False`` in :meth:`Series.rank` (:issue:`47561`)
574574
- Enforced deprecation of silently dropping nuisance columns in groupby and resample operations when ``numeric_only=False`` (:issue:`41475`)
575+
- Changed default of ``numeric_only`` to ``False`` in :meth:`.DataFrameGroupBy.sum` and :meth:`.DataFrameGroupBy.mean` (:issue:`46072`)
575576
-
576577

577578
.. ---------------------------------------------------------------------------

pandas/core/groupby/groupby.py

+6 -7
Original file line number | Diff line number | Diff line change
@@ -2049,7 +2049,7 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike:
20492049
@Substitution(see_also=_common_see_also)
20502050
def mean(
20512051
self,
2052-
numeric_only: bool | lib.NoDefault = lib.no_default,
2052+
numeric_only: bool = False,
20532053
engine: str = "cython",
20542054
engine_kwargs: dict[str, bool] | None = None,
20552055
):
@@ -2058,12 +2058,12 @@ def mean(
20582058
20592059
Parameters
20602060
----------
2061-
numeric_only : bool, default True
2061+
numeric_only : bool, default False
20622062
Include only float, int, boolean columns.
20632063
20642064
.. versionchanged:: 2.0.0
20652065
2066-
numeric_only no longer accepts ``None``.
2066+
numeric_only no longer accepts ``None`` and defaults to ``False``.
20672067
20682068
engine : str, default None
20692069
* ``'cython'`` : Runs the operation through C-extensions from cython.
@@ -2120,7 +2120,6 @@ def mean(
21202120
2 4.0
21212121
Name: B, dtype: float64
21222122
"""
2123-
numeric_only_bool = self._resolve_numeric_only("mean", numeric_only, axis=0)
21242123

21252124
if maybe_use_numba(engine):
21262125
from pandas.core._numba.kernels import sliding_mean
@@ -2129,7 +2128,7 @@ def mean(
21292128
else:
21302129
result = self._cython_agg_general(
21312130
"mean",
2132-
alt=lambda x: Series(x).mean(numeric_only=numeric_only_bool),
2131+
alt=lambda x: Series(x).mean(numeric_only=numeric_only),
21332132
numeric_only=numeric_only,
21342133
)
21352134
return result.__finalize__(self.obj, method="groupby")
@@ -2382,10 +2381,10 @@ def size(self) -> DataFrame | Series:
23822381
return self._reindex_output(result, fill_value=0)
23832382

23842383
@final
2385-
@doc(_groupby_agg_method_template, fname="sum", no=True, mc=0)
2384+
@doc(_groupby_agg_method_template, fname="sum", no=False, mc=0)
23862385
def sum(
23872386
self,
2388-
numeric_only: bool | lib.NoDefault = lib.no_default,
2387+
numeric_only: bool = False,
23892388
min_count: int = 0,
23902389
engine: str | None = None,
23912390
engine_kwargs: dict[str, bool] | None = None,

pandas/tests/extension/base/groupby.py

+19 -11
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,12 @@
11
import pytest
22

3-
from pandas.core.dtypes.common import is_numeric_dtype
3+
from pandas.core.dtypes.common import (
4+
is_bool_dtype,
5+
is_numeric_dtype,
6+
is_object_dtype,
7+
is_period_dtype,
8+
is_string_dtype,
9+
)
410

511
import pandas as pd
612
import pandas._testing as tm
@@ -100,17 +106,19 @@ def test_in_numeric_groupby(self, data_for_grouping):
100106
)
101107

102108
dtype = data_for_grouping.dtype
103-
if is_numeric_dtype(dtype) or dtype.name == "decimal":
104-
warn = None
105-
else:
106-
warn = FutureWarning
107-
msg = "The default value of numeric_only"
108-
with tm.assert_produces_warning(warn, match=msg):
109-
result = df.groupby("A").sum().columns
110-
111-
if data_for_grouping.dtype._is_numeric:
109+
if (
110+
is_numeric_dtype(dtype)
111+
or is_bool_dtype(dtype)
112+
or dtype.name == "decimal"
113+
or is_string_dtype(dtype)
114+
or is_period_dtype(dtype)
115+
or is_object_dtype(dtype)
116+
):
112117
expected = pd.Index(["B", "C"])
118+
result = df.groupby("A").sum().columns
113119
else:
114120
expected = pd.Index(["C"])
115-
121+
with pytest.raises(TypeError, match="does not support"):
122+
df.groupby("A").sum().columns
123+
result = df.groupby("A").sum(numeric_only=True).columns
116124
tm.assert_index_equal(result, expected)

pandas/tests/generic/test_frame.py

+1 -3
Original file line number | Diff line number | Diff line change
@@ -71,9 +71,7 @@ def test_metadata_propagation_indiv_groupby(self):
7171
"D": np.random.randn(8),
7272
}
7373
)
74-
msg = "The default value of numeric_only"
75-
with tm.assert_produces_warning(FutureWarning, match=msg):
76-
result = df.groupby("A").sum()
74+
result = df.groupby("A").sum()
7775
tm.assert_metadata_equivalent(df, result)
7876

7977
def test_metadata_propagation_indiv_resample(self):

pandas/tests/groupby/test_categorical.py

+7 -2
Original file line number | Diff line number | Diff line change
@@ -103,7 +103,10 @@ def test_basic(): # TODO: split this test
103103
gb = df.groupby("A", observed=False)
104104
exp_idx = CategoricalIndex(["a", "b", "z"], name="A", ordered=True)
105105
expected = DataFrame({"values": Series([3, 7, 0], index=exp_idx)})
106-
result = gb.sum()
106+
msg = "category type does not support sum operations"
107+
with pytest.raises(TypeError, match=msg):
108+
gb.sum()
109+
result = gb.sum(numeric_only=True)
107110
tm.assert_frame_equal(result, expected)
108111

109112
# GH 8623
@@ -338,7 +341,9 @@ def test_observed(observed):
338341

339342
gb = df.groupby(["A", "B"], observed=observed)
340343
exp_index = MultiIndex.from_arrays([cat1, cat2], names=["A", "B"])
341-
expected = DataFrame({"values": [1, 2, 3, 4]}, index=exp_index)
344+
expected = DataFrame(
345+
{"values": [1, 2, 3, 4], "C": ["foo", "bar", "foo", "bar"]}, index=exp_index
346+
)
342347
result = gb.sum()
343348
if not observed:
344349
expected = cartesian_product_for_groupers(

pandas/tests/groupby/test_function.py

+17 -5
Original file line number | Diff line number | Diff line change
@@ -166,9 +166,12 @@ def test_averages(self, df, method):
166166
],
167167
)
168168

169-
with pytest.raises(TypeError, match="[Cc]ould not convert"):
170-
getattr(gb, method)(numeric_only=False)
171-
result = getattr(gb, method)()
169+
if method == "mean":
170+
with pytest.raises(TypeError, match="[Cc]ould not convert"):
171+
getattr(gb, method)()
172+
result = getattr(gb, method)(numeric_only=True)
173+
else:
174+
result = getattr(gb, method)()
172175
tm.assert_frame_equal(result.reindex_like(expected), expected)
173176

174177
expected_columns = expected.columns
@@ -264,6 +267,15 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
264267
)
265268
with pytest.raises(exception, match=msg):
266269
getattr(gb, method)()
270+
elif method in ("sum", "mean"):
271+
msg = "|".join(
272+
[
273+
"category type does not support sum operations",
274+
"Could not convert",
275+
]
276+
)
277+
with pytest.raises(exception, match=msg):
278+
getattr(gb, method)()
267279
else:
268280
result = getattr(gb, method)()
269281
tm.assert_index_equal(result.columns, expected_columns_numeric)
@@ -1375,7 +1387,7 @@ def test_groupby_sum_timedelta_with_nat():
13751387
("idxmin", True, True),
13761388
("last", False, True),
13771389
("max", False, True),
1378-
("mean", True, True),
1390+
("mean", False, True),
13791391
("median", True, True),
13801392
("min", False, True),
13811393
("nth", False, False),
@@ -1386,7 +1398,7 @@ def test_groupby_sum_timedelta_with_nat():
13861398
("sem", True, True),
13871399
("skew", True, True),
13881400
("std", True, True),
1389-
("sum", True, True),
1401+
("sum", False, True),
13901402
("var", True, True),
13911403
],
13921404
)

0 commit comments

Comments
 (0)