diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index b4aea4240e458..a4462a292f4f9 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1610,17 +1610,20 @@ def idxmax( numeric_only_arg = numeric_only def func(df): - res = df._reduce( - nanops.nanargmax, - "argmax", - axis=axis, - skipna=skipna, - numeric_only=numeric_only_arg, - ) - indices = res._values - index = df._get_axis(axis) - result = [index[i] if i >= 0 else np.nan for i in indices] - return df._constructor_sliced(result, index=res.index) + with warnings.catch_warnings(): + # Suppress numeric_only warnings here, will warn below + warnings.filterwarnings("ignore", ".*numeric_only in DataFrame.argmax") + res = df._reduce( + nanops.nanargmax, + "argmax", + axis=axis, + skipna=skipna, + numeric_only=numeric_only_arg, + ) + indices = res._values + index = df._get_axis(axis) + result = [index[i] if i >= 0 else np.nan for i in indices] + return df._constructor_sliced(result, index=res.index) func.__name__ = "idxmax" result = self._python_apply_general(func, self._obj_with_exclusions) @@ -1646,17 +1649,20 @@ def idxmin( numeric_only_arg = numeric_only def func(df): - res = df._reduce( - nanops.nanargmin, - "argmin", - axis=axis, - skipna=skipna, - numeric_only=numeric_only_arg, - ) - indices = res._values - index = df._get_axis(axis) - result = [index[i] if i >= 0 else np.nan for i in indices] - return df._constructor_sliced(result, index=res.index) + with warnings.catch_warnings(): + # Suppress numeric_only warnings here, will warn below + warnings.filterwarnings("ignore", ".*numeric_only in DataFrame.argmin") + res = df._reduce( + nanops.nanargmin, + "argmin", + axis=axis, + skipna=skipna, + numeric_only=numeric_only_arg, + ) + indices = res._values + index = df._get_axis(axis) + result = [index[i] if i >= 0 else np.nan for i in indices] + return df._constructor_sliced(result, index=res.index) func.__name__ = "idxmin" result = self._python_apply_general(func, self._obj_with_exclusions) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index c294082edce71..c2098fbe93a56 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1716,8 +1716,9 @@ def _cython_agg_general( kwd_name = "numeric_only" if how in ["any", "all"]: kwd_name = "bool_only" + kernel = "sum" if how == "add" else how raise NotImplementedError( - f"{type(self).__name__}.{how} does not implement {kwd_name}." + f"{type(self).__name__}.{kernel} does not implement {kwd_name}." ) elif not is_ser: data = data.get_numeric_data(copy=False) @@ -2194,10 +2195,16 @@ def std( return np.sqrt(self._numba_agg_general(sliding_var, engine_kwargs, ddof)) else: + # Resolve numeric_only so that var doesn't warn + numeric_only_bool = self._resolve_numeric_only(numeric_only, axis=0) + if numeric_only_bool and self.obj.ndim == 1: + raise NotImplementedError( + f"{type(self).__name__}.std does not implement numeric_only." + ) result = self._get_cythonized_result( libgroupby.group_var, cython_dtype=np.dtype(np.float64), - numeric_only=numeric_only, + numeric_only=numeric_only_bool, needs_counts=True, post_processing=lambda vals, inference: np.sqrt(vals), ddof=ddof, @@ -2296,7 +2303,13 @@ def sem(self, ddof: int = 1, numeric_only: bool | lib.NoDefault = lib.no_default Series or DataFrame Standard error of the mean of values within each group. """ - result = self.std(ddof=ddof, numeric_only=numeric_only) + # Reolve numeric_only so that std doesn't warn + numeric_only_bool = self._resolve_numeric_only(numeric_only, axis=0) + if numeric_only_bool and self.obj.ndim == 1: + raise NotImplementedError( + f"{type(self).__name__}.sem does not implement numeric_only." + ) + result = self.std(ddof=ddof, numeric_only=numeric_only_bool) self._maybe_warn_numeric_only_depr("sem", result, numeric_only) if result.ndim == 1: @@ -3167,6 +3180,10 @@ def quantile( b 3.0 """ numeric_only_bool = self._resolve_numeric_only(numeric_only, axis=0) + if numeric_only_bool and self.obj.ndim == 1: + raise NotImplementedError( + f"{type(self).__name__}.quantile does not implement numeric_only" + ) def pre_processor(vals: ArrayLike) -> tuple[np.ndarray, np.dtype | None]: if is_object_dtype(vals): diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 25266d1e6ab80..7f24143920b84 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -306,7 +306,7 @@ def test_idxmax(self, gb): # non-cython calls should not include the grouper expected = DataFrame([[0.0], [np.nan]], columns=["B"], index=[1, 3]) expected.index.name = "A" - msg = "The default value of numeric_only" + msg = "The default value of numeric_only in DataFrameGroupBy.idxmax" with tm.assert_produces_warning(FutureWarning, match=msg): result = gb.idxmax() tm.assert_frame_equal(result, expected) @@ -317,7 +317,7 @@ def test_idxmin(self, gb): # non-cython calls should not include the grouper expected = DataFrame([[0.0], [np.nan]], columns=["B"], index=[1, 3]) expected.index.name = "A" - msg = "The default value of numeric_only" + msg = "The default value of numeric_only in DataFrameGroupBy.idxmin" with tm.assert_produces_warning(FutureWarning, match=msg): result = gb.idxmin() tm.assert_frame_equal(result, expected) @@ -1356,6 +1356,77 @@ def test_deprecate_numeric_only( method(*args, **kwargs) +def test_deprecate_numeric_only_series(groupby_func, request): + # GH#46560 + if groupby_func in ("backfill", "mad", "pad", "tshift"): + pytest.skip("method is deprecated") + elif groupby_func == "corrwith": + msg = "corrwith is not implemented on SeriesGroupBy" + request.node.add_marker(pytest.mark.xfail(reason=msg)) + + ser = Series(list("xyz")) + gb = ser.groupby([0, 0, 1]) + + if groupby_func == "corrwith": + args = (ser,) + elif groupby_func == "corr": + args = (ser,) + elif groupby_func == "cov": + args = (ser,) + elif groupby_func == "nth": + args = (0,) + elif groupby_func == "fillna": + args = (True,) + elif groupby_func == "take": + args = ([0],) + elif groupby_func == "quantile": + args = (0.5,) + else: + args = () + method = getattr(gb, groupby_func) + + try: + _ = method(*args) + except (TypeError, ValueError) as err: + # ops that only work on numeric dtypes + assert groupby_func in ( + "corr", + "cov", + "cummax", + "cummin", + "cumprod", + "cumsum", + "diff", + "idxmax", + "idxmin", + "mean", + "median", + "pct_change", + "prod", + "quantile", + "sem", + "skew", + "std", + "var", + ) + assert ( + "could not convert" in str(err).lower() + or "unsupported operand type" in str(err) + or "not allowed for this dtype" in str(err) + or "can't multiply sequence by non-int" in str(err) + or "cannot be performed against 'object' dtypes" in str(err) + or "is not supported for object dtype" in str(err) + ), str(err) + + msgs = ( + "got an unexpected keyword argument 'numeric_only'", + f"{groupby_func} does not implement numeric_only", + f"{groupby_func} is not supported for object dtype", + ) + with pytest.raises((NotImplementedError, TypeError), match=f"({'|'.join(msgs)})"): + _ = method(*args, numeric_only=True) + + @pytest.mark.parametrize("dtype", [int, float, object]) @pytest.mark.parametrize( "kwargs",