-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: Fix issues with numeric_only deprecation #47481
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -306,7 +306,7 @@ def test_idxmax(self, gb): | |
# non-cython calls should not include the grouper | ||
expected = DataFrame([[0.0], [np.nan]], columns=["B"], index=[1, 3]) | ||
expected.index.name = "A" | ||
msg = "The default value of numeric_only" | ||
msg = "The default value of numeric_only in DataFrameGroupBy.idxmax" | ||
with tm.assert_produces_warning(FutureWarning, match=msg): | ||
result = gb.idxmax() | ||
tm.assert_frame_equal(result, expected) | ||
|
@@ -317,7 +317,7 @@ def test_idxmin(self, gb): | |
# non-cython calls should not include the grouper | ||
expected = DataFrame([[0.0], [np.nan]], columns=["B"], index=[1, 3]) | ||
expected.index.name = "A" | ||
msg = "The default value of numeric_only" | ||
msg = "The default value of numeric_only in DataFrameGroupBy.idxmin" | ||
with tm.assert_produces_warning(FutureWarning, match=msg): | ||
result = gb.idxmin() | ||
tm.assert_frame_equal(result, expected) | ||
|
@@ -1356,6 +1356,90 @@ def test_deprecate_numeric_only( | |
method(*args, **kwargs) | ||
|
||
|
||
def test_deprecate_numeric_only_series(groupby_func, request): | ||
# GH#46560 | ||
if groupby_func in ("backfill", "mad", "pad", "tshift"): | ||
pytest.skip("method is deprecated") | ||
elif groupby_func == "corrwith": | ||
msg = "corrwith is not implemented on SeriesGroupBy" | ||
request.node.add_marker(pytest.mark.xfail(reason=msg)) | ||
|
||
ser = Series(list("xyz")) | ||
gb = ser.groupby([0, 0, 1]) | ||
|
||
if groupby_func == "corrwith": | ||
args = (ser,) | ||
elif groupby_func == "corr": | ||
args = (ser,) | ||
elif groupby_func == "cov": | ||
args = (ser,) | ||
elif groupby_func == "nth": | ||
args = (0,) | ||
elif groupby_func == "fillna": | ||
args = (True,) | ||
elif groupby_func == "take": | ||
args = ([0],) | ||
elif groupby_func == "quantile": | ||
args = (0.5,) | ||
else: | ||
args = () | ||
method = getattr(gb, groupby_func, None) | ||
|
||
try: | ||
_ = method(*args) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just to confirm, all There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i.e. can this be tested like
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually, none are successful. Some would be if the dtype was not object, but I plan to deprecate that in a follow up assuming no objection. Changed over to using pytest.raises. |
||
except (TypeError, ValueError) as err: | ||
# ops that only work on numeric dtypes | ||
assert groupby_func in ( | ||
"corr", | ||
"cov", | ||
"cummax", | ||
"cummin", | ||
"cumprod", | ||
"cumsum", | ||
"diff", | ||
"idxmax", | ||
"idxmin", | ||
"mean", | ||
"median", | ||
"pct_change", | ||
"prod", | ||
"quantile", | ||
"sem", | ||
"skew", | ||
"std", | ||
"var", | ||
) | ||
assert ( | ||
"could not convert" in str(err).lower() | ||
or "unsupported operand type" in str(err) | ||
or "not allowed for this dtype" in str(err) | ||
or "can't multiply sequence by non-int" in str(err) | ||
or "cannot be performed against 'object' dtypes" in str(err) | ||
or "is not supported for object dtype" in str(err) | ||
), str(err) | ||
|
||
if groupby_func in ("cummax", "cummin", "cumprod", "cumsum"): | ||
# kernels that did not always raise when passing numeric_only=True in 1.4 | ||
try: | ||
msg = ( | ||
f"Passing `numeric_only=True` to SeriesGroupBy.{groupby_func} " | ||
"is deprecated" | ||
) | ||
with tm.assert_produces_warning(FutureWarning, match=msg): | ||
method(*args, numeric_only=True) | ||
except TypeError as err: | ||
assert str(err) == f"{groupby_func} is not supported for object dtype", str( | ||
err | ||
) | ||
else: | ||
try: | ||
method(*args, numeric_only=True) | ||
except (NotImplementedError, TypeError) as err: | ||
assert "got an unexpected keyword argument 'numeric_only'" in str( | ||
err | ||
) or f"{groupby_func} does not implement numeric_only" in str(err), str(err) | ||
|
||
|
||
@pytest.mark.parametrize("dtype", [int, float, object]) | ||
@pytest.mark.parametrize( | ||
"kwargs", | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
With the exception of
rank
, all Series ops fail when passing numeric_only=True (even when the Series is numeric) with a NotImplementedError. However most SeriesGroupBy ops allow it. SerisGroupBy ops will be successful when they can handle object dtype or the dtype is numeric. They will raise an error attempting to perform the op when dtype cannot be operated on.I'm thinking we should make SeriesGroupBy consistent with Series by raising whenever numreic_only is truthy.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When I first added the deprecation, I thought other SeriesGroupBy ops would always raise when passing numeric_only=True. Seeing now that's not the case, going to siphon this off into a followup.
The ops std, sem, and quantile all gained the numeric_only argument as part of 1.5.