-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
ENH: support argmin/max, idxmin/max with object dtype #54109
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ | |
datetime, | ||
timedelta, | ||
) | ||
from decimal import Decimal | ||
|
||
import numpy as np | ||
import pytest | ||
|
@@ -1070,27 +1071,89 @@ def test_timedelta64_analytics(self): | |
(Series(["foo", "foo", "bar", "bar", None, np.nan, "baz"]), TypeError), | ||
], | ||
) | ||
def test_assert_idxminmax_raises(self, test_input, error_type): | ||
def test_assert_idxminmax_empty_raises(self, test_input, error_type): | ||
""" | ||
Cases where ``Series.argmax`` and related should raise an exception | ||
""" | ||
msg = ( | ||
"reduction operation 'argmin' not allowed for this dtype|" | ||
"attempt to get argmin of an empty sequence" | ||
) | ||
with pytest.raises(error_type, match=msg): | ||
test_input = Series([], dtype="float64") | ||
msg = "attempt to get argmin of an empty sequence" | ||
with pytest.raises(ValueError, match=msg): | ||
test_input.idxmin() | ||
with pytest.raises(error_type, match=msg): | ||
with pytest.raises(ValueError, match=msg): | ||
test_input.idxmin(skipna=False) | ||
msg = ( | ||
"reduction operation 'argmax' not allowed for this dtype|" | ||
"attempt to get argmax of an empty sequence" | ||
) | ||
with pytest.raises(error_type, match=msg): | ||
msg = "attempt to get argmax of an empty sequence" | ||
with pytest.raises(ValueError, match=msg): | ||
test_input.idxmax() | ||
with pytest.raises(error_type, match=msg): | ||
with pytest.raises(ValueError, match=msg): | ||
test_input.idxmax(skipna=False) | ||
|
||
def test_idxminmax_object_dtype(self): | ||
# pre-2.1 object-dtype was disallowed for argmin/max | ||
ser = Series(["foo", "bar", "baz"]) | ||
assert ser.idxmax() == 0 | ||
assert ser.idxmax(skipna=False) == 0 | ||
assert ser.idxmin() == 1 | ||
assert ser.idxmin(skipna=False) == 1 | ||
|
||
ser2 = Series([(1,), (2,)]) | ||
assert ser2.idxmax() == 1 | ||
assert ser2.idxmax(skipna=False) == 1 | ||
assert ser2.idxmin() == 0 | ||
assert ser2.idxmin(skipna=False) == 0 | ||
|
||
# attempting to compare np.nan with string raises | ||
ser3 = Series(["foo", "foo", "bar", "bar", None, np.nan, "baz"]) | ||
msg = "'>' not supported between instances of 'float' and 'str'" | ||
with pytest.raises(TypeError, match=msg): | ||
ser3.idxmax() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So with […] (remainder of this comment, including its inline code, was lost in extraction). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The skipping part is done after the np.argmin/np.argmax is done, so that can still raise. |
||
with pytest.raises(TypeError, match=msg): | ||
ser3.idxmax(skipna=False) | ||
msg = "'<' not supported between instances of 'float' and 'str'" | ||
with pytest.raises(TypeError, match=msg): | ||
ser3.idxmin() | ||
with pytest.raises(TypeError, match=msg): | ||
ser3.idxmin(skipna=False) | ||
|
||
def test_idxminmax_object_frame(self): | ||
# GH#4279 | ||
df = DataFrame([["zimm", 2.5], ["biff", 1.0], ["bid", 12.0]]) | ||
res = df.idxmax() | ||
exp = Series([0, 2]) | ||
tm.assert_series_equal(res, exp) | ||
|
||
def test_idxminmax_object_tuples(self): | ||
# GH#43697 | ||
ser = Series([(1, 3), (2, 2), (3, 1)]) | ||
assert ser.idxmax() == 2 | ||
assert ser.idxmin() == 0 | ||
assert ser.idxmax(skipna=False) == 2 | ||
assert ser.idxmin(skipna=False) == 0 | ||
|
||
def test_idxminmax_object_decimals(self): | ||
# GH#40685 | ||
df = DataFrame( | ||
{ | ||
"idx": [0, 1], | ||
"x": [Decimal("8.68"), Decimal("42.23")], | ||
"y": [Decimal("7.11"), Decimal("79.61")], | ||
} | ||
) | ||
res = df.idxmax() | ||
exp = Series({"idx": 1, "x": 1, "y": 1}) | ||
tm.assert_series_equal(res, exp) | ||
|
||
res2 = df.idxmin() | ||
exp2 = exp - 1 | ||
tm.assert_series_equal(res2, exp2) | ||
|
||
def test_argminmax_object_ints(self): | ||
# GH#18021 | ||
ser = Series([0, 1], dtype="object") | ||
assert ser.argmax() == 1 | ||
assert ser.argmin() == 0 | ||
assert ser.argmax(skipna=False) == 1 | ||
assert ser.argmin(skipna=False) == 0 | ||
|
||
def test_idxminmax_with_inf(self): | ||
# For numeric data with NA and Inf (GH #13595) | ||
s = Series([0, -np.inf, np.inf, np.nan]) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like `error_type` can be removed from the parametrization now?