Skip to content

BUG: GroupBy.idxmax/idxmin with EA dtypes #38733

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jan 19, 2021
12 changes: 9 additions & 3 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
from pandas.compat.numpy import function as nv
from pandas.errors import AbstractMethodError
from pandas.util._decorators import Appender, Substitution
from pandas.util._validators import validate_fillna_kwargs
from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs

from pandas.core.dtypes.cast import maybe_cast_to_extension_array
from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -596,7 +596,7 @@ def argsort(
mask=np.asarray(self.isna()),
)

def argmin(self):
def argmin(self, skipna: bool = True) -> int:
"""
Return the index of minimum value.

Expand All @@ -611,9 +611,12 @@ def argmin(self):
--------
ExtensionArray.argmax
"""
validate_bool_kwarg(skipna, "skipna")
if not skipna and self.isna().any():
return -1
Copy link
Member

@jorisvandenbossche jorisvandenbossche Jan 18, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

those should not return -1, IMO. I would prefer to not put this "wrong" behaviour in the EAs (but keep it in Series were it already has this behaviour), as long as we didn't decide on the preferred behaviour.

Short term, could eg raise an error saying that skipna=False is not yet implemented, but by accepting skipna, it can already fix the default groupby case (see my comment at #38733 (comment))

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could eg raise an error saying that skipna=False is not yet implemented

so... like i had in the previous commit?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so... like i had in the previous commit?

Yes, that would be my preference.

(sorry for not being explicit about it, but my earlier comment of "this PR can already fix the non-skipna case in the meantime?" quite well matched the state of the PR, it was mainly to mean: we don't have to solve the skipna=False discussion to merge a PR like this to fix the skipna=True case)

return nargminmax(self, "argmin")

def argmax(self):
def argmax(self, skipna: bool = True) -> int:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you update the Parameters section here (and for argmin)

"""
Return the index of maximum value.

Expand All @@ -628,6 +631,9 @@ def argmax(self):
--------
ExtensionArray.argmin
"""
validate_bool_kwarg(skipna, "skipna")
if not skipna and self.isna().any():
return -1
return nargminmax(self, "argmax")

def fillna(self, value=None, method=None, limit=None):
Expand Down
10 changes: 2 additions & 8 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -726,10 +726,7 @@ def argmax(self, axis=None, skipna: bool = True, *args, **kwargs) -> int:
skipna = nv.validate_argmax_with_skipna(skipna, args, kwargs)

if isinstance(delegate, ExtensionArray):
if not skipna and delegate.isna().any():
return -1
else:
return delegate.argmax()
return delegate.argmax(skipna=skipna)
else:
return nanops.nanargmax(delegate, skipna=skipna)

Expand Down Expand Up @@ -784,10 +781,7 @@ def argmin(self, axis=None, skipna=True, *args, **kwargs) -> int:
skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs)

if isinstance(delegate, ExtensionArray):
if not skipna and delegate.isna().any():
return -1
else:
return delegate.argmin()
return delegate.argmin(skipna=skipna)
else:
return nanops.nanargmin(delegate, skipna=skipna)

Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/extension/base/methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,21 @@ def test_argmin_argmax_all_na(self, method, data, na_value):
with pytest.raises(ValueError, match=err_msg):
getattr(data_na, method)()

@pytest.mark.parametrize(
"op_name, skipna, expected",
[
("argmax", True, 0),
("argmin", True, 2),
("argmax", False, -1),
("argmin", False, -1),
],
)
def test_argmin_argmax_skipna(
self, op_name, skipna, expected, data_missing_for_sorting
):
result = getattr(data_missing_for_sorting, op_name)(skipna=skipna)
tm.assert_almost_equal(result, expected)

@pytest.mark.parametrize(
"op_name, skipna, expected",
[
Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/groupby/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,10 +531,16 @@ def test_idxmin_idxmax_returns_int_types(func, values):
}
)
df["c_date"] = pd.to_datetime(df["c_date"])
df["c_date_tz"] = df["c_date"].dt.tz_localize("US/Pacific")
df["c_timedelta"] = df["c_date"] - df["c_date"].iloc[0]
df["c_period"] = df["c_date"].dt.to_period("W")

result = getattr(df.groupby("name"), func)()

expected = DataFrame(values, index=Index(["A", "B"], name="name"))
expected["c_date_tz"] = expected["c_date"]
expected["c_timedelta"] = expected["c_date"]
expected["c_period"] = expected["c_date"]

tm.assert_frame_equal(result, expected)

Expand Down