Skip to content

DEPR: idxmin/idxmax with all-NA #54226

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,7 @@ Deprecations
- Deprecated ``axis=1`` in :meth:`DataFrame.ewm`, :meth:`DataFrame.rolling`, :meth:`DataFrame.expanding`, transpose before calling the method instead (:issue:`51778`)
- Deprecated ``axis=1`` in :meth:`DataFrame.groupby` and in :class:`Grouper` constructor, do ``frame.T.groupby(...)`` instead (:issue:`51203`)
- Deprecated accepting slices in :meth:`DataFrame.take`, call ``obj[slicer]`` or pass a sequence of integers instead (:issue:`51539`)
- Deprecated behavior of :meth:`DataFrame.idxmax`, :meth:`DataFrame.idxmin`, :meth:`Series.idxmax`, :meth:`Series.idxmin` in with all-NA entries or any-NA and ``skipna=False``; in a future version these will raise ``ValueError`` (:issue:`51276`)
- Deprecated explicit support for subclassing :class:`Index` (:issue:`45289`)
- Deprecated making functions given to :meth:`Series.agg` attempt to operate on each element in the :class:`Series` and only operate on the whole :class:`Series` if the elementwise operations failed. In the future, functions given to :meth:`Series.agg` will always operate on the whole :class:`Series` only. To keep the current behavior, use :meth:`Series.transform` instead. (:issue:`53325`)
- Deprecated making the functions in a list of functions given to :meth:`DataFrame.agg` attempt to operate on each element in the :class:`DataFrame` and only operate on the columns of the :class:`DataFrame` if the elementwise operations failed. To keep the current behavior, use :meth:`DataFrame.transform` instead. (:issue:`53325`)
Expand Down
4 changes: 4 additions & 0 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,10 @@ def pytest_collection_modifyitems(items, config) -> None:
("is_sparse", "is_sparse is deprecated"),
("NDFrame.replace", "The 'method' keyword"),
("NDFrame.replace", "Series.replace without 'value'"),
("Series.idxmin", "The behavior of Series.idxmin"),
("Series.idxmax", "The behavior of Series.idxmax"),
("SeriesGroupBy.idxmin", "The behavior of Series.idxmin"),
("SeriesGroupBy.idxmax", "The behavior of Series.idxmax"),
# Docstring divides by zero to show behavior difference
("missing.mask_zero_div_zero", "divide by zero encountered"),
(
Expand Down
18 changes: 18 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -11264,6 +11264,15 @@ def idxmin(
indices = res._values
# indices will always be np.ndarray since axis is not N

if (indices == -1).any():
warnings.warn(
f"The behavior of {type(self).__name__}.idxmin with all-NA "
"values, or any-NA and skipna=False, is deprecated. In a future "
"version this will raise ValueError",
FutureWarning,
stacklevel=find_stack_level(),
)

index = data._get_axis(axis)
result = algorithms.take(
index._values, indices, allow_fill=True, fill_value=index._na_value
Expand Down Expand Up @@ -11292,6 +11301,15 @@ def idxmax(
indices = res._values
# indices will always be 1d array since axis is not None

if (indices == -1).any():
warnings.warn(
f"The behavior of {type(self).__name__}.idxmax with all-NA "
"values, or any-NA and skipna=False, is deprecated. In a future "
"version this will raise ValueError",
FutureWarning,
stacklevel=find_stack_level(),
)

index = data._get_axis(axis)
result = algorithms.take(
index._values, indices, allow_fill=True, fill_value=index._na_value
Expand Down
16 changes: 16 additions & 0 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2536,8 +2536,16 @@ def idxmin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashab
# warning for idxmin
warnings.simplefilter("ignore")
i = self.argmin(axis, skipna, *args, **kwargs)

if i == -1:
# GH#43587 give correct NA value for Index.
warnings.warn(
f"The behavior of {type(self).__name__}.idxmin with all-NA "
"values, or any-NA and skipna=False, is deprecated. In a future "
"version this will raise ValueError",
FutureWarning,
stacklevel=find_stack_level(),
)
return self.index._na_value
return self.index[i]

Expand Down Expand Up @@ -2612,8 +2620,16 @@ def idxmax(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashab
# warning for argmax
warnings.simplefilter("ignore")
i = self.argmax(axis, skipna, *args, **kwargs)

if i == -1:
# GH#43587 give correct NA value for Index.
warnings.warn(
f"The behavior of {type(self).__name__}.idxmax with all-NA "
"values, or any-NA and skipna=False, is deprecated. In a future "
"version this will raise ValueError",
FutureWarning,
stacklevel=find_stack_level(),
)
return self.index._na_value
return self.index[i]

Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/extension/base/methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,9 +161,12 @@ def test_argreduce_series(
):
# data_missing_for_sorting -> [B, NA, A] with A < B and NA missing.
warn = None
msg = "The behavior of Series.argmax/argmin"
if op_name.startswith("arg") and expected == -1:
warn = FutureWarning
msg = "The behavior of Series.argmax/argmin"
if op_name.startswith("idx") and np.isnan(expected):
warn = FutureWarning
msg = f"The behavior of Series.{op_name}"
ser = pd.Series(data_missing_for_sorting)
with tm.assert_produces_warning(warn, match=msg):
result = getattr(ser, op_name)(skipna=skipna)
Expand Down
24 changes: 20 additions & 4 deletions pandas/tests/frame/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -968,8 +968,16 @@ def test_idxmin(self, float_frame, int_frame, skipna, axis):
frame.iloc[5:10] = np.nan
frame.iloc[15:20, -2:] = np.nan
for df in [frame, int_frame]:
result = df.idxmin(axis=axis, skipna=skipna)
expected = df.apply(Series.idxmin, axis=axis, skipna=skipna)
warn = None
if skipna is False or axis == 1:
warn = None if df is int_frame else FutureWarning
msg = "The behavior of DataFrame.idxmin with all-NA values"
with tm.assert_produces_warning(warn, match=msg):
result = df.idxmin(axis=axis, skipna=skipna)

msg2 = "The behavior of Series.idxmin"
with tm.assert_produces_warning(warn, match=msg2):
expected = df.apply(Series.idxmin, axis=axis, skipna=skipna)
expected = expected.astype(df.index.dtype)
tm.assert_series_equal(result, expected)

Expand Down Expand Up @@ -1008,8 +1016,16 @@ def test_idxmax(self, float_frame, int_frame, skipna, axis):
frame.iloc[5:10] = np.nan
frame.iloc[15:20, -2:] = np.nan
for df in [frame, int_frame]:
result = df.idxmax(axis=axis, skipna=skipna)
expected = df.apply(Series.idxmax, axis=axis, skipna=skipna)
warn = None
if skipna is False or axis == 1:
warn = None if df is int_frame else FutureWarning
msg = "The behavior of DataFrame.idxmax with all-NA values"
with tm.assert_produces_warning(warn, match=msg):
result = df.idxmax(axis=axis, skipna=skipna)

msg2 = "The behavior of Series.idxmax"
with tm.assert_produces_warning(warn, match=msg2):
expected = df.apply(Series.idxmax, axis=axis, skipna=skipna)
expected = expected.astype(df.index.dtype)
tm.assert_series_equal(result, expected)

Expand Down
46 changes: 33 additions & 13 deletions pandas/tests/reductions/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ def test_nanargminmax(self, opname, index_or_series):
# GH#7261
klass = index_or_series
arg_op = "arg" + opname if klass is Index else "idx" + opname
warn = FutureWarning if klass is Index else None

obj = klass([NaT, datetime(2011, 11, 1)])
assert getattr(obj, arg_op)() == 1
Expand All @@ -122,7 +121,9 @@ def test_nanargminmax(self, opname, index_or_series):
"The behavior of (DatetimeIndex|Series).argmax/argmin with "
"skipna=False and NAs"
)
with tm.assert_produces_warning(warn, match=msg):
if klass is Series:
msg = "The behavior of Series.(idxmax|idxmin) with all-NA"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = getattr(obj, arg_op)(skipna=False)
if klass is Series:
assert np.isnan(result)
Expand All @@ -132,7 +133,7 @@ def test_nanargminmax(self, opname, index_or_series):
obj = klass([NaT, datetime(2011, 11, 1), NaT])
# check DatetimeIndex non-monotonic path
assert getattr(obj, arg_op)() == 1
with tm.assert_produces_warning(warn, match=msg):
with tm.assert_produces_warning(FutureWarning, match=msg):
result = getattr(obj, arg_op)(skipna=False)
if klass is Series:
assert np.isnan(result)
Expand Down Expand Up @@ -836,16 +837,24 @@ def test_idxmin_dt64index(self):
ser = Series(
[1.0, 2.0, np.nan], index=DatetimeIndex(["NaT", "2015-02-08", "NaT"])
)
res = ser.idxmin(skipna=False)
msg = "The behavior of Series.idxmin with all-NA values"
with tm.assert_produces_warning(FutureWarning, match=msg):
res = ser.idxmin(skipna=False)
assert res is NaT
res = ser.idxmax(skipna=False)
msg = "The behavior of Series.idxmax with all-NA values"
with tm.assert_produces_warning(FutureWarning, match=msg):
res = ser.idxmax(skipna=False)
assert res is NaT

df = ser.to_frame()
res = df.idxmin(skipna=False)
msg = "The behavior of DataFrame.idxmin with all-NA values"
with tm.assert_produces_warning(FutureWarning, match=msg):
res = df.idxmin(skipna=False)
assert res.dtype == "M8[ns]"
assert res.isna().all()
res = df.idxmax(skipna=False)
msg = "The behavior of DataFrame.idxmax with all-NA values"
with tm.assert_produces_warning(FutureWarning, match=msg):
res = df.idxmax(skipna=False)
assert res.dtype == "M8[ns]"
assert res.isna().all()

Expand All @@ -859,7 +868,9 @@ def test_idxmin(self):

# skipna or no
assert string_series[string_series.idxmin()] == string_series.min()
assert isna(string_series.idxmin(skipna=False))
msg = "The behavior of Series.idxmin"
with tm.assert_produces_warning(FutureWarning, match=msg):
assert isna(string_series.idxmin(skipna=False))

# no NaNs
nona = string_series.dropna()
Expand All @@ -868,7 +879,8 @@ def test_idxmin(self):

# all NaNs
allna = string_series * np.nan
assert isna(allna.idxmin())
with tm.assert_produces_warning(FutureWarning, match=msg):
assert isna(allna.idxmin())

# datetime64[ns]
s = Series(date_range("20130102", periods=6))
Expand All @@ -889,7 +901,9 @@ def test_idxmax(self):

# skipna or no
assert string_series[string_series.idxmax()] == string_series.max()
assert isna(string_series.idxmax(skipna=False))
msg = "The behavior of Series.idxmax with all-NA values"
with tm.assert_produces_warning(FutureWarning, match=msg):
assert isna(string_series.idxmax(skipna=False))

# no NaNs
nona = string_series.dropna()
Expand All @@ -898,7 +912,9 @@ def test_idxmax(self):

# all NaNs
allna = string_series * np.nan
assert isna(allna.idxmax())
msg = "The behavior of Series.idxmax with all-NA values"
with tm.assert_produces_warning(FutureWarning, match=msg):
assert isna(allna.idxmax())

s = Series(date_range("20130102", periods=6))
result = s.idxmax()
Expand Down Expand Up @@ -1199,10 +1215,14 @@ def test_idxminmax_with_inf(self):
s = Series([0, -np.inf, np.inf, np.nan])

assert s.idxmin() == 1
assert np.isnan(s.idxmin(skipna=False))
msg = "The behavior of Series.idxmin with all-NA values"
with tm.assert_produces_warning(FutureWarning, match=msg):
assert np.isnan(s.idxmin(skipna=False))

assert s.idxmax() == 2
assert np.isnan(s.idxmax(skipna=False))
msg = "The behavior of Series.idxmax with all-NA values"
with tm.assert_produces_warning(FutureWarning, match=msg):
assert np.isnan(s.idxmax(skipna=False))

msg = "use_inf_as_na option is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
Expand Down