Skip to content

Commit 889df6b

Browse files
jbrockmendel, jamie-harness
authored and committed
DEPR: idxmin/idxmax with all-NA (pandas-dev#54226)
* DEPR: idxmin/idxmax with all-NA
* Fix doctests
1 parent 76f0dad commit 889df6b

File tree

7 files changed

+96
-18
lines changed

7 files changed

+96
-18
lines changed

doc/source/whatsnew/v2.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,7 @@ Deprecations
309309
- Deprecated ``axis=1`` in :meth:`DataFrame.ewm`, :meth:`DataFrame.rolling`, :meth:`DataFrame.expanding`, transpose before calling the method instead (:issue:`51778`)
310310
- Deprecated ``axis=1`` in :meth:`DataFrame.groupby` and in :class:`Grouper` constructor, do ``frame.T.groupby(...)`` instead (:issue:`51203`)
311311
- Deprecated accepting slices in :meth:`DataFrame.take`, call ``obj[slicer]`` or pass a sequence of integers instead (:issue:`51539`)
312+
- Deprecated behavior of :meth:`DataFrame.idxmax`, :meth:`DataFrame.idxmin`, :meth:`Series.idxmax`, :meth:`Series.idxmin` with all-NA entries or any-NA and ``skipna=False``; in a future version these will raise ``ValueError`` (:issue:`51276`)
312313
- Deprecated explicit support for subclassing :class:`Index` (:issue:`45289`)
313314
- Deprecated making functions given to :meth:`Series.agg` attempt to operate on each element in the :class:`Series` and only operate on the whole :class:`Series` if the elementwise operations failed. In the future, functions given to :meth:`Series.agg` will always operate on the whole :class:`Series` only. To keep the current behavior, use :meth:`Series.transform` instead. (:issue:`53325`)
314315
- Deprecated making the functions in a list of functions given to :meth:`DataFrame.agg` attempt to operate on each element in the :class:`DataFrame` and only operate on the columns of the :class:`DataFrame` if the elementwise operations failed. To keep the current behavior, use :meth:`DataFrame.transform` instead. (:issue:`53325`)

pandas/conftest.py

+4
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,10 @@ def pytest_collection_modifyitems(items, config) -> None:
142142
("is_sparse", "is_sparse is deprecated"),
143143
("NDFrame.replace", "The 'method' keyword"),
144144
("NDFrame.replace", "Series.replace without 'value'"),
145+
("Series.idxmin", "The behavior of Series.idxmin"),
146+
("Series.idxmax", "The behavior of Series.idxmax"),
147+
("SeriesGroupBy.idxmin", "The behavior of Series.idxmin"),
148+
("SeriesGroupBy.idxmax", "The behavior of Series.idxmax"),
145149
# Docstring divides by zero to show behavior difference
146150
("missing.mask_zero_div_zero", "divide by zero encountered"),
147151
(

pandas/core/frame.py

+18
Original file line numberDiff line numberDiff line change
@@ -11292,6 +11292,15 @@ def idxmin(
1129211292
indices = res._values
1129311293
# indices will always be np.ndarray since axis is not None
1129411294

11295+
if (indices == -1).any():
11296+
warnings.warn(
11297+
f"The behavior of {type(self).__name__}.idxmin with all-NA "
11298+
"values, or any-NA and skipna=False, is deprecated. In a future "
11299+
"version this will raise ValueError",
11300+
FutureWarning,
11301+
stacklevel=find_stack_level(),
11302+
)
11303+
1129511304
index = data._get_axis(axis)
1129611305
result = algorithms.take(
1129711306
index._values, indices, allow_fill=True, fill_value=index._na_value
@@ -11320,6 +11329,15 @@ def idxmax(
1132011329
indices = res._values
1132111330
# indices will always be 1d array since axis is not None
1132211331

11332+
if (indices == -1).any():
11333+
warnings.warn(
11334+
f"The behavior of {type(self).__name__}.idxmax with all-NA "
11335+
"values, or any-NA and skipna=False, is deprecated. In a future "
11336+
"version this will raise ValueError",
11337+
FutureWarning,
11338+
stacklevel=find_stack_level(),
11339+
)
11340+
1132311341
index = data._get_axis(axis)
1132411342
result = algorithms.take(
1132511343
index._values, indices, allow_fill=True, fill_value=index._na_value

pandas/core/series.py

+16
Original file line numberDiff line numberDiff line change
@@ -2536,8 +2536,16 @@ def idxmin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashab
25362536
# warning for idxmin
25372537
warnings.simplefilter("ignore")
25382538
i = self.argmin(axis, skipna, *args, **kwargs)
2539+
25392540
if i == -1:
25402541
# GH#43587 give correct NA value for Index.
2542+
warnings.warn(
2543+
f"The behavior of {type(self).__name__}.idxmin with all-NA "
2544+
"values, or any-NA and skipna=False, is deprecated. In a future "
2545+
"version this will raise ValueError",
2546+
FutureWarning,
2547+
stacklevel=find_stack_level(),
2548+
)
25412549
return self.index._na_value
25422550
return self.index[i]
25432551

@@ -2612,8 +2620,16 @@ def idxmax(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashab
26122620
# warning for argmax
26132621
warnings.simplefilter("ignore")
26142622
i = self.argmax(axis, skipna, *args, **kwargs)
2623+
26152624
if i == -1:
26162625
# GH#43587 give correct NA value for Index.
2626+
warnings.warn(
2627+
f"The behavior of {type(self).__name__}.idxmax with all-NA "
2628+
"values, or any-NA and skipna=False, is deprecated. In a future "
2629+
"version this will raise ValueError",
2630+
FutureWarning,
2631+
stacklevel=find_stack_level(),
2632+
)
26172633
return self.index._na_value
26182634
return self.index[i]
26192635

pandas/tests/extension/base/methods.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -161,9 +161,12 @@ def test_argreduce_series(
161161
):
162162
# data_missing_for_sorting -> [B, NA, A] with A < B and NA missing.
163163
warn = None
164+
msg = "The behavior of Series.argmax/argmin"
164165
if op_name.startswith("arg") and expected == -1:
165166
warn = FutureWarning
166-
msg = "The behavior of Series.argmax/argmin"
167+
if op_name.startswith("idx") and np.isnan(expected):
168+
warn = FutureWarning
169+
msg = f"The behavior of Series.{op_name}"
167170
ser = pd.Series(data_missing_for_sorting)
168171
with tm.assert_produces_warning(warn, match=msg):
169172
result = getattr(ser, op_name)(skipna=skipna)

pandas/tests/frame/test_reductions.py

+20-4
Original file line numberDiff line numberDiff line change
@@ -968,8 +968,16 @@ def test_idxmin(self, float_frame, int_frame, skipna, axis):
968968
frame.iloc[5:10] = np.nan
969969
frame.iloc[15:20, -2:] = np.nan
970970
for df in [frame, int_frame]:
971-
result = df.idxmin(axis=axis, skipna=skipna)
972-
expected = df.apply(Series.idxmin, axis=axis, skipna=skipna)
971+
warn = None
972+
if skipna is False or axis == 1:
973+
warn = None if df is int_frame else FutureWarning
974+
msg = "The behavior of DataFrame.idxmin with all-NA values"
975+
with tm.assert_produces_warning(warn, match=msg):
976+
result = df.idxmin(axis=axis, skipna=skipna)
977+
978+
msg2 = "The behavior of Series.idxmin"
979+
with tm.assert_produces_warning(warn, match=msg2):
980+
expected = df.apply(Series.idxmin, axis=axis, skipna=skipna)
973981
expected = expected.astype(df.index.dtype)
974982
tm.assert_series_equal(result, expected)
975983

@@ -1009,8 +1017,16 @@ def test_idxmax(self, float_frame, int_frame, skipna, axis):
10091017
frame.iloc[5:10] = np.nan
10101018
frame.iloc[15:20, -2:] = np.nan
10111019
for df in [frame, int_frame]:
1012-
result = df.idxmax(axis=axis, skipna=skipna)
1013-
expected = df.apply(Series.idxmax, axis=axis, skipna=skipna)
1020+
warn = None
1021+
if skipna is False or axis == 1:
1022+
warn = None if df is int_frame else FutureWarning
1023+
msg = "The behavior of DataFrame.idxmax with all-NA values"
1024+
with tm.assert_produces_warning(warn, match=msg):
1025+
result = df.idxmax(axis=axis, skipna=skipna)
1026+
1027+
msg2 = "The behavior of Series.idxmax"
1028+
with tm.assert_produces_warning(warn, match=msg2):
1029+
expected = df.apply(Series.idxmax, axis=axis, skipna=skipna)
10141030
expected = expected.astype(df.index.dtype)
10151031
tm.assert_series_equal(result, expected)
10161032

pandas/tests/reductions/test_reductions.py

+33-13
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,6 @@ def test_nanargminmax(self, opname, index_or_series):
116116
# GH#7261
117117
klass = index_or_series
118118
arg_op = "arg" + opname if klass is Index else "idx" + opname
119-
warn = FutureWarning if klass is Index else None
120119

121120
obj = klass([NaT, datetime(2011, 11, 1)])
122121
assert getattr(obj, arg_op)() == 1
@@ -125,7 +124,9 @@ def test_nanargminmax(self, opname, index_or_series):
125124
"The behavior of (DatetimeIndex|Series).argmax/argmin with "
126125
"skipna=False and NAs"
127126
)
128-
with tm.assert_produces_warning(warn, match=msg):
127+
if klass is Series:
128+
msg = "The behavior of Series.(idxmax|idxmin) with all-NA"
129+
with tm.assert_produces_warning(FutureWarning, match=msg):
129130
result = getattr(obj, arg_op)(skipna=False)
130131
if klass is Series:
131132
assert np.isnan(result)
@@ -135,7 +136,7 @@ def test_nanargminmax(self, opname, index_or_series):
135136
obj = klass([NaT, datetime(2011, 11, 1), NaT])
136137
# check DatetimeIndex non-monotonic path
137138
assert getattr(obj, arg_op)() == 1
138-
with tm.assert_produces_warning(warn, match=msg):
139+
with tm.assert_produces_warning(FutureWarning, match=msg):
139140
result = getattr(obj, arg_op)(skipna=False)
140141
if klass is Series:
141142
assert np.isnan(result)
@@ -839,16 +840,24 @@ def test_idxmin_dt64index(self):
839840
ser = Series(
840841
[1.0, 2.0, np.nan], index=DatetimeIndex(["NaT", "2015-02-08", "NaT"])
841842
)
842-
res = ser.idxmin(skipna=False)
843+
msg = "The behavior of Series.idxmin with all-NA values"
844+
with tm.assert_produces_warning(FutureWarning, match=msg):
845+
res = ser.idxmin(skipna=False)
843846
assert res is NaT
844-
res = ser.idxmax(skipna=False)
847+
msg = "The behavior of Series.idxmax with all-NA values"
848+
with tm.assert_produces_warning(FutureWarning, match=msg):
849+
res = ser.idxmax(skipna=False)
845850
assert res is NaT
846851

847852
df = ser.to_frame()
848-
res = df.idxmin(skipna=False)
853+
msg = "The behavior of DataFrame.idxmin with all-NA values"
854+
with tm.assert_produces_warning(FutureWarning, match=msg):
855+
res = df.idxmin(skipna=False)
849856
assert res.dtype == "M8[ns]"
850857
assert res.isna().all()
851-
res = df.idxmax(skipna=False)
858+
msg = "The behavior of DataFrame.idxmax with all-NA values"
859+
with tm.assert_produces_warning(FutureWarning, match=msg):
860+
res = df.idxmax(skipna=False)
852861
assert res.dtype == "M8[ns]"
853862
assert res.isna().all()
854863

@@ -862,7 +871,9 @@ def test_idxmin(self):
862871

863872
# skipna or no
864873
assert string_series[string_series.idxmin()] == string_series.min()
865-
assert isna(string_series.idxmin(skipna=False))
874+
msg = "The behavior of Series.idxmin"
875+
with tm.assert_produces_warning(FutureWarning, match=msg):
876+
assert isna(string_series.idxmin(skipna=False))
866877

867878
# no NaNs
868879
nona = string_series.dropna()
@@ -871,7 +882,8 @@ def test_idxmin(self):
871882

872883
# all NaNs
873884
allna = string_series * np.nan
874-
assert isna(allna.idxmin())
885+
with tm.assert_produces_warning(FutureWarning, match=msg):
886+
assert isna(allna.idxmin())
875887

876888
# datetime64[ns]
877889
s = Series(date_range("20130102", periods=6))
@@ -892,7 +904,9 @@ def test_idxmax(self):
892904

893905
# skipna or no
894906
assert string_series[string_series.idxmax()] == string_series.max()
895-
assert isna(string_series.idxmax(skipna=False))
907+
msg = "The behavior of Series.idxmax with all-NA values"
908+
with tm.assert_produces_warning(FutureWarning, match=msg):
909+
assert isna(string_series.idxmax(skipna=False))
896910

897911
# no NaNs
898912
nona = string_series.dropna()
@@ -901,7 +915,9 @@ def test_idxmax(self):
901915

902916
# all NaNs
903917
allna = string_series * np.nan
904-
assert isna(allna.idxmax())
918+
msg = "The behavior of Series.idxmax with all-NA values"
919+
with tm.assert_produces_warning(FutureWarning, match=msg):
920+
assert isna(allna.idxmax())
905921

906922
s = Series(date_range("20130102", periods=6))
907923
result = s.idxmax()
@@ -1202,10 +1218,14 @@ def test_idxminmax_with_inf(self):
12021218
s = Series([0, -np.inf, np.inf, np.nan])
12031219

12041220
assert s.idxmin() == 1
1205-
assert np.isnan(s.idxmin(skipna=False))
1221+
msg = "The behavior of Series.idxmin with all-NA values"
1222+
with tm.assert_produces_warning(FutureWarning, match=msg):
1223+
assert np.isnan(s.idxmin(skipna=False))
12061224

12071225
assert s.idxmax() == 2
1208-
assert np.isnan(s.idxmax(skipna=False))
1226+
msg = "The behavior of Series.idxmax with all-NA values"
1227+
with tm.assert_produces_warning(FutureWarning, match=msg):
1228+
assert np.isnan(s.idxmax(skipna=False))
12091229

12101230
msg = "use_inf_as_na option is deprecated"
12111231
with tm.assert_produces_warning(FutureWarning, match=msg):

0 commit comments

Comments
 (0)