diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index ed8646bafc4db..18c76dd3cca08 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -309,6 +309,7 @@ Deprecations - Deprecated ``axis=1`` in :meth:`DataFrame.ewm`, :meth:`DataFrame.rolling`, :meth:`DataFrame.expanding`, transpose before calling the method instead (:issue:`51778`) - Deprecated ``axis=1`` in :meth:`DataFrame.groupby` and in :class:`Grouper` constructor, do ``frame.T.groupby(...)`` instead (:issue:`51203`) - Deprecated accepting slices in :meth:`DataFrame.take`, call ``obj[slicer]`` or pass a sequence of integers instead (:issue:`51539`) +- Deprecated behavior of :meth:`DataFrame.idxmax`, :meth:`DataFrame.idxmin`, :meth:`Series.idxmax`, :meth:`Series.idxmin` in with all-NA entries or any-NA and ``skipna=False``; in a future version these will raise ``ValueError`` (:issue:`51276`) - Deprecated explicit support for subclassing :class:`Index` (:issue:`45289`) - Deprecated making functions given to :meth:`Series.agg` attempt to operate on each element in the :class:`Series` and only operate on the whole :class:`Series` if the elementwise operations failed. In the future, functions given to :meth:`Series.agg` will always operate on the whole :class:`Series` only. To keep the current behavior, use :meth:`Series.transform` instead. (:issue:`53325`) - Deprecated making the functions in a list of functions given to :meth:`DataFrame.agg` attempt to operate on each element in the :class:`DataFrame` and only operate on the columns of the :class:`DataFrame` if the elementwise operations failed. To keep the current behavior, use :meth:`DataFrame.transform` instead. (:issue:`53325`) diff --git a/pandas/conftest.py b/pandas/conftest.py index d7e8fbeb9336b..45fe755568d76 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -142,6 +142,10 @@ def pytest_collection_modifyitems(items, config) -> None: ("is_sparse", "is_sparse is deprecated"), ("NDFrame.replace", "The 'method' keyword"), ("NDFrame.replace", "Series.replace without 'value'"), + ("Series.idxmin", "The behavior of Series.idxmin"), + ("Series.idxmax", "The behavior of Series.idxmax"), + ("SeriesGroupBy.idxmin", "The behavior of Series.idxmin"), + ("SeriesGroupBy.idxmax", "The behavior of Series.idxmax"), # Docstring divides by zero to show behavior difference ("missing.mask_zero_div_zero", "divide by zero encountered"), ( diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ad437a96f7c89..6433488df34df 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -11264,6 +11264,15 @@ def idxmin( indices = res._values # indices will always be np.ndarray since axis is not N + if (indices == -1).any(): + warnings.warn( + f"The behavior of {type(self).__name__}.idxmin with all-NA " + "values, or any-NA and skipna=False, is deprecated. In a future " + "version this will raise ValueError", + FutureWarning, + stacklevel=find_stack_level(), + ) + index = data._get_axis(axis) result = algorithms.take( index._values, indices, allow_fill=True, fill_value=index._na_value @@ -11292,6 +11301,15 @@ def idxmax( indices = res._values # indices will always be 1d array since axis is not None + if (indices == -1).any(): + warnings.warn( + f"The behavior of {type(self).__name__}.idxmax with all-NA " + "values, or any-NA and skipna=False, is deprecated. In a future " + "version this will raise ValueError", + FutureWarning, + stacklevel=find_stack_level(), + ) + index = data._get_axis(axis) result = algorithms.take( index._values, indices, allow_fill=True, fill_value=index._na_value diff --git a/pandas/core/series.py b/pandas/core/series.py index b7445bf158b90..927168e8c473c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2536,8 +2536,16 @@ def idxmin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashab # warning for idxmin warnings.simplefilter("ignore") i = self.argmin(axis, skipna, *args, **kwargs) + if i == -1: # GH#43587 give correct NA value for Index. + warnings.warn( + f"The behavior of {type(self).__name__}.idxmin with all-NA " + "values, or any-NA and skipna=False, is deprecated. In a future " + "version this will raise ValueError", + FutureWarning, + stacklevel=find_stack_level(), + ) return self.index._na_value return self.index[i] @@ -2612,8 +2620,16 @@ def idxmax(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashab # warning for argmax warnings.simplefilter("ignore") i = self.argmax(axis, skipna, *args, **kwargs) + if i == -1: # GH#43587 give correct NA value for Index. + warnings.warn( + f"The behavior of {type(self).__name__}.idxmax with all-NA " + "values, or any-NA and skipna=False, is deprecated. In a future " + "version this will raise ValueError", + FutureWarning, + stacklevel=find_stack_level(), + ) return self.index._na_value return self.index[i] diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index bc79db3d18ad7..593babacbe5ff 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -161,9 +161,12 @@ def test_argreduce_series( ): # data_missing_for_sorting -> [B, NA, A] with A < B and NA missing. warn = None + msg = "The behavior of Series.argmax/argmin" if op_name.startswith("arg") and expected == -1: warn = FutureWarning - msg = "The behavior of Series.argmax/argmin" + if op_name.startswith("idx") and np.isnan(expected): + warn = FutureWarning + msg = f"The behavior of Series.{op_name}" ser = pd.Series(data_missing_for_sorting) with tm.assert_produces_warning(warn, match=msg): result = getattr(ser, op_name)(skipna=skipna) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index d1e48192edf0a..0c8e2be3eb836 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -968,8 +968,16 @@ def test_idxmin(self, float_frame, int_frame, skipna, axis): frame.iloc[5:10] = np.nan frame.iloc[15:20, -2:] = np.nan for df in [frame, int_frame]: - result = df.idxmin(axis=axis, skipna=skipna) - expected = df.apply(Series.idxmin, axis=axis, skipna=skipna) + warn = None + if skipna is False or axis == 1: + warn = None if df is int_frame else FutureWarning + msg = "The behavior of DataFrame.idxmin with all-NA values" + with tm.assert_produces_warning(warn, match=msg): + result = df.idxmin(axis=axis, skipna=skipna) + + msg2 = "The behavior of Series.idxmin" + with tm.assert_produces_warning(warn, match=msg2): + expected = df.apply(Series.idxmin, axis=axis, skipna=skipna) expected = expected.astype(df.index.dtype) tm.assert_series_equal(result, expected) @@ -1008,8 +1016,16 @@ def test_idxmax(self, float_frame, int_frame, skipna, axis): frame.iloc[5:10] = np.nan frame.iloc[15:20, -2:] = np.nan for df in [frame, int_frame]: - result = df.idxmax(axis=axis, skipna=skipna) - expected = df.apply(Series.idxmax, axis=axis, skipna=skipna) + warn = None + if skipna is False or axis == 1: + warn = None if df is int_frame else FutureWarning + msg = "The behavior of DataFrame.idxmax with all-NA values" + with tm.assert_produces_warning(warn, match=msg): + result = df.idxmax(axis=axis, skipna=skipna) + + msg2 = "The behavior of Series.idxmax" + with tm.assert_produces_warning(warn, match=msg2): + expected = df.apply(Series.idxmax, axis=axis, skipna=skipna) expected = expected.astype(df.index.dtype) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 2f5deb7b2bbd5..c2a4205349cb6 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -113,7 +113,6 @@ def test_nanargminmax(self, opname, index_or_series): # GH#7261 klass = index_or_series arg_op = "arg" + opname if klass is Index else "idx" + opname - warn = FutureWarning if klass is Index else None obj = klass([NaT, datetime(2011, 11, 1)]) assert getattr(obj, arg_op)() == 1 @@ -122,7 +121,9 @@ def test_nanargminmax(self, opname, index_or_series): "The behavior of (DatetimeIndex|Series).argmax/argmin with " "skipna=False and NAs" ) - with tm.assert_produces_warning(warn, match=msg): + if klass is Series: + msg = "The behavior of Series.(idxmax|idxmin) with all-NA" + with tm.assert_produces_warning(FutureWarning, match=msg): result = getattr(obj, arg_op)(skipna=False) if klass is Series: assert np.isnan(result) @@ -132,7 +133,7 @@ def test_nanargminmax(self, opname, index_or_series): obj = klass([NaT, datetime(2011, 11, 1), NaT]) # check DatetimeIndex non-monotonic path assert getattr(obj, arg_op)() == 1 - with tm.assert_produces_warning(warn, match=msg): + with tm.assert_produces_warning(FutureWarning, match=msg): result = getattr(obj, arg_op)(skipna=False) if klass is Series: assert np.isnan(result) @@ -836,16 +837,24 @@ def test_idxmin_dt64index(self): ser = Series( [1.0, 2.0, np.nan], index=DatetimeIndex(["NaT", "2015-02-08", "NaT"]) ) - res = ser.idxmin(skipna=False) + msg = "The behavior of Series.idxmin with all-NA values" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = ser.idxmin(skipna=False) assert res is NaT - res = ser.idxmax(skipna=False) + msg = "The behavior of Series.idxmax with all-NA values" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = ser.idxmax(skipna=False) assert res is NaT df = ser.to_frame() - res = df.idxmin(skipna=False) + msg = "The behavior of DataFrame.idxmin with all-NA values" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = df.idxmin(skipna=False) assert res.dtype == "M8[ns]" assert res.isna().all() - res = df.idxmax(skipna=False) + msg = "The behavior of DataFrame.idxmax with all-NA values" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = df.idxmax(skipna=False) assert res.dtype == "M8[ns]" assert res.isna().all() @@ -859,7 +868,9 @@ def test_idxmin(self): # skipna or no assert string_series[string_series.idxmin()] == string_series.min() - assert isna(string_series.idxmin(skipna=False)) + msg = "The behavior of Series.idxmin" + with tm.assert_produces_warning(FutureWarning, match=msg): + assert isna(string_series.idxmin(skipna=False)) # no NaNs nona = string_series.dropna() @@ -868,7 +879,8 @@ def test_idxmin(self): # all NaNs allna = string_series * np.nan - assert isna(allna.idxmin()) + with tm.assert_produces_warning(FutureWarning, match=msg): + assert isna(allna.idxmin()) # datetime64[ns] s = Series(date_range("20130102", periods=6)) @@ -889,7 +901,9 @@ def test_idxmax(self): # skipna or no assert string_series[string_series.idxmax()] == string_series.max() - assert isna(string_series.idxmax(skipna=False)) + msg = "The behavior of Series.idxmax with all-NA values" + with tm.assert_produces_warning(FutureWarning, match=msg): + assert isna(string_series.idxmax(skipna=False)) # no NaNs nona = string_series.dropna() @@ -898,7 +912,9 @@ def test_idxmax(self): # all NaNs allna = string_series * np.nan - assert isna(allna.idxmax()) + msg = "The behavior of Series.idxmax with all-NA values" + with tm.assert_produces_warning(FutureWarning, match=msg): + assert isna(allna.idxmax()) s = Series(date_range("20130102", periods=6)) result = s.idxmax() @@ -1199,10 +1215,14 @@ def test_idxminmax_with_inf(self): s = Series([0, -np.inf, np.inf, np.nan]) assert s.idxmin() == 1 - assert np.isnan(s.idxmin(skipna=False)) + msg = "The behavior of Series.idxmin with all-NA values" + with tm.assert_produces_warning(FutureWarning, match=msg): + assert np.isnan(s.idxmin(skipna=False)) assert s.idxmax() == 2 - assert np.isnan(s.idxmax(skipna=False)) + msg = "The behavior of Series.idxmax with all-NA values" + with tm.assert_produces_warning(FutureWarning, match=msg): + assert np.isnan(s.idxmax(skipna=False)) msg = "use_inf_as_na option is deprecated" with tm.assert_produces_warning(FutureWarning, match=msg):