From 52c82885bd67564ee0413065f4165fe4b8c55a6e Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 21 Jul 2023 14:42:30 -0700 Subject: [PATCH 1/4] DEPR: argsort --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/core/series.py | 7 +++++++ pandas/tests/extension/base/methods.py | 4 +++- pandas/tests/series/methods/test_argsort.py | 4 +++- 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index ed8646bafc4db..c80677a85c30c 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -295,6 +295,7 @@ Other API changes Deprecations ~~~~~~~~~~~~ +- Deprecated the behavior of :meth:`Series.argsort` in the presence of NA values; in a future version these will be sorted at the end instead of giving -1 (:issue:`??`) - Deprecated 'broadcast_axis' keyword in :meth:`Series.align` and :meth:`DataFrame.align`, upcast before calling ``align`` with ``left = DataFrame({col: left for col in right.columns}, index=right.index)`` (:issue:`51856`) - Deprecated 'downcast' keyword in :meth:`Index.fillna` (:issue:`53956`) - Deprecated 'fill_method' and 'limit' keywords in :meth:`DataFrame.pct_change`, :meth:`Series.pct_change`, :meth:`DataFrameGroupBy.pct_change`, and :meth:`SeriesGroupBy.pct_change`, explicitly call ``ffill`` or ``bfill`` before calling ``pct_change`` instead (:issue:`53491`) diff --git a/pandas/core/series.py b/pandas/core/series.py index b7445bf158b90..b376ee46b7949 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3946,6 +3946,13 @@ def argsort( mask = isna(values) if mask.any(): + warnings.warn( + "The behavior of Series.argsort in the presence of NA values is " + "deprecated. In a future version, NA values will be ordered " + "last instead of set to -1.", + FutureWarning, + stacklevel=find_stack_level(), + ) result = np.full(len(self), -1, dtype=np.intp) notmask = ~mask result[notmask] = np.argsort(values[notmask], kind=kind) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index bc79db3d18ad7..7a5c07e736811 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -107,7 +107,9 @@ def test_argsort_missing_array(self, data_missing_for_sorting): tm.assert_numpy_array_equal(result, expected) def test_argsort_missing(self, data_missing_for_sorting): - result = pd.Series(data_missing_for_sorting).argsort() + msg = "The behavior of Series.argsort in the presence of NA values" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = pd.Series(data_missing_for_sorting).argsort() expected = pd.Series(np.array([1, -1, 0], dtype=np.intp)) self.assert_series_equal(result, expected) diff --git a/pandas/tests/series/methods/test_argsort.py b/pandas/tests/series/methods/test_argsort.py index e1d64795e235d..c6ebd4bef4b18 100644 --- a/pandas/tests/series/methods/test_argsort.py +++ b/pandas/tests/series/methods/test_argsort.py @@ -41,7 +41,9 @@ def test_argsort(self, datetime_series): expected = Series(range(5), dtype=np.intp) tm.assert_series_equal(result, expected) - result = shifted.argsort() + msg = "The behavior of Series.argsort in the presence of NA values" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = shifted.argsort() expected = Series(list(range(4)) + [-1], dtype=np.intp) tm.assert_series_equal(result, expected) From 202152a40053c0b5f830adb5caf1aa830a6756e5 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 22 Jul 2023 08:42:08 -0700 Subject: [PATCH 2/4] Catch warning in np.argsort test --- pandas/tests/series/methods/test_argsort.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/tests/series/methods/test_argsort.py b/pandas/tests/series/methods/test_argsort.py index c6ebd4bef4b18..8549ecb99d061 100644 --- a/pandas/tests/series/methods/test_argsort.py +++ b/pandas/tests/series/methods/test_argsort.py @@ -21,7 +21,11 @@ def test_argsort_numpy(self, datetime_series): ts = ser.copy() ts[::2] = np.NaN - result = func(ts)[1::2] + msg = "The behavior of Series.argsort in the presence of NA values" + with tm.assert_produces_warning( + FutureWarning, match=msg, check_stacklevel=False + ): + result = func(ts)[1::2] expected = func(np.array(ts.dropna())) tm.assert_numpy_array_equal(result.values, expected, check_dtype=False) From 98904c57c08e772ce5e7f4b62c20050f09737099 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 24 Jul 2023 12:27:50 -0700 Subject: [PATCH 3/4] Update doc/source/whatsnew/v2.1.0.rst Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index c80677a85c30c..305768778a483 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -295,7 +295,7 @@ Other API changes Deprecations ~~~~~~~~~~~~ -- Deprecated the behavior of :meth:`Series.argsort` in the presence of NA values; in a future version these will be sorted at the end instead of giving -1 (:issue:`??`) +- Deprecated the behavior of :meth:`Series.argsort` in the presence of NA values; in a future version these will be sorted at the end instead of giving -1 (:issue:`54219`) - Deprecated 'broadcast_axis' keyword in :meth:`Series.align` and :meth:`DataFrame.align`, upcast before calling ``align`` with ``left = DataFrame({col: left for col in right.columns}, index=right.index)`` (:issue:`51856`) - Deprecated 'downcast' keyword in :meth:`Index.fillna` (:issue:`53956`) - Deprecated 'fill_method' and 'limit' keywords in :meth:`DataFrame.pct_change`, :meth:`Series.pct_change`, :meth:`DataFrameGroupBy.pct_change`, and :meth:`SeriesGroupBy.pct_change`, explicitly call ``ffill`` or ``bfill`` before calling ``pct_change`` instead (:issue:`53491`) From a0119a3dc0c643e4f127e5d4e617c2348730cce1 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 24 Jul 2023 13:42:19 -0700 Subject: [PATCH 4/4] sort hwatnsew --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 305768778a483..415b9e9c21f52 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -295,7 +295,6 @@ Other API changes Deprecations ~~~~~~~~~~~~ -- Deprecated the behavior of :meth:`Series.argsort` in the presence of NA values; in a future version these will be sorted at the end instead of giving -1 (:issue:`54219`) - Deprecated 'broadcast_axis' keyword in :meth:`Series.align` and :meth:`DataFrame.align`, upcast before calling ``align`` with ``left = DataFrame({col: left for col in right.columns}, index=right.index)`` (:issue:`51856`) - Deprecated 'downcast' keyword in :meth:`Index.fillna` (:issue:`53956`) - Deprecated 'fill_method' and 'limit' keywords in :meth:`DataFrame.pct_change`, :meth:`Series.pct_change`, :meth:`DataFrameGroupBy.pct_change`, and :meth:`SeriesGroupBy.pct_change`, explicitly call ``ffill`` or ``bfill`` before calling ``pct_change`` instead (:issue:`53491`) @@ -319,6 +318,7 @@ Deprecations - Deprecated the ``axis`` keyword in :meth:`DataFrame.ewm`, :meth:`Series.ewm`, :meth:`DataFrame.rolling`, :meth:`Series.rolling`, :meth:`DataFrame.expanding`, :meth:`Series.expanding` (:issue:`51778`) - Deprecated the ``axis`` keyword in :meth:`DataFrame.resample`, :meth:`Series.resample` (:issue:`51778`) - Deprecated the behavior of :func:`concat` with both ``len(keys) != len(objs)``, in a future version this will raise instead of truncating to the shorter of the two sequences (:issue:`43485`) +- Deprecated the behavior of :meth:`Series.argsort` in the presence of NA values; in a future version these will be sorted at the end instead of giving -1 (:issue:`54219`) - Deprecated the default of ``observed=False`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby`; this will default to ``True`` in a future version (:issue:`43999`) - Deprecating pinning ``group.name`` to each group in :meth:`SeriesGroupBy.aggregate` aggregations; if your operation requires utilizing the groupby keys, iterate over the groupby object instead (:issue:`41090`) - Deprecated the 'axis' keyword in :meth:`.GroupBy.idxmax`, :meth:`.GroupBy.idxmin`, :meth:`.GroupBy.fillna`, :meth:`.GroupBy.take`, :meth:`.GroupBy.skew`, :meth:`.GroupBy.rank`, :meth:`.GroupBy.cumprod`, :meth:`.GroupBy.cumsum`, :meth:`.GroupBy.cummax`, :meth:`.GroupBy.cummin`, :meth:`.GroupBy.pct_change`, :meth:`GroupBy.diff`, :meth:`.GroupBy.shift`, and :meth:`DataFrameGroupBy.corrwith`; for ``axis=1`` operate on the underlying :class:`DataFrame` instead (:issue:`50405`, :issue:`51046`)