From c70529df65ac90deb52dbe1e8607d67de38fae1f Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Fri, 12 Apr 2024 16:56:36 +0200 Subject: [PATCH 1/7] enforce deprecation of the Series.argsort NA behavior --- pandas/core/series.py | 24 ++++++++++++--------- pandas/tests/extension/base/methods.py | 8 +++---- pandas/tests/series/methods/test_argsort.py | 19 ++-------------- 3 files changed, 19 insertions(+), 32 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 0f796964eb56d..25f36e5b4c2b0 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3783,19 +3783,23 @@ def argsort( mask = isna(values) if mask.any(): + raise TypeError( + "The behavior of Series.argsort in the presence of NA values " + "is not supported" + ) # TODO(3.0): once this deprecation is enforced we can call # self.array.argsort directly, which will close GH#43840 and # GH#12694 - warnings.warn( - "The behavior of Series.argsort in the presence of NA values is " - "deprecated. In a future version, NA values will be ordered " - "last instead of set to -1.", - FutureWarning, - stacklevel=find_stack_level(), - ) - result = np.full(len(self), -1, dtype=np.intp) - notmask = ~mask - result[notmask] = np.argsort(values[notmask], kind=kind) + # warnings.warn( + # "The behavior of Series.argsort in the presence of NA values is " + # "deprecated. In a future version, NA values will be ordered " + # "last instead of set to -1.", + # FutureWarning, + # stacklevel=find_stack_level(), + # ) + # result = np.full(len(self), -1, dtype=np.intp) + # notmask = ~mask + # result[notmask] = np.argsort(values[notmask], kind=kind) else: result = np.argsort(values, kind=kind) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 225a3301b8b8c..13cc200465d81 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -115,12 +115,10 @@ def test_argsort_missing_array(self, data_missing_for_sorting): expected = np.array([2, 0, 1], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) - def test_argsort_missing(self, data_missing_for_sorting): + def test_argsort_missing_raises(self, data_missing_for_sorting): msg = "The behavior of Series.argsort in the presence of NA values" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = pd.Series(data_missing_for_sorting).argsort() - expected = pd.Series(np.array([1, -1, 0], dtype=np.intp)) - tm.assert_series_equal(result, expected) + with pytest.raises(TypeError, match=msg): + pd.Series(data_missing_for_sorting).argsort() def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting, na_value): # GH 24382 diff --git a/pandas/tests/series/methods/test_argsort.py b/pandas/tests/series/methods/test_argsort.py index 432c0eceee011..34be128efa2d2 100644 --- a/pandas/tests/series/methods/test_argsort.py +++ b/pandas/tests/series/methods/test_argsort.py @@ -25,19 +25,6 @@ def test_argsort_numpy(self, datetime_series): expected = np.argsort(np.array(ser)) tm.assert_numpy_array_equal(res, expected) - # with missing values - ts = ser.copy() - ts[::2] = np.nan - - msg = "The behavior of Series.argsort in the presence of NA values" - with tm.assert_produces_warning( - FutureWarning, match=msg, check_stacklevel=False - ): - result = np.argsort(ts)[1::2] - expected = np.argsort(np.array(ts.dropna())) - - tm.assert_numpy_array_equal(result.values, expected) - def test_argsort(self, datetime_series): argsorted = datetime_series.argsort() assert issubclass(argsorted.dtype.type, np.integer) @@ -57,10 +44,8 @@ def test_argsort_dt64(self, unit): tm.assert_series_equal(result, expected) msg = "The behavior of Series.argsort in the presence of NA values" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = shifted.argsort() - expected = Series(list(range(4)) + [-1], dtype=np.intp) - tm.assert_series_equal(result, expected) + with pytest.raises(TypeError, match=msg): + shifted.argsort() def test_argsort_stable(self): ser = Series(np.random.default_rng(2).integers(0, 100, size=10000)) From 59db05eea15926f09d03245a6841eed5974d31b4 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Fri, 12 Apr 2024 16:59:41 +0200 Subject: [PATCH 2/7] remove comments --- pandas/core/series.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 25f36e5b4c2b0..b2db822f9835c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3787,19 +3787,6 @@ def argsort( "The behavior of Series.argsort in the presence of NA values " "is not supported" ) - # TODO(3.0): once this deprecation is enforced we can call - # self.array.argsort directly, which will close GH#43840 and - # GH#12694 - # warnings.warn( - # "The behavior of Series.argsort in the presence of NA values is " - # "deprecated. In a future version, NA values will be ordered " - # "last instead of set to -1.", - # FutureWarning, - # stacklevel=find_stack_level(), - # ) - # result = np.full(len(self), -1, dtype=np.intp) - # notmask = ~mask - # result[notmask] = np.argsort(values[notmask], kind=kind) else: result = np.argsort(values, kind=kind) From 6060ee7f60fcb56399e62de7936f8e7643be8c36 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Fri, 12 Apr 2024 22:29:34 +0200 Subject: [PATCH 3/7] add a note to v3.0.0 --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index e05cc87d1af14..699fd0c323d99 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -244,6 +244,7 @@ Removal of prior version deprecations/changes - Enforced deprecation of strings ``T``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`57627`) - Enforced deprecation of strings ``T``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`57627`) - Enforced deprecation of the behavior of :func:`concat` when ``len(keys) != len(objs)`` would truncate to the shorter of the two. Now this raises a ``ValueError`` (:issue:`43485`) +- Enforced deprecation of the behavior of :meth:`Series.argsort` in the presence of NA values (:issue:`58232`) - Enforced deprecation of values "pad", "ffill", "bfill", and "backfill" for :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` (:issue:`57869`) - Enforced deprecation removing :meth:`Categorical.to_list`, use ``obj.tolist()`` instead (:issue:`51254`) - Enforced silent-downcasting deprecation for :ref:`all relevant methods ` (:issue:`54710`) From 1ad77cf891dc81c546708cd78cc331d85e328fb3 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Sat, 13 Apr 2024 08:54:39 +0200 Subject: [PATCH 4/7] correct def argsort and tests --- pandas/core/series.py | 7 +++---- pandas/tests/extension/base/methods.py | 8 ++++---- pandas/tests/series/methods/test_argsort.py | 14 +++++++++++--- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index b2db822f9835c..b2b37feaae00a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3783,10 +3783,9 @@ def argsort( mask = isna(values) if mask.any(): - raise TypeError( - "The behavior of Series.argsort in the presence of NA values " - "is not supported" - ) + result = np.full(len(self), -1, dtype=np.intp) + notmask = ~mask + result[notmask] = np.argsort(values[notmask], kind=kind) else: result = np.argsort(values, kind=kind) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 13cc200465d81..2bfc61ad1ca94 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -115,10 +115,10 @@ def test_argsort_missing_array(self, data_missing_for_sorting): expected = np.array([2, 0, 1], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) - def test_argsort_missing_raises(self, data_missing_for_sorting): - msg = "The behavior of Series.argsort in the presence of NA values" - with pytest.raises(TypeError, match=msg): - pd.Series(data_missing_for_sorting).argsort() + def test_argsort_missing(self, data_missing_for_sorting): + result = pd.Series(data_missing_for_sorting).argsort() + expected = pd.Series(np.array([1, -1, 0], dtype=np.intp)) + tm.assert_series_equal(result, expected) def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting, na_value): # GH 24382 diff --git a/pandas/tests/series/methods/test_argsort.py b/pandas/tests/series/methods/test_argsort.py index 34be128efa2d2..61d902fb70dfe 100644 --- a/pandas/tests/series/methods/test_argsort.py +++ b/pandas/tests/series/methods/test_argsort.py @@ -25,6 +25,14 @@ def test_argsort_numpy(self, datetime_series): expected = np.argsort(np.array(ser)) tm.assert_numpy_array_equal(res, expected) + # with missing values + ts = ser.copy() + ts[::2] = np.nan + result = np.argsort(ts)[1::2] + expected = np.argsort(np.array(ts.dropna())) + + tm.assert_numpy_array_equal(result.values, expected) + def test_argsort(self, datetime_series): argsorted = datetime_series.argsort() assert issubclass(argsorted.dtype.type, np.integer) @@ -43,9 +51,9 @@ def test_argsort_dt64(self, unit): expected = Series(range(5), dtype=np.intp) tm.assert_series_equal(result, expected) - msg = "The behavior of Series.argsort in the presence of NA values" - with pytest.raises(TypeError, match=msg): - shifted.argsort() + result = shifted.argsort() + expected = Series(list(range(4)) + [-1], dtype=np.intp) + tm.assert_series_equal(result, expected) def test_argsort_stable(self): ser = Series(np.random.default_rng(2).integers(0, 100, size=10000)) From 6de0779eebe664277ace3910ba1005427fc6167c Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Mon, 15 Apr 2024 15:28:05 +0200 Subject: [PATCH 5/7] correct def argsort/tests --- pandas/core/series.py | 10 +--------- pandas/tests/extension/base/methods.py | 2 +- pandas/tests/series/methods/test_argsort.py | 13 ++----------- 3 files changed, 4 insertions(+), 21 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index b2b37feaae00a..b8419dbbfa5e9 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3779,15 +3779,7 @@ def argsort( # GH#54257 We allow -1 here so that np.argsort(series) works self._get_axis_number(axis) - values = self._values - mask = isna(values) - - if mask.any(): - result = np.full(len(self), -1, dtype=np.intp) - notmask = ~mask - result[notmask] = np.argsort(values[notmask], kind=kind) - else: - result = np.argsort(values, kind=kind) + result = self.array.argsort(kind=kind) res = self._constructor( result, index=self.index, name=self.name, dtype=np.intp, copy=False diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 2bfc61ad1ca94..3301797b31a27 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -117,7 +117,7 @@ def test_argsort_missing_array(self, data_missing_for_sorting): def test_argsort_missing(self, data_missing_for_sorting): result = pd.Series(data_missing_for_sorting).argsort() - expected = pd.Series(np.array([1, -1, 0], dtype=np.intp)) + expected = pd.Series(np.array([2, 0, 1], dtype=np.intp)) tm.assert_series_equal(result, expected) def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting, na_value): diff --git a/pandas/tests/series/methods/test_argsort.py b/pandas/tests/series/methods/test_argsort.py index 61d902fb70dfe..d70387a225949 100644 --- a/pandas/tests/series/methods/test_argsort.py +++ b/pandas/tests/series/methods/test_argsort.py @@ -19,20 +19,11 @@ def test_argsort_axis(self): ser.argsort(axis=2) def test_argsort_numpy(self, datetime_series): - ser = datetime_series - + ser = datetime_series res = np.argsort(ser).values expected = np.argsort(np.array(ser)) tm.assert_numpy_array_equal(res, expected) - # with missing values - ts = ser.copy() - ts[::2] = np.nan - result = np.argsort(ts)[1::2] - expected = np.argsort(np.array(ts.dropna())) - - tm.assert_numpy_array_equal(result.values, expected) - def test_argsort(self, datetime_series): argsorted = datetime_series.argsort() assert issubclass(argsorted.dtype.type, np.integer) @@ -52,7 +43,7 @@ def test_argsort_dt64(self, unit): tm.assert_series_equal(result, expected) result = shifted.argsort() - expected = Series(list(range(4)) + [-1], dtype=np.intp) + expected = Series(list(range(4)) + [4], dtype=np.intp) tm.assert_series_equal(result, expected) def test_argsort_stable(self): From e65b32130f48a8065b9c0b8fa94730414cae14d7 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Mon, 15 Apr 2024 15:30:22 +0200 Subject: [PATCH 6/7] fix pre-commit error --- pandas/tests/series/methods/test_argsort.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/series/methods/test_argsort.py b/pandas/tests/series/methods/test_argsort.py index d70387a225949..53a62f344437c 100644 --- a/pandas/tests/series/methods/test_argsort.py +++ b/pandas/tests/series/methods/test_argsort.py @@ -19,7 +19,7 @@ def test_argsort_axis(self): ser.argsort(axis=2) def test_argsort_numpy(self, datetime_series): - ser = datetime_series + ser = datetime_series res = np.argsort(ser).values expected = np.argsort(np.array(ser)) tm.assert_numpy_array_equal(res, expected) From 62349d599ed0d7df2a057239bc69b63829820139 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 25 Jun 2024 15:05:44 -0700 Subject: [PATCH 7/7] Restore numpy test --- pandas/tests/series/methods/test_argsort.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/series/methods/test_argsort.py b/pandas/tests/series/methods/test_argsort.py index 53a62f344437c..c1082c06ce307 100644 --- a/pandas/tests/series/methods/test_argsort.py +++ b/pandas/tests/series/methods/test_argsort.py @@ -24,6 +24,14 @@ def test_argsort_numpy(self, datetime_series): expected = np.argsort(np.array(ser)) tm.assert_numpy_array_equal(res, expected) + def test_argsort_numpy_missing(self): + data = [0.1, np.nan, 0.2, np.nan, 0.3] + ser = Series(data) + result = np.argsort(ser) + expected = np.argsort(np.array(data)) + + tm.assert_numpy_array_equal(result.values, expected) + def test_argsort(self, datetime_series): argsorted = datetime_series.argsort() assert issubclass(argsorted.dtype.type, np.integer)