From c5e09de9c531c546908f3508243283f16b1473fe Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 22 May 2023 09:11:27 -0700 Subject: [PATCH 1/3] DEPR: int slicing always positional --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/core/indexes/base.py | 20 ++++++++++++++++--- pandas/tests/frame/indexing/test_indexing.py | 4 +++- pandas/tests/indexing/test_floats.py | 9 +++++++-- .../tests/series/methods/test_interpolate.py | 14 ++++++------- 5 files changed, 35 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 6a1e5cdef30bc..44740a3d0544b 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -265,6 +265,7 @@ Deprecations - Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`) - Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`) - Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`) +- Deprecated the behavior of :meth:`Series.__getitem__`, :meth:`Series.__setitem__`, :meth:`DataFrame.__getitem__`, :meth:`DataFrame.__setitem__` with an integer slice on objects with a floating-dtype index, in a future version this will be treated as *positional* indexing (:issue:`49612`) .. --------------------------------------------------------------------------- .. _whatsnew_210.performance: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 331c229e153d1..7fb1e02c7c725 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4104,16 +4104,30 @@ def _convert_slice_indexer(self, key: slice, kind: Literal["loc", "getitem"]): # potentially cast the bounds to integers start, stop, step = key.start, key.stop, key.step + # figure out if this is a positional indexer + is_index_slice = is_valid_positional_slice(key) + # TODO(GH#50617): once Series.__[gs]etitem__ is removed we should be able # to simplify this. if isinstance(self.dtype, np.dtype) and self.dtype.kind == "f": # We always treat __getitem__ slicing as label-based # translate to locations + if kind == "getitem" and is_index_slice and not start == stop and step != 0: + # exclude step=0 from the warning because it will raise anyway + # start/stop both None e.g. [:] or [::-1] won't change. + # exclude start==stop since it will be empty either way, or + # will be [:] or [::-1] which won't change + warnings.warn( + # GH#49612 + "The behavior of obj[i:j] with a float-dtype index is " + "deprecated. In a future version, this will be treated as " + "positional instead of label-based. For label-based slicing, " + "use obj.loc[i:j] instead", + FutureWarning, + stacklevel=find_stack_level(), + ) return self.slice_indexer(start, stop, step) - # figure out if this is a positional indexer - is_index_slice = is_valid_positional_slice(key) - if kind == "getitem": # called from the getitem slicers, validate that we are in fact integers if is_index_slice: diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 9005798d66d17..47ed3ea0d2bec 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -745,7 +745,9 @@ def test_getitem_setitem_float_labels(self, using_array_manager): tm.assert_frame_equal(result, expected) df.loc[1:2] = 0 - result = df[1:2] + msg = r"The behavior of obj\[i:j\] with a float-dtype index" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df[1:2] assert (result == 0).all().all() # #2727 diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 0bcc2aa75d78a..45009e18153d2 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -486,8 +486,12 @@ def test_floating_misc(self, indexer_sl): for fancy_idx in [[5, 0], np.array([5, 0])]: tm.assert_series_equal(indexer_sl(s)[fancy_idx], expected) + warn = FutureWarning if indexer_sl is tm.setitem else None + msg = r"The behavior of obj\[i:j\] with a float-dtype index" + # all should return the same as we are slicing 'the same' - result1 = indexer_sl(s)[2:5] + with tm.assert_produces_warning(warn, match=msg): + result1 = indexer_sl(s)[2:5] result2 = indexer_sl(s)[2.0:5.0] result3 = indexer_sl(s)[2.0:5] result4 = indexer_sl(s)[2.1:5] @@ -496,7 +500,8 @@ def test_floating_misc(self, indexer_sl): tm.assert_series_equal(result1, result4) expected = Series([1, 2], index=[2.5, 5.0]) - result = indexer_sl(s)[2:5] + with tm.assert_produces_warning(warn, match=msg): + result = indexer_sl(s)[2:5] tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py index 6f4c4ba4dd69d..6d20bfb7f90ec 100644 --- a/pandas/tests/series/methods/test_interpolate.py +++ b/pandas/tests/series/methods/test_interpolate.py @@ -129,7 +129,7 @@ def test_interpolate_cubicspline(self): new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype( float ) - result = ser.reindex(new_index).interpolate(method="cubicspline")[1:3] + result = ser.reindex(new_index).interpolate(method="cubicspline").loc[1:3] tm.assert_series_equal(result, expected) @td.skip_if_no_scipy @@ -142,7 +142,7 @@ def test_interpolate_pchip(self): ).astype(float) interp_s = ser.reindex(new_index).interpolate(method="pchip") # does not blow up, GH5977 - interp_s[49:51] + interp_s.loc[49:51] @td.skip_if_no_scipy def test_interpolate_akima(self): @@ -157,7 +157,7 @@ def test_interpolate_akima(self): float ) interp_s = ser.reindex(new_index).interpolate(method="akima") - tm.assert_series_equal(interp_s[1:3], expected) + tm.assert_series_equal(interp_s.loc[1:3], expected) # interpolate at new_index where `der` is a non-zero int expected = Series( @@ -168,7 +168,7 @@ def test_interpolate_akima(self): float ) interp_s = ser.reindex(new_index).interpolate(method="akima", der=1) - tm.assert_series_equal(interp_s[1:3], expected) + tm.assert_series_equal(interp_s.loc[1:3], expected) @td.skip_if_no_scipy def test_interpolate_piecewise_polynomial(self): @@ -183,7 +183,7 @@ def test_interpolate_piecewise_polynomial(self): float ) interp_s = ser.reindex(new_index).interpolate(method="piecewise_polynomial") - tm.assert_series_equal(interp_s[1:3], expected) + tm.assert_series_equal(interp_s.loc[1:3], expected) @td.skip_if_no_scipy def test_interpolate_from_derivatives(self): @@ -198,7 +198,7 @@ def test_interpolate_from_derivatives(self): float ) interp_s = ser.reindex(new_index).interpolate(method="from_derivatives") - tm.assert_series_equal(interp_s[1:3], expected) + tm.assert_series_equal(interp_s.loc[1:3], expected) @pytest.mark.parametrize( "kwargs", @@ -218,7 +218,7 @@ def test_interpolate_corners(self, kwargs): def test_interpolate_index_values(self): s = Series(np.nan, index=np.sort(np.random.rand(30))) - s[::3] = np.random.randn(10) + s.loc[::3] = np.random.randn(10) vals = s.index.values.astype(float) From 389d5dfabded8431bc60dc68a58d66bbd1cb42ec Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 22 May 2023 10:27:26 -0700 Subject: [PATCH 2/3] update doc --- doc/source/user_guide/missing_data.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst index 6ea4c213e85c8..9eaa726850348 100644 --- a/doc/source/user_guide/missing_data.rst +++ b/doc/source/user_guide/missing_data.rst @@ -463,7 +463,7 @@ at the new values. # interpolate at new_index new_index = ser.index.union(pd.Index([49.25, 49.5, 49.75, 50.25, 50.5, 50.75])) interp_s = ser.reindex(new_index).interpolate(method="pchip") - interp_s[49:51] + interp_s.loc[49:51] .. _scipy: https://scipy.org/ .. _documentation: https://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation From fc94c0ba8c69e896dd73220994e6c6afbe96a6cf Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 22 May 2023 11:28:20 -0700 Subject: [PATCH 3/3] okwarning --- doc/source/whatsnew/v0.13.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.13.0.rst b/doc/source/whatsnew/v0.13.0.rst index 2e086f560bd53..bfabfb1a27e73 100644 --- a/doc/source/whatsnew/v0.13.0.rst +++ b/doc/source/whatsnew/v0.13.0.rst @@ -343,6 +343,7 @@ Float64Index API change Slicing is ALWAYS on the values of the index, for ``[],ix,loc`` and ALWAYS positional with ``iloc`` .. ipython:: python + :okwarning: s[2:4] s.loc[2:4]