DEPR: inconsistent series[i:j] slicing with Int64Index GH#45162 (#45324)

jbrockmendel · web-flow · commit 51675d083948 · 2022-01-16T11:20:38.000-05:00
diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
@@ -94,6 +94,50 @@ Other API changes
 
 Deprecations
 ~~~~~~~~~~~~
+
+.. _whatsnew_150.deprecations.int_slicing_series:
+
+In a future version, integer slicing on a :class:`Series` with an :class:`Int64Index` or :class:`RangeIndex` will be treated as *label-based*, not positional. This will make the behavior consistent with other :meth:`Series.__getitem__` and :meth:`Series.__setitem__` behaviors (:issue:`45162`).
+
+For example:
+
+.. ipython:: python
+
+   ser = pd.Series([1, 2, 3, 4, 5], index=[2, 3, 5, 7, 11])
+
+In the old behavior, ``ser[2:4]`` treats the slice as positional:
+
+*Old behavior*:
+
+.. code-block:: ipython
+
+    In [3]: ser[2:4]
+    Out[3]:
+    5    3
+    7    4
+    dtype: int64
+
+In a future version, ``ser[2:4]`` will be treated as label-based, i.e. it will match ``ser.loc[2:4]``:
+
+*Future behavior*:
+
+.. code-block:: ipython
+
+    In [4]: ser.loc[2:4]
+    Out[4]:
+    2    1
+    3    2
+    dtype: int64
+
+To retain the old behavior, use ``series.iloc[i:j]``. To get the future behavior,
+use ``series.loc[i:j]``.
+
+Slicing on a :class:`DataFrame` will not be affected.
+
+.. _whatsnew_150.deprecations.other:
+
+Other Deprecations
+^^^^^^^^^^^^^^^^^^
 - Deprecated the keyword ``line_terminator`` in :meth:`DataFrame.to_csv` and :meth:`Series.to_csv`, use ``lineterminator`` instead; this is for consistency with :func:`read_csv` and the standard library 'csv' module (:issue:`9568`)
 - Deprecated :meth:`DataFrame.iteritems`, :meth:`Series.iteritems`, :meth:`HDFStore.iteritems` in favor of :meth:`DataFrame.items`, :meth:`Series.items`, :meth:`HDFStore.items`  (:issue:`45321`)
 -
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -110,6 +110,7 @@
     ABCDatetimeIndex,
     ABCMultiIndex,
     ABCPeriodIndex,
+    ABCRangeIndex,
     ABCSeries,
     ABCTimedeltaIndex,
 )
@@ -3989,7 +3990,7 @@ def _validate_positional_slice(self, key: slice) -> None:
         self._validate_indexer("positional", key.stop, "iloc")
         self._validate_indexer("positional", key.step, "iloc")
 
-    def _convert_slice_indexer(self, key: slice, kind: str_t):
+    def _convert_slice_indexer(self, key: slice, kind: str_t, is_frame: bool = False):
         """
         Convert a slice indexer.
 
@@ -4000,6 +4001,9 @@ def _convert_slice_indexer(self, key: slice, kind: str_t):
         ----------
         key : label of the slice bound
         kind : {'loc', 'getitem'}
+        is_frame : bool, default False
+            Whether this is a slice called on DataFrame.__getitem__
+            as opposed to Series.__getitem__
         """
         assert kind in ["loc", "getitem"], kind
 
@@ -4020,7 +4024,44 @@ def is_int(v):
             called from the getitem slicers, validate that we are in fact
             integers
             """
+            if self.is_integer():
+                if is_frame:
+                    # unambiguously positional, no deprecation
+                    pass
+                elif start is None and stop is None:
+                    # label-based vs positional is irrelevant
+                    pass
+                elif isinstance(self, ABCRangeIndex) and self._range == range(
+                    len(self)
+                ):
+                    # In this case there is no difference between label-based
+                    #  and positional, so nothing will change.
+                    pass
+                elif (
+                    self.dtype.kind in ["i", "u"]
+                    and self._is_strictly_monotonic_increasing
+                    and len(self) > 0
+                    and self[0] == 0
+                    and self[-1] == len(self) - 1
+                ):
+                    # We are range-like, e.g. created with Index(np.arange(N))
+                    pass
+                elif not is_index_slice:
+                    # we're going to raise, so don't bother warning, e.g.
+                    #  test_integer_positional_indexing
+                    pass
+                else:
+                    warnings.warn(
+                        "The behavior of `series[i:j]` with an integer-dtype index "
+                        "is deprecated. In a future version, this will be treated "
+                        "as *label-based* indexing, consistent with e.g. `series[i]` "
+                        "lookups. To retain the old behavior, use `series.iloc[i:j]`. "
+                        "To get the future behavior, use `series.loc[i:j]`.",
+                        FutureWarning,
+                        stacklevel=find_stack_level(),
+                    )
             if self.is_integer() or is_index_slice:
+                # Note: these checks are redundant if we know is_index_slice
                 self._validate_indexer("slice", key.start, "getitem")
                 self._validate_indexer("slice", key.stop, "getitem")
                 self._validate_indexer("slice", key.step, "getitem")
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
@@ -754,7 +754,7 @@ def _index_as_unique(self) -> bool:
         "cannot handle overlapping indices; use IntervalIndex.get_indexer_non_unique"
     )
 
-    def _convert_slice_indexer(self, key: slice, kind: str):
+    def _convert_slice_indexer(self, key: slice, kind: str, is_frame: bool = False):
         if not (key.step is None or key.step == 1):
             # GH#31658 if label-based, we require step == 1,
             #  if positional, we disallow float start/stop
@@ -766,7 +766,7 @@ def _convert_slice_indexer(self, key: slice, kind: str):
                     # i.e. this cannot be interpreted as a positional slice
                     raise ValueError(msg)
 
-        return super()._convert_slice_indexer(key, kind)
+        return super()._convert_slice_indexer(key, kind, is_frame=is_frame)
 
     @cache_readonly
     def _should_fallback_to_positional(self) -> bool:
diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py
@@ -268,15 +268,15 @@ def _should_fallback_to_positional(self) -> bool:
         return False
 
     @doc(Index._convert_slice_indexer)
-    def _convert_slice_indexer(self, key: slice, kind: str):
+    def _convert_slice_indexer(self, key: slice, kind: str, is_frame: bool = False):
         if is_float_dtype(self.dtype):
             assert kind in ["loc", "getitem"]
 
             # We always treat __getitem__ slicing as label-based
             # translate to locations
             return self.slice_indexer(key.start, key.stop, key.step)
 
-        return super()._convert_slice_indexer(key, kind=kind)
+        return super()._convert_slice_indexer(key, kind=kind, is_frame=is_frame)
 
     @doc(Index._maybe_cast_slice_bound)
     def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default):
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -2313,7 +2313,7 @@ def convert_to_index_sliceable(obj: DataFrame, key):
     """
     idx = obj.index
     if isinstance(key, slice):
-        return idx._convert_slice_indexer(key, kind="getitem")
+        return idx._convert_slice_indexer(key, kind="getitem", is_frame=True)
 
     elif isinstance(key, str):
 
diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py
@@ -313,7 +313,8 @@ def test_get(self, data):
         expected = s.iloc[[2, 3]]
         self.assert_series_equal(result, expected)
 
-        result = s.get(slice(2))
+        with tm.assert_produces_warning(FutureWarning, match="label-based"):
+            result = s.get(slice(2))
         expected = s.iloc[[0, 1]]
         self.assert_series_equal(result, expected)
 
@@ -336,7 +337,9 @@ def test_get(self, data):
 
         # GH 21257
         s = pd.Series(data)
-        s2 = s[::2]
+        with tm.assert_produces_warning(None):
+            # GH#45324 make sure we aren't giving a spurious FutureWarning
+            s2 = s[::2]
         assert s2.get(1) is None
 
     def test_take_sequence(self, data):
diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
@@ -1009,7 +1009,7 @@ def test_iloc_row_slice_view(self, using_array_manager):
         exp_col = original[2].copy()
         # TODO(ArrayManager) verify it is expected that the original didn't change
         if not using_array_manager:
-            exp_col[4:8] = 0.0
+            exp_col._values[4:8] = 0.0
         tm.assert_series_equal(df[2], exp_col)
 
     def test_iloc_col(self):
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
@@ -367,11 +367,11 @@ def test_apply_frame_not_as_index_column_name(df):
 
 def test_apply_frame_concat_series():
     def trans(group):
-        return group.groupby("B")["C"].sum().sort_values()[:2]
+        return group.groupby("B")["C"].sum().sort_values().iloc[:2]
 
     def trans2(group):
         grouped = group.groupby(df.reindex(group.index)["B"])
-        return grouped.sum().sort_values()[:2]
+        return grouped.sum().sort_values().iloc[:2]
 
     df = DataFrame(
         {
@@ -409,7 +409,7 @@ def test_apply_chunk_view():
     # Low level tinkering could be unsafe, make sure not
     df = DataFrame({"key": [1, 1, 1, 2, 2, 2, 3, 3, 3], "value": range(9)})
 
-    result = df.groupby("key", group_keys=False).apply(lambda x: x[:2])
+    result = df.groupby("key", group_keys=False).apply(lambda x: x.iloc[:2])
     expected = df.take([0, 1, 3, 4, 6, 7])
     tm.assert_frame_equal(result, expected)
 
diff --git a/pandas/tests/indexes/numeric/test_numeric.py b/pandas/tests/indexes/numeric/test_numeric.py
@@ -657,7 +657,7 @@ def test_uint_index_does_not_convert_to_float64(box):
     )
     tm.assert_index_equal(result.index, expected)
 
-    tm.assert_equal(result, series[:3])
+    tm.assert_equal(result, series.iloc[:3])
 
 
 def test_float64_index_equals():
diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py
@@ -343,7 +343,8 @@ def test_integer_positional_indexing(self, idx):
         """
         s = Series(range(2, 6), index=range(2, 6))
 
-        result = s[2:4]
+        with tm.assert_produces_warning(FutureWarning, match="label-based"):
+            result = s[2:4]
         expected = s.iloc[2:4]
         tm.assert_series_equal(result, expected)
 
diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
@@ -1386,8 +1386,10 @@ def test_iloc(self):
         tm.assert_series_equal(result, expected)
 
         # test slice is a view
-        result[:] = 0
-        assert (ser[1:3] == 0).all()
+        with tm.assert_produces_warning(None):
+            # GH#45324 make sure we aren't giving a spurious FutureWarning
+            result[:] = 0
+        assert (ser.iloc[1:3] == 0).all()
 
         # list of integers
         result = ser.iloc[[0, 2, 3, 4, 5]]
diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
@@ -68,7 +68,8 @@ def test_setitem_ndarray_1d_2(self):
 
         msg = "Must have equal len keys and value when setting with an iterable"
         with pytest.raises(ValueError, match=msg):
-            df[2:5] = np.arange(1, 4) * 1j
+            with tm.assert_produces_warning(FutureWarning, match="label-based"):
+                df[2:5] = np.arange(1, 4) * 1j
 
     def test_getitem_ndarray_3d(
         self, index, frame_or_series, indexer_sli, using_array_manager
diff --git a/pandas/tests/series/indexing/test_get.py b/pandas/tests/series/indexing/test_get.py
@@ -167,7 +167,8 @@ def test_get_with_ea(arr):
     expected = ser.iloc[[2, 3]]
     tm.assert_series_equal(result, expected)
 
-    result = ser.get(slice(2))
+    with tm.assert_produces_warning(FutureWarning, match="label-based"):
+        result = ser.get(slice(2))
     expected = ser.iloc[[0, 1]]
     tm.assert_series_equal(result, expected)
 
diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py
@@ -332,7 +332,8 @@ def test_getitem_slice_bug(self):
     def test_getitem_slice_integers(self):
         ser = Series(np.random.randn(8), index=[2, 4, 6, 8, 10, 12, 14, 16])
 
-        result = ser[:4]
+        with tm.assert_produces_warning(FutureWarning, match="label-based"):
+            result = ser[:4]
         expected = Series(ser.values[:4], index=[2, 4, 6, 8])
         tm.assert_series_equal(result, expected)
 
diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py
@@ -215,9 +215,15 @@ def test_setitem_slice(self):
     def test_setitem_slice_integers(self):
         ser = Series(np.random.randn(8), index=[2, 4, 6, 8, 10, 12, 14, 16])
 
-        ser[:4] = 0
-        assert (ser[:4] == 0).all()
-        assert not (ser[4:] == 0).any()
+        msg = r"In a future version, this will be treated as \*label-based\* indexing"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            ser[:4] = 0
+        with tm.assert_produces_warning(
+            FutureWarning, match=msg, check_stacklevel=False
+        ):
+            assert (ser[:4] == 0).all()
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            assert not (ser[4:] == 0).any()
 
     def test_setitem_slicestep(self):
         # caught this bug when writing tests
diff --git a/pandas/tests/series/methods/test_item.py b/pandas/tests/series/methods/test_item.py
@@ -55,5 +55,5 @@ def test_item(self):
 
         # Case where ser[0] would not work
         ser = Series(dti, index=[5, 6])
-        val = ser[:1].item()
+        val = ser.iloc[:1].item()
         assert val == dti[0]
diff --git a/pandas/tests/window/test_apply.py b/pandas/tests/window/test_apply.py
@@ -214,7 +214,8 @@ def test_center(raw):
     expected = (
         concat([obj, Series([np.NaN] * 9)])
         .rolling(20, min_periods=15)
-        .apply(f, raw=raw)[9:]
+        .apply(f, raw=raw)
+        .iloc[9:]
         .reset_index(drop=True)
     )
     tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py
@@ -552,11 +552,11 @@ def test_ew_min_periods(min_periods, name):
 
 @pytest.mark.parametrize("name", ["cov", "corr"])
 def test_ewm_corr_cov(name):
-    A = Series(np.random.randn(50), index=np.arange(50))
+    A = Series(np.random.randn(50), index=range(50))
     B = A[2:] + np.random.randn(48)
 
     A[:10] = np.NaN
-    B[-10:] = np.NaN
+    B.iloc[-10:] = np.NaN
 
     result = getattr(A.ewm(com=20, min_periods=5), name)(B)
     assert np.isnan(result.values[:14]).all()
@@ -567,11 +567,11 @@ def test_ewm_corr_cov(name):
 @pytest.mark.parametrize("name", ["cov", "corr"])
 def test_ewm_corr_cov_min_periods(name, min_periods):
     # GH 7898
-    A = Series(np.random.randn(50), index=np.arange(50))
+    A = Series(np.random.randn(50), index=range(50))
     B = A[2:] + np.random.randn(48)
 
     A[:10] = np.NaN
-    B[-10:] = np.NaN
+    B.iloc[-10:] = np.NaN
 
     result = getattr(A.ewm(com=20, min_periods=min_periods), name)(B)
     # binary functions (ewmcov, ewmcorr) with bias=False require at
@@ -593,7 +593,7 @@ def test_ewm_corr_cov_min_periods(name, min_periods):
 
 @pytest.mark.parametrize("name", ["cov", "corr"])
 def test_different_input_array_raise_exception(name):
-    A = Series(np.random.randn(50), index=np.arange(50))
+    A = Series(np.random.randn(50), index=range(50))
     A[:10] = np.NaN
 
     msg = "other must be a DataFrame or Series"
diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py
@@ -1391,7 +1391,7 @@ def test_rolling_corr_timedelta_index(index, window):
     # GH: 31286
     x = Series([1, 2, 3, 4, 5], index=index)
     y = x.copy()
-    x[0:2] = 0.0
+    x.iloc[0:2] = 0.0
     result = x.rolling(window).corr(y)
     expected = Series([np.nan, np.nan, 1, 1, 1], index=index)
     tm.assert_almost_equal(result, expected)
diff --git a/pandas/tests/window/test_rolling_functions.py b/pandas/tests/window/test_rolling_functions.py
@@ -247,9 +247,13 @@ def test_center(roll_func, kwargs, minp):
     result = getattr(obj.rolling(20, min_periods=minp, center=True), roll_func)(
         **kwargs
     )
-    expected = getattr(
-        concat([obj, Series([np.NaN] * 9)]).rolling(20, min_periods=minp), roll_func
-    )(**kwargs)[9:].reset_index(drop=True)
+    expected = (
+        getattr(
+            concat([obj, Series([np.NaN] * 9)]).rolling(20, min_periods=minp), roll_func
+        )(**kwargs)
+        .iloc[9:]
+        .reset_index(drop=True)
+    )
     tm.assert_series_equal(result, expected)
 
 
diff --git a/pandas/tests/window/test_rolling_quantile.py b/pandas/tests/window/test_rolling_quantile.py
@@ -133,7 +133,8 @@ def test_center(q):
     expected = (
         concat([obj, Series([np.NaN] * 9)])
         .rolling(20)
-        .quantile(q)[9:]
+        .quantile(q)
+        .iloc[9:]
         .reset_index(drop=True)
     )
     tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/window/test_rolling_skew_kurt.py b/pandas/tests/window/test_rolling_skew_kurt.py

Original file line number	Diff line number	Diff line change
`@@ -657,7 +657,7 @@ def test_uint_index_does_not_convert_to_float64(box):`
`657`	`657`	`)`
`658`	`658`	`tm.assert_index_equal(result.index, expected)`
`659`	`659`
`660`		`- tm.assert_equal(result, series[:3])`
	`660`	`+ tm.assert_equal(result, series.iloc[:3])`
`661`	`661`
`662`	`662`
`663`	`663`	`def test_float64_index_equals():`