Skip to content

Commit 51675d0

Browse files
authored
DEPR: inconsistent series[i:j] slicing with Int64Index GH#45162 (#45324)
1 parent f2a0125 commit 51675d0

22 files changed

+145
-37
lines changed

doc/source/whatsnew/v1.5.0.rst

+44
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,50 @@ Other API changes
9494

9595
Deprecations
9696
~~~~~~~~~~~~
97+
98+
.. _whatsnew_150.deprecations.int_slicing_series:
99+
100+
In a future version, integer slicing on a :class:`Series` with an :class:`Int64Index` or :class:`RangeIndex` will be treated as *label-based*, not positional. This will make the behavior consistent with other :meth:`Series.__getitem__` and :meth:`Series.__setitem__` behaviors (:issue:`45162`).
101+
102+
For example:
103+
104+
.. ipython:: python
105+
106+
ser = pd.Series([1, 2, 3, 4, 5], index=[2, 3, 5, 7, 11])
107+
108+
In the old behavior, ``ser[2:4]`` treats the slice as positional:
109+
110+
*Old behavior*:
111+
112+
.. code-block:: ipython
113+
114+
In [3]: ser[2:4]
115+
Out[3]:
116+
5 3
117+
7 4
118+
dtype: int64
119+
120+
In a future version, ``ser[2:4]`` will be treated as label-based, i.e. it will return the same result as ``ser.loc[2:4]``:
121+
122+
*Future behavior*:
123+
124+
.. code-block:: ipython
125+
126+
In [4]: ser.loc[2:4]
127+
Out[4]:
128+
2 1
129+
3 2
130+
dtype: int64
131+
132+
To retain the old behavior, use ``series.iloc[i:j]``. To get the future behavior,
133+
use ``series.loc[i:j]``.
134+
135+
Slicing on a :class:`DataFrame` will not be affected.
136+
137+
.. _whatsnew_150.deprecations.other:
138+
139+
Other Deprecations
140+
^^^^^^^^^^^^^^^^^^
97141
- Deprecated the keyword ``line_terminator`` in :meth:`DataFrame.to_csv` and :meth:`Series.to_csv`, use ``lineterminator`` instead; this is for consistency with :func:`read_csv` and the standard library 'csv' module (:issue:`9568`)
98142
- Deprecated :meth:`DataFrame.iteritems`, :meth:`Series.iteritems`, :meth:`HDFStore.iteritems` in favor of :meth:`DataFrame.items`, :meth:`Series.items`, :meth:`HDFStore.items` (:issue:`45321`)
99143
-

pandas/core/indexes/base.py

+42-1
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@
110110
ABCDatetimeIndex,
111111
ABCMultiIndex,
112112
ABCPeriodIndex,
113+
ABCRangeIndex,
113114
ABCSeries,
114115
ABCTimedeltaIndex,
115116
)
@@ -3989,7 +3990,7 @@ def _validate_positional_slice(self, key: slice) -> None:
39893990
self._validate_indexer("positional", key.stop, "iloc")
39903991
self._validate_indexer("positional", key.step, "iloc")
39913992

3992-
def _convert_slice_indexer(self, key: slice, kind: str_t):
3993+
def _convert_slice_indexer(self, key: slice, kind: str_t, is_frame: bool = False):
39933994
"""
39943995
Convert a slice indexer.
39953996
@@ -4000,6 +4001,9 @@ def _convert_slice_indexer(self, key: slice, kind: str_t):
40004001
----------
40014002
key : label of the slice bound
40024003
kind : {'loc', 'getitem'}
4004+
is_frame : bool, default False
4005+
Whether this is a slice called on DataFrame.__getitem__
4006+
as opposed to Series.__getitem__
40034007
"""
40044008
assert kind in ["loc", "getitem"], kind
40054009

@@ -4020,7 +4024,44 @@ def is_int(v):
40204024
called from the getitem slicers, validate that we are in fact
40214025
integers
40224026
"""
4027+
if self.is_integer():
4028+
if is_frame:
4029+
# unambiguously positional, no deprecation
4030+
pass
4031+
elif start is None and stop is None:
4032+
# label-based vs positional is irrelevant
4033+
pass
4034+
elif isinstance(self, ABCRangeIndex) and self._range == range(
4035+
len(self)
4036+
):
4037+
# In this case there is no difference between label-based
4038+
# and positional, so nothing will change.
4039+
pass
4040+
elif (
4041+
self.dtype.kind in ["i", "u"]
4042+
and self._is_strictly_monotonic_increasing
4043+
and len(self) > 0
4044+
and self[0] == 0
4045+
and self[-1] == len(self) - 1
4046+
):
4047+
# We are range-like, e.g. created with Index(np.arange(N))
4048+
pass
4049+
elif not is_index_slice:
4050+
# we're going to raise, so don't bother warning, e.g.
4051+
# test_integer_positional_indexing
4052+
pass
4053+
else:
4054+
warnings.warn(
4055+
"The behavior of `series[i:j]` with an integer-dtype index "
4056+
"is deprecated. In a future version, this will be treated "
4057+
"as *label-based* indexing, consistent with e.g. `series[i]` "
4058+
"lookups. To retain the old behavior, use `series.iloc[i:j]`. "
4059+
"To get the future behavior, use `series.loc[i:j]`.",
4060+
FutureWarning,
4061+
stacklevel=find_stack_level(),
4062+
)
40234063
if self.is_integer() or is_index_slice:
4064+
# Note: these checks are redundant if we know is_index_slice
40244065
self._validate_indexer("slice", key.start, "getitem")
40254066
self._validate_indexer("slice", key.stop, "getitem")
40264067
self._validate_indexer("slice", key.step, "getitem")

pandas/core/indexes/interval.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -754,7 +754,7 @@ def _index_as_unique(self) -> bool:
754754
"cannot handle overlapping indices; use IntervalIndex.get_indexer_non_unique"
755755
)
756756

757-
def _convert_slice_indexer(self, key: slice, kind: str):
757+
def _convert_slice_indexer(self, key: slice, kind: str, is_frame: bool = False):
758758
if not (key.step is None or key.step == 1):
759759
# GH#31658 if label-based, we require step == 1,
760760
# if positional, we disallow float start/stop
@@ -766,7 +766,7 @@ def _convert_slice_indexer(self, key: slice, kind: str):
766766
# i.e. this cannot be interpreted as a positional slice
767767
raise ValueError(msg)
768768

769-
return super()._convert_slice_indexer(key, kind)
769+
return super()._convert_slice_indexer(key, kind, is_frame=is_frame)
770770

771771
@cache_readonly
772772
def _should_fallback_to_positional(self) -> bool:

pandas/core/indexes/numeric.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -268,15 +268,15 @@ def _should_fallback_to_positional(self) -> bool:
268268
return False
269269

270270
@doc(Index._convert_slice_indexer)
271-
def _convert_slice_indexer(self, key: slice, kind: str):
271+
def _convert_slice_indexer(self, key: slice, kind: str, is_frame: bool = False):
272272
if is_float_dtype(self.dtype):
273273
assert kind in ["loc", "getitem"]
274274

275275
# We always treat __getitem__ slicing as label-based
276276
# translate to locations
277277
return self.slice_indexer(key.start, key.stop, key.step)
278278

279-
return super()._convert_slice_indexer(key, kind=kind)
279+
return super()._convert_slice_indexer(key, kind=kind, is_frame=is_frame)
280280

281281
@doc(Index._maybe_cast_slice_bound)
282282
def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default):

pandas/core/indexing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2313,7 +2313,7 @@ def convert_to_index_sliceable(obj: DataFrame, key):
23132313
"""
23142314
idx = obj.index
23152315
if isinstance(key, slice):
2316-
return idx._convert_slice_indexer(key, kind="getitem")
2316+
return idx._convert_slice_indexer(key, kind="getitem", is_frame=True)
23172317

23182318
elif isinstance(key, str):
23192319

pandas/tests/extension/base/getitem.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,8 @@ def test_get(self, data):
313313
expected = s.iloc[[2, 3]]
314314
self.assert_series_equal(result, expected)
315315

316-
result = s.get(slice(2))
316+
with tm.assert_produces_warning(FutureWarning, match="label-based"):
317+
result = s.get(slice(2))
317318
expected = s.iloc[[0, 1]]
318319
self.assert_series_equal(result, expected)
319320

@@ -336,7 +337,9 @@ def test_get(self, data):
336337

337338
# GH 21257
338339
s = pd.Series(data)
339-
s2 = s[::2]
340+
with tm.assert_produces_warning(None):
341+
# GH#45324 make sure we aren't giving a spurious FutureWarning
342+
s2 = s[::2]
340343
assert s2.get(1) is None
341344

342345
def test_take_sequence(self, data):

pandas/tests/frame/indexing/test_indexing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1009,7 +1009,7 @@ def test_iloc_row_slice_view(self, using_array_manager):
10091009
exp_col = original[2].copy()
10101010
# TODO(ArrayManager) verify it is expected that the original didn't change
10111011
if not using_array_manager:
1012-
exp_col[4:8] = 0.0
1012+
exp_col._values[4:8] = 0.0
10131013
tm.assert_series_equal(df[2], exp_col)
10141014

10151015
def test_iloc_col(self):

pandas/tests/groupby/test_apply.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -367,11 +367,11 @@ def test_apply_frame_not_as_index_column_name(df):
367367

368368
def test_apply_frame_concat_series():
369369
def trans(group):
370-
return group.groupby("B")["C"].sum().sort_values()[:2]
370+
return group.groupby("B")["C"].sum().sort_values().iloc[:2]
371371

372372
def trans2(group):
373373
grouped = group.groupby(df.reindex(group.index)["B"])
374-
return grouped.sum().sort_values()[:2]
374+
return grouped.sum().sort_values().iloc[:2]
375375

376376
df = DataFrame(
377377
{
@@ -409,7 +409,7 @@ def test_apply_chunk_view():
409409
# Low level tinkering could be unsafe, make sure not
410410
df = DataFrame({"key": [1, 1, 1, 2, 2, 2, 3, 3, 3], "value": range(9)})
411411

412-
result = df.groupby("key", group_keys=False).apply(lambda x: x[:2])
412+
result = df.groupby("key", group_keys=False).apply(lambda x: x.iloc[:2])
413413
expected = df.take([0, 1, 3, 4, 6, 7])
414414
tm.assert_frame_equal(result, expected)
415415

pandas/tests/indexes/numeric/test_numeric.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -657,7 +657,7 @@ def test_uint_index_does_not_convert_to_float64(box):
657657
)
658658
tm.assert_index_equal(result.index, expected)
659659

660-
tm.assert_equal(result, series[:3])
660+
tm.assert_equal(result, series.iloc[:3])
661661

662662

663663
def test_float64_index_equals():

pandas/tests/indexing/test_floats.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,8 @@ def test_integer_positional_indexing(self, idx):
343343
"""
344344
s = Series(range(2, 6), index=range(2, 6))
345345

346-
result = s[2:4]
346+
with tm.assert_produces_warning(FutureWarning, match="label-based"):
347+
result = s[2:4]
347348
expected = s.iloc[2:4]
348349
tm.assert_series_equal(result, expected)
349350

pandas/tests/indexing/test_iloc.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1386,8 +1386,10 @@ def test_iloc(self):
13861386
tm.assert_series_equal(result, expected)
13871387

13881388
# test slice is a view
1389-
result[:] = 0
1390-
assert (ser[1:3] == 0).all()
1389+
with tm.assert_produces_warning(None):
1390+
# GH#45324 make sure we aren't giving a spurious FutureWarning
1391+
result[:] = 0
1392+
assert (ser.iloc[1:3] == 0).all()
13911393

13921394
# list of integers
13931395
result = ser.iloc[[0, 2, 3, 4, 5]]

pandas/tests/indexing/test_indexing.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,8 @@ def test_setitem_ndarray_1d_2(self):
6868

6969
msg = "Must have equal len keys and value when setting with an iterable"
7070
with pytest.raises(ValueError, match=msg):
71-
df[2:5] = np.arange(1, 4) * 1j
71+
with tm.assert_produces_warning(FutureWarning, match="label-based"):
72+
df[2:5] = np.arange(1, 4) * 1j
7273

7374
def test_getitem_ndarray_3d(
7475
self, index, frame_or_series, indexer_sli, using_array_manager

pandas/tests/series/indexing/test_get.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,8 @@ def test_get_with_ea(arr):
167167
expected = ser.iloc[[2, 3]]
168168
tm.assert_series_equal(result, expected)
169169

170-
result = ser.get(slice(2))
170+
with tm.assert_produces_warning(FutureWarning, match="label-based"):
171+
result = ser.get(slice(2))
171172
expected = ser.iloc[[0, 1]]
172173
tm.assert_series_equal(result, expected)
173174

pandas/tests/series/indexing/test_getitem.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,8 @@ def test_getitem_slice_bug(self):
332332
def test_getitem_slice_integers(self):
333333
ser = Series(np.random.randn(8), index=[2, 4, 6, 8, 10, 12, 14, 16])
334334

335-
result = ser[:4]
335+
with tm.assert_produces_warning(FutureWarning, match="label-based"):
336+
result = ser[:4]
336337
expected = Series(ser.values[:4], index=[2, 4, 6, 8])
337338
tm.assert_series_equal(result, expected)
338339

pandas/tests/series/indexing/test_setitem.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -215,9 +215,15 @@ def test_setitem_slice(self):
215215
def test_setitem_slice_integers(self):
216216
ser = Series(np.random.randn(8), index=[2, 4, 6, 8, 10, 12, 14, 16])
217217

218-
ser[:4] = 0
219-
assert (ser[:4] == 0).all()
220-
assert not (ser[4:] == 0).any()
218+
msg = r"In a future version, this will be treated as \*label-based\* indexing"
219+
with tm.assert_produces_warning(FutureWarning, match=msg):
220+
ser[:4] = 0
221+
with tm.assert_produces_warning(
222+
FutureWarning, match=msg, check_stacklevel=False
223+
):
224+
assert (ser[:4] == 0).all()
225+
with tm.assert_produces_warning(FutureWarning, match=msg):
226+
assert not (ser[4:] == 0).any()
221227

222228
def test_setitem_slicestep(self):
223229
# caught this bug when writing tests

pandas/tests/series/methods/test_item.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -55,5 +55,5 @@ def test_item(self):
5555

5656
# Case where ser[0] would not work
5757
ser = Series(dti, index=[5, 6])
58-
val = ser[:1].item()
58+
val = ser.iloc[:1].item()
5959
assert val == dti[0]

pandas/tests/window/test_apply.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,8 @@ def test_center(raw):
214214
expected = (
215215
concat([obj, Series([np.NaN] * 9)])
216216
.rolling(20, min_periods=15)
217-
.apply(f, raw=raw)[9:]
217+
.apply(f, raw=raw)
218+
.iloc[9:]
218219
.reset_index(drop=True)
219220
)
220221
tm.assert_series_equal(result, expected)

pandas/tests/window/test_ewm.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -552,11 +552,11 @@ def test_ew_min_periods(min_periods, name):
552552

553553
@pytest.mark.parametrize("name", ["cov", "corr"])
554554
def test_ewm_corr_cov(name):
555-
A = Series(np.random.randn(50), index=np.arange(50))
555+
A = Series(np.random.randn(50), index=range(50))
556556
B = A[2:] + np.random.randn(48)
557557

558558
A[:10] = np.NaN
559-
B[-10:] = np.NaN
559+
B.iloc[-10:] = np.NaN
560560

561561
result = getattr(A.ewm(com=20, min_periods=5), name)(B)
562562
assert np.isnan(result.values[:14]).all()
@@ -567,11 +567,11 @@ def test_ewm_corr_cov(name):
567567
@pytest.mark.parametrize("name", ["cov", "corr"])
568568
def test_ewm_corr_cov_min_periods(name, min_periods):
569569
# GH 7898
570-
A = Series(np.random.randn(50), index=np.arange(50))
570+
A = Series(np.random.randn(50), index=range(50))
571571
B = A[2:] + np.random.randn(48)
572572

573573
A[:10] = np.NaN
574-
B[-10:] = np.NaN
574+
B.iloc[-10:] = np.NaN
575575

576576
result = getattr(A.ewm(com=20, min_periods=min_periods), name)(B)
577577
# binary functions (ewmcov, ewmcorr) with bias=False require at
@@ -593,7 +593,7 @@ def test_ewm_corr_cov_min_periods(name, min_periods):
593593

594594
@pytest.mark.parametrize("name", ["cov", "corr"])
595595
def test_different_input_array_raise_exception(name):
596-
A = Series(np.random.randn(50), index=np.arange(50))
596+
A = Series(np.random.randn(50), index=range(50))
597597
A[:10] = np.NaN
598598

599599
msg = "other must be a DataFrame or Series"

pandas/tests/window/test_rolling.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1391,7 +1391,7 @@ def test_rolling_corr_timedelta_index(index, window):
13911391
# GH: 31286
13921392
x = Series([1, 2, 3, 4, 5], index=index)
13931393
y = x.copy()
1394-
x[0:2] = 0.0
1394+
x.iloc[0:2] = 0.0
13951395
result = x.rolling(window).corr(y)
13961396
expected = Series([np.nan, np.nan, 1, 1, 1], index=index)
13971397
tm.assert_almost_equal(result, expected)

pandas/tests/window/test_rolling_functions.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -247,9 +247,13 @@ def test_center(roll_func, kwargs, minp):
247247
result = getattr(obj.rolling(20, min_periods=minp, center=True), roll_func)(
248248
**kwargs
249249
)
250-
expected = getattr(
251-
concat([obj, Series([np.NaN] * 9)]).rolling(20, min_periods=minp), roll_func
252-
)(**kwargs)[9:].reset_index(drop=True)
250+
expected = (
251+
getattr(
252+
concat([obj, Series([np.NaN] * 9)]).rolling(20, min_periods=minp), roll_func
253+
)(**kwargs)
254+
.iloc[9:]
255+
.reset_index(drop=True)
256+
)
253257
tm.assert_series_equal(result, expected)
254258

255259

pandas/tests/window/test_rolling_quantile.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,8 @@ def test_center(q):
133133
expected = (
134134
concat([obj, Series([np.NaN] * 9)])
135135
.rolling(20)
136-
.quantile(q)[9:]
136+
.quantile(q)
137+
.iloc[9:]
137138
.reset_index(drop=True)
138139
)
139140
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)