Skip to content

Commit 957fc3c

Browse files
authored
BUG/DEPR: loc.__setitem__ incorrectly accepting positional slices (#31840)
1 parent aa27b9a commit 957fc3c

File tree

15 files changed

+108
-58
lines changed

15 files changed

+108
-58
lines changed

doc/source/whatsnew/v1.1.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ Deprecations
172172
~~~~~~~~~~~~
173173
- Lookups on a :class:`Series` with a single-item list containing a slice (e.g. ``ser[[slice(0, 4)]]``) are deprecated and will raise in a future version. Either convert the list to tuple, or pass the slice directly instead (:issue:`31333`)
174174
- :meth:`DataFrame.mean` and :meth:`DataFrame.median` with ``numeric_only=None`` will include datetime64 and datetime64tz columns in a future version (:issue:`29941`)
175-
-
175+
- Setting values with ``.loc`` using a positional slice is deprecated and will raise in a future version. Use ``.loc`` with labels or ``.iloc`` with positions instead (:issue:`31840`)
176176
-
177177

178178
.. ---------------------------------------------------------------------------

pandas/core/indexes/base.py

+10
Original file line numberDiff line numberDiff line change
@@ -3137,8 +3137,18 @@ def is_int(v):
31373137
pass
31383138

31393139
if com.is_null_slice(key):
3140+
# It doesn't matter if we are positional or label based
31403141
indexer = key
31413142
elif is_positional:
3143+
if kind == "loc":
3144+
# GH#16121, GH#24612, GH#31810
3145+
warnings.warn(
3146+
"Slicing a positional slice with .loc is not supported, "
3147+
"and will raise TypeError in a future version. "
3148+
"Use .loc with labels or .iloc with positions instead.",
3149+
FutureWarning,
3150+
stacklevel=6,
3151+
)
31423152
indexer = key
31433153
else:
31443154
indexer = self.slice_indexer(start, stop, step, kind=kind)

pandas/tests/frame/conftest.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ def float_frame_with_na():
4040
"""
4141
df = DataFrame(tm.getSeriesData())
4242
# set some NAs
43-
df.loc[5:10] = np.nan
44-
df.loc[15:20, -2:] = np.nan
43+
df.iloc[5:10] = np.nan
44+
df.iloc[15:20, -2:] = np.nan
4545
return df
4646

4747

@@ -74,8 +74,8 @@ def bool_frame_with_na():
7474
df = DataFrame(tm.getSeriesData()) > 0
7575
df = df.astype(object)
7676
# set some NAs
77-
df.loc[5:10] = np.nan
78-
df.loc[15:20, -2:] = np.nan
77+
df.iloc[5:10] = np.nan
78+
df.iloc[15:20, -2:] = np.nan
7979
return df
8080

8181

pandas/tests/frame/indexing/test_indexing.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -1209,7 +1209,7 @@ def test_setitem_frame_mixed(self, float_string_frame):
12091209
piece = DataFrame(
12101210
[[1.0, 2.0], [3.0, 4.0]], index=f.index[0:2], columns=["A", "B"]
12111211
)
1212-
key = (slice(None, 2), ["A", "B"])
1212+
key = (f.index[slice(None, 2)], ["A", "B"])
12131213
f.loc[key] = piece
12141214
tm.assert_almost_equal(f.loc[f.index[0:2], ["A", "B"]].values, piece.values)
12151215

@@ -1220,7 +1220,7 @@ def test_setitem_frame_mixed(self, float_string_frame):
12201220
index=list(f.index[0:2]) + ["foo", "bar"],
12211221
columns=["A", "B"],
12221222
)
1223-
key = (slice(None, 2), ["A", "B"])
1223+
key = (f.index[slice(None, 2)], ["A", "B"])
12241224
f.loc[key] = piece
12251225
tm.assert_almost_equal(
12261226
f.loc[f.index[0:2:], ["A", "B"]].values, piece.values[0:2]
@@ -1230,15 +1230,15 @@ def test_setitem_frame_mixed(self, float_string_frame):
12301230
f = float_string_frame.copy()
12311231
piece = f.loc[f.index[:2], ["A"]]
12321232
piece.index = f.index[-2:]
1233-
key = (slice(-2, None), ["A", "B"])
1233+
key = (f.index[slice(-2, None)], ["A", "B"])
12341234
f.loc[key] = piece
12351235
piece["B"] = np.nan
12361236
tm.assert_almost_equal(f.loc[f.index[-2:], ["A", "B"]].values, piece.values)
12371237

12381238
# ndarray
12391239
f = float_string_frame.copy()
12401240
piece = float_string_frame.loc[f.index[:2], ["A", "B"]]
1241-
key = (slice(-2, None), ["A", "B"])
1241+
key = (f.index[slice(-2, None)], ["A", "B"])
12421242
f.loc[key] = piece.values
12431243
tm.assert_almost_equal(f.loc[f.index[-2:], ["A", "B"]].values, piece.values)
12441244

@@ -1873,7 +1873,7 @@ def test_setitem_datetimelike_with_inference(self):
18731873
df = DataFrame(index=date_range("20130101", periods=4))
18741874
df["A"] = np.array([1 * one_hour] * 4, dtype="m8[ns]")
18751875
df.loc[:, "B"] = np.array([2 * one_hour] * 4, dtype="m8[ns]")
1876-
df.loc[:3, "C"] = np.array([3 * one_hour] * 3, dtype="m8[ns]")
1876+
df.loc[df.index[:3], "C"] = np.array([3 * one_hour] * 3, dtype="m8[ns]")
18771877
df.loc[:, "D"] = np.array([4 * one_hour] * 4, dtype="m8[ns]")
18781878
df.loc[df.index[:3], "E"] = np.array([5 * one_hour] * 3, dtype="m8[ns]")
18791879
df["F"] = np.timedelta64("NaT")

pandas/tests/frame/methods/test_asof.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ class TestFrameAsof:
3131
def test_basic(self, date_range_frame):
3232
df = date_range_frame
3333
N = 50
34-
df.loc[15:30, "A"] = np.nan
34+
df.loc[df.index[15:30], "A"] = np.nan
3535
dates = date_range("1/1/1990", periods=N * 3, freq="25s")
3636

3737
result = df.asof(dates)
@@ -51,7 +51,7 @@ def test_basic(self, date_range_frame):
5151
def test_subset(self, date_range_frame):
5252
N = 10
5353
df = date_range_frame.iloc[:N].copy()
54-
df.loc[4:8, "A"] = np.nan
54+
df.loc[df.index[4:8], "A"] = np.nan
5555
dates = date_range("1/1/1990", periods=N * 3, freq="25s")
5656

5757
# with a subset of A should be the same
@@ -159,7 +159,7 @@ def test_is_copy(self, date_range_frame):
159159
# doesn't track the parent dataframe / doesn't give SettingWithCopy warnings
160160
df = date_range_frame
161161
N = 50
162-
df.loc[15:30, "A"] = np.nan
162+
df.loc[df.index[15:30], "A"] = np.nan
163163
dates = date_range("1/1/1990", periods=N * 3, freq="25s")
164164

165165
result = df.asof(dates)

pandas/tests/frame/test_analytics.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -913,8 +913,8 @@ def test_sum_bools(self):
913913

914914
def test_idxmin(self, float_frame, int_frame):
915915
frame = float_frame
916-
frame.loc[5:10] = np.nan
917-
frame.loc[15:20, -2:] = np.nan
916+
frame.iloc[5:10] = np.nan
917+
frame.iloc[15:20, -2:] = np.nan
918918
for skipna in [True, False]:
919919
for axis in [0, 1]:
920920
for df in [frame, int_frame]:
@@ -928,8 +928,8 @@ def test_idxmin(self, float_frame, int_frame):
928928

929929
def test_idxmax(self, float_frame, int_frame):
930930
frame = float_frame
931-
frame.loc[5:10] = np.nan
932-
frame.loc[15:20, -2:] = np.nan
931+
frame.iloc[5:10] = np.nan
932+
frame.iloc[15:20, -2:] = np.nan
933933
for skipna in [True, False]:
934934
for axis in [0, 1]:
935935
for df in [frame, int_frame]:

pandas/tests/frame/test_apply.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,7 @@ def test_apply_yield_list(self, float_frame):
339339
tm.assert_frame_equal(result, float_frame)
340340

341341
def test_apply_reduce_Series(self, float_frame):
342-
float_frame.loc[::2, "A"] = np.nan
342+
float_frame["A"].iloc[::2] = np.nan
343343
expected = float_frame.mean(1)
344344
result = float_frame.apply(np.mean, axis=1)
345345
tm.assert_series_equal(result, expected)

pandas/tests/frame/test_block_internals.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -478,7 +478,7 @@ def test_convert_objects(self, float_string_frame):
478478
length = len(float_string_frame)
479479
float_string_frame["J"] = "1."
480480
float_string_frame["K"] = "1"
481-
float_string_frame.loc[0:5, ["J", "K"]] = "garbled"
481+
float_string_frame.loc[float_string_frame.index[0:5], ["J", "K"]] = "garbled"
482482
converted = float_string_frame._convert(datetime=True, numeric=True)
483483
assert converted["H"].dtype == "float64"
484484
assert converted["I"].dtype == "int64"

pandas/tests/frame/test_cumulative.py

+12-12
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@ def test_cumsum_corner(self):
2323
result = dm.cumsum() # noqa
2424

2525
def test_cumsum(self, datetime_frame):
26-
datetime_frame.loc[5:10, 0] = np.nan
27-
datetime_frame.loc[10:15, 1] = np.nan
28-
datetime_frame.loc[15:, 2] = np.nan
26+
datetime_frame.iloc[5:10, 0] = np.nan
27+
datetime_frame.iloc[10:15, 1] = np.nan
28+
datetime_frame.iloc[15:, 2] = np.nan
2929

3030
# axis = 0
3131
cumsum = datetime_frame.cumsum()
@@ -46,9 +46,9 @@ def test_cumsum(self, datetime_frame):
4646
assert np.shape(cumsum_xs) == np.shape(datetime_frame)
4747

4848
def test_cumprod(self, datetime_frame):
49-
datetime_frame.loc[5:10, 0] = np.nan
50-
datetime_frame.loc[10:15, 1] = np.nan
51-
datetime_frame.loc[15:, 2] = np.nan
49+
datetime_frame.iloc[5:10, 0] = np.nan
50+
datetime_frame.iloc[10:15, 1] = np.nan
51+
datetime_frame.iloc[15:, 2] = np.nan
5252

5353
# axis = 0
5454
cumprod = datetime_frame.cumprod()
@@ -80,9 +80,9 @@ def test_cumprod(self, datetime_frame):
8080
strict=False,
8181
)
8282
def test_cummin(self, datetime_frame):
83-
datetime_frame.loc[5:10, 0] = np.nan
84-
datetime_frame.loc[10:15, 1] = np.nan
85-
datetime_frame.loc[15:, 2] = np.nan
83+
datetime_frame.iloc[5:10, 0] = np.nan
84+
datetime_frame.iloc[10:15, 1] = np.nan
85+
datetime_frame.iloc[15:, 2] = np.nan
8686

8787
# axis = 0
8888
cummin = datetime_frame.cummin()
@@ -108,9 +108,9 @@ def test_cummin(self, datetime_frame):
108108
strict=False,
109109
)
110110
def test_cummax(self, datetime_frame):
111-
datetime_frame.loc[5:10, 0] = np.nan
112-
datetime_frame.loc[10:15, 1] = np.nan
113-
datetime_frame.loc[15:, 2] = np.nan
111+
datetime_frame.iloc[5:10, 0] = np.nan
112+
datetime_frame.iloc[10:15, 1] = np.nan
113+
datetime_frame.iloc[15:, 2] = np.nan
114114

115115
# axis = 0
116116
cummax = datetime_frame.cummax()

pandas/tests/frame/test_to_csv.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -761,7 +761,7 @@ def create_cols(name):
761761
)
762762

763763
# add in some nans
764-
df_float.loc[30:50, 1:3] = np.nan
764+
df_float.iloc[30:50, 1:3] = np.nan
765765

766766
# ## this is a bug in read_csv right now ####
767767
# df_dt.loc[30:50,1:3] = np.nan

pandas/tests/indexing/test_loc.py

+40
Original file line numberDiff line numberDiff line change
@@ -863,6 +863,7 @@ def test_loc_setitem_empty_append_raises(self):
863863

864864
data = [1, 2]
865865
df = DataFrame(columns=["x", "y"])
866+
df.index = df.index.astype(np.int64)
866867
msg = (
867868
r"None of \[Int64Index\(\[0, 1\], dtype='int64'\)\] "
868869
r"are in the \[index\]"
@@ -975,3 +976,42 @@ def test_loc_mixed_int_float():
975976

976977
result = ser.loc[1]
977978
assert result == 0
979+
980+
981+
def test_loc_with_positional_slice_deprecation():
982+
# GH#31840
983+
ser = pd.Series(range(4), index=["A", "B", "C", "D"])
984+
985+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
986+
ser.loc[:3] = 2
987+
988+
expected = pd.Series([2, 2, 2, 3], index=["A", "B", "C", "D"])
989+
tm.assert_series_equal(ser, expected)
990+
991+
992+
def test_loc_slice_disallows_positional():
993+
# GH#16121, GH#24612, GH#31810
994+
dti = pd.date_range("2016-01-01", periods=3)
995+
df = pd.DataFrame(np.random.random((3, 2)), index=dti)
996+
997+
ser = df[0]
998+
999+
msg = (
1000+
"cannot do slice indexing on DatetimeIndex with these "
1001+
r"indexers \[1\] of type int"
1002+
)
1003+
1004+
for obj in [df, ser]:
1005+
with pytest.raises(TypeError, match=msg):
1006+
obj.loc[1:3]
1007+
1008+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
1009+
# GH#31840 deprecated incorrect behavior
1010+
obj.loc[1:3] = 1
1011+
1012+
with pytest.raises(TypeError, match=msg):
1013+
df.loc[1:3, 1]
1014+
1015+
with tm.assert_produces_warning(FutureWarning):
1016+
# GH#31840 deprecated incorrect behavior
1017+
df.loc[1:3, 1] = 2

pandas/tests/io/pytables/test_store.py

+17-17
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ def test_repr(self, setup_path):
342342
df["timestamp2"] = Timestamp("20010103")
343343
df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0)
344344
df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0)
345-
df.loc[3:6, ["obj1"]] = np.nan
345+
df.loc[df.index[3:6], ["obj1"]] = np.nan
346346
df = df._consolidate()._convert(datetime=True)
347347

348348
with catch_warnings(record=True):
@@ -846,7 +846,7 @@ def test_put_mixed_type(self, setup_path):
846846
df["timestamp2"] = Timestamp("20010103")
847847
df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0)
848848
df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0)
849-
df.loc[3:6, ["obj1"]] = np.nan
849+
df.loc[df.index[3:6], ["obj1"]] = np.nan
850850
df = df._consolidate()._convert(datetime=True)
851851

852852
with ensure_clean_store(setup_path) as store:
@@ -1372,11 +1372,11 @@ def check_col(key, name, size):
13721372
_maybe_remove(store, "df")
13731373
df = tm.makeTimeDataFrame()
13741374
df["string"] = "foo"
1375-
df.loc[1:4, "string"] = np.nan
1375+
df.loc[df.index[1:4], "string"] = np.nan
13761376
df["string2"] = "bar"
1377-
df.loc[4:8, "string2"] = np.nan
1377+
df.loc[df.index[4:8], "string2"] = np.nan
13781378
df["string3"] = "bah"
1379-
df.loc[1:, "string3"] = np.nan
1379+
df.loc[df.index[1:], "string3"] = np.nan
13801380
store.append("df", df)
13811381
result = store.select("df")
13821382
tm.assert_frame_equal(result, df)
@@ -1492,8 +1492,8 @@ def test_append_with_data_columns(self, setup_path):
14921492
# data column selection with a string data_column
14931493
df_new = df.copy()
14941494
df_new["string"] = "foo"
1495-
df_new.loc[1:4, "string"] = np.nan
1496-
df_new.loc[5:6, "string"] = "bar"
1495+
df_new.loc[df_new.index[1:4], "string"] = np.nan
1496+
df_new.loc[df_new.index[5:6], "string"] = "bar"
14971497
_maybe_remove(store, "df")
14981498
store.append("df", df_new, data_columns=["string"])
14991499
result = store.select("df", "string='foo'")
@@ -1574,12 +1574,12 @@ def check_col(key, name, size):
15741574
# doc example
15751575
df_dc = df.copy()
15761576
df_dc["string"] = "foo"
1577-
df_dc.loc[4:6, "string"] = np.nan
1578-
df_dc.loc[7:9, "string"] = "bar"
1577+
df_dc.loc[df_dc.index[4:6], "string"] = np.nan
1578+
df_dc.loc[df_dc.index[7:9], "string"] = "bar"
15791579
df_dc["string2"] = "cool"
15801580
df_dc["datetime"] = Timestamp("20010102")
15811581
df_dc = df_dc._convert(datetime=True)
1582-
df_dc.loc[3:5, ["A", "B", "datetime"]] = np.nan
1582+
df_dc.loc[df_dc.index[3:5], ["A", "B", "datetime"]] = np.nan
15831583

15841584
_maybe_remove(store, "df_dc")
15851585
store.append(
@@ -1602,8 +1602,8 @@ def check_col(key, name, size):
16021602
np.random.randn(8, 3), index=index, columns=["A", "B", "C"]
16031603
)
16041604
df_dc["string"] = "foo"
1605-
df_dc.loc[4:6, "string"] = np.nan
1606-
df_dc.loc[7:9, "string"] = "bar"
1605+
df_dc.loc[df_dc.index[4:6], "string"] = np.nan
1606+
df_dc.loc[df_dc.index[7:9], "string"] = "bar"
16071607
df_dc.loc[:, ["B", "C"]] = df_dc.loc[:, ["B", "C"]].abs()
16081608
df_dc["string2"] = "cool"
16091609

@@ -2024,7 +2024,7 @@ def test_table_mixed_dtypes(self, setup_path):
20242024
df["timestamp2"] = Timestamp("20010103")
20252025
df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0)
20262026
df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0)
2027-
df.loc[3:6, ["obj1"]] = np.nan
2027+
df.loc[df.index[3:6], ["obj1"]] = np.nan
20282028
df = df._consolidate()._convert(datetime=True)
20292029

20302030
with ensure_clean_store(setup_path) as store:
@@ -2200,7 +2200,7 @@ def test_invalid_terms(self, setup_path):
22002200

22012201
df = tm.makeTimeDataFrame()
22022202
df["string"] = "foo"
2203-
df.loc[0:4, "string"] = "bar"
2203+
df.loc[df.index[0:4], "string"] = "bar"
22042204

22052205
store.put("df", df, format="table")
22062206

@@ -3343,7 +3343,7 @@ def test_string_select(self, setup_path):
33433343

33443344
# test string ==/!=
33453345
df["x"] = "none"
3346-
df.loc[2:7, "x"] = ""
3346+
df.loc[df.index[2:7], "x"] = ""
33473347

33483348
store.append("df", df, data_columns=["x"])
33493349

@@ -3365,7 +3365,7 @@ def test_string_select(self, setup_path):
33653365

33663366
# int ==/!=
33673367
df["int"] = 1
3368-
df.loc[2:7, "int"] = 2
3368+
df.loc[df.index[2:7], "int"] = 2
33693369

33703370
store.append("df3", df, data_columns=["int"])
33713371

@@ -3419,7 +3419,7 @@ def test_read_column(self, setup_path):
34193419
# a data column with NaNs, result excludes the NaNs
34203420
df3 = df.copy()
34213421
df3["string"] = "foo"
3422-
df3.loc[4:6, "string"] = np.nan
3422+
df3.loc[df3.index[4:6], "string"] = np.nan
34233423
store.append("df3", df3, data_columns=["string"])
34243424
result = store.select_column("df3", "string")
34253425
tm.assert_almost_equal(result.values, df3["string"].values)

0 commit comments

Comments
 (0)