BUG/DEPR: loc.__setitem__ incorrectly accepting positional slices (#31840)

jbrockmendel · web-flow · commit 957fc3c43808 · 2020-03-08T11:45:25.000-04:00
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -172,7 +172,7 @@ Deprecations
 ~~~~~~~~~~~~
 - Lookups on a :class:`Series` with a single-item list containing a slice (e.g. ``ser[[slice(0, 4)]]``) are deprecated, will raise in a future version.  Either convert the list to tuple, or pass the slice directly instead (:issue:`31333`)
 - :meth:`DataFrame.mean` and :meth:`DataFrame.median` with ``numeric_only=None`` will include datetime64 and datetime64tz columns in a future version (:issue:`29941`)
--
+- Setting values with ``.loc`` using a positional slice is deprecated and will raise in a future version.  Use ``.loc`` with labels or ``.iloc`` with positions instead (:issue:`31840`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -3137,8 +3137,18 @@ def is_int(v):
                 pass
 
         if com.is_null_slice(key):
+            # It doesn't matter if we are positional or label based
             indexer = key
         elif is_positional:
+            if kind == "loc":
+                # GH#16121, GH#24612, GH#31810
+                warnings.warn(
+                    "Slicing a positional slice with .loc is not supported, "
+                    "and will raise TypeError in a future version.  "
+                    "Use .loc with labels or .iloc with positions instead.",
+                    FutureWarning,
+                    stacklevel=6,
+                )
             indexer = key
         else:
             indexer = self.slice_indexer(start, stop, step, kind=kind)
diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py
@@ -40,8 +40,8 @@ def float_frame_with_na():
     """
     df = DataFrame(tm.getSeriesData())
     # set some NAs
-    df.loc[5:10] = np.nan
-    df.loc[15:20, -2:] = np.nan
+    df.iloc[5:10] = np.nan
+    df.iloc[15:20, -2:] = np.nan
     return df
 
 
@@ -74,8 +74,8 @@ def bool_frame_with_na():
     df = DataFrame(tm.getSeriesData()) > 0
     df = df.astype(object)
     # set some NAs
-    df.loc[5:10] = np.nan
-    df.loc[15:20, -2:] = np.nan
+    df.iloc[5:10] = np.nan
+    df.iloc[15:20, -2:] = np.nan
     return df
 
 
diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
@@ -1209,7 +1209,7 @@ def test_setitem_frame_mixed(self, float_string_frame):
         piece = DataFrame(
             [[1.0, 2.0], [3.0, 4.0]], index=f.index[0:2], columns=["A", "B"]
         )
-        key = (slice(None, 2), ["A", "B"])
+        key = (f.index[slice(None, 2)], ["A", "B"])
         f.loc[key] = piece
         tm.assert_almost_equal(f.loc[f.index[0:2], ["A", "B"]].values, piece.values)
 
@@ -1220,7 +1220,7 @@ def test_setitem_frame_mixed(self, float_string_frame):
             index=list(f.index[0:2]) + ["foo", "bar"],
             columns=["A", "B"],
         )
-        key = (slice(None, 2), ["A", "B"])
+        key = (f.index[slice(None, 2)], ["A", "B"])
         f.loc[key] = piece
         tm.assert_almost_equal(
             f.loc[f.index[0:2:], ["A", "B"]].values, piece.values[0:2]
@@ -1230,15 +1230,15 @@ def test_setitem_frame_mixed(self, float_string_frame):
         f = float_string_frame.copy()
         piece = f.loc[f.index[:2], ["A"]]
         piece.index = f.index[-2:]
-        key = (slice(-2, None), ["A", "B"])
+        key = (f.index[slice(-2, None)], ["A", "B"])
         f.loc[key] = piece
         piece["B"] = np.nan
         tm.assert_almost_equal(f.loc[f.index[-2:], ["A", "B"]].values, piece.values)
 
         # ndarray
         f = float_string_frame.copy()
         piece = float_string_frame.loc[f.index[:2], ["A", "B"]]
-        key = (slice(-2, None), ["A", "B"])
+        key = (f.index[slice(-2, None)], ["A", "B"])
         f.loc[key] = piece.values
         tm.assert_almost_equal(f.loc[f.index[-2:], ["A", "B"]].values, piece.values)
 
@@ -1873,7 +1873,7 @@ def test_setitem_datetimelike_with_inference(self):
         df = DataFrame(index=date_range("20130101", periods=4))
         df["A"] = np.array([1 * one_hour] * 4, dtype="m8[ns]")
         df.loc[:, "B"] = np.array([2 * one_hour] * 4, dtype="m8[ns]")
-        df.loc[:3, "C"] = np.array([3 * one_hour] * 3, dtype="m8[ns]")
+        df.loc[df.index[:3], "C"] = np.array([3 * one_hour] * 3, dtype="m8[ns]")
         df.loc[:, "D"] = np.array([4 * one_hour] * 4, dtype="m8[ns]")
         df.loc[df.index[:3], "E"] = np.array([5 * one_hour] * 3, dtype="m8[ns]")
         df["F"] = np.timedelta64("NaT")
diff --git a/pandas/tests/frame/methods/test_asof.py b/pandas/tests/frame/methods/test_asof.py
@@ -31,7 +31,7 @@ class TestFrameAsof:
     def test_basic(self, date_range_frame):
         df = date_range_frame
         N = 50
-        df.loc[15:30, "A"] = np.nan
+        df.loc[df.index[15:30], "A"] = np.nan
         dates = date_range("1/1/1990", periods=N * 3, freq="25s")
 
         result = df.asof(dates)
@@ -51,7 +51,7 @@ def test_basic(self, date_range_frame):
     def test_subset(self, date_range_frame):
         N = 10
         df = date_range_frame.iloc[:N].copy()
-        df.loc[4:8, "A"] = np.nan
+        df.loc[df.index[4:8], "A"] = np.nan
         dates = date_range("1/1/1990", periods=N * 3, freq="25s")
 
         # with a subset of A should be the same
@@ -159,7 +159,7 @@ def test_is_copy(self, date_range_frame):
         # doesn't track the parent dataframe / doesn't give SettingWithCopy warnings
         df = date_range_frame
         N = 50
-        df.loc[15:30, "A"] = np.nan
+        df.loc[df.index[15:30], "A"] = np.nan
         dates = date_range("1/1/1990", periods=N * 3, freq="25s")
 
         result = df.asof(dates)
diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
@@ -913,8 +913,8 @@ def test_sum_bools(self):
 
     def test_idxmin(self, float_frame, int_frame):
         frame = float_frame
-        frame.loc[5:10] = np.nan
-        frame.loc[15:20, -2:] = np.nan
+        frame.iloc[5:10] = np.nan
+        frame.iloc[15:20, -2:] = np.nan
         for skipna in [True, False]:
             for axis in [0, 1]:
                 for df in [frame, int_frame]:
@@ -928,8 +928,8 @@ def test_idxmin(self, float_frame, int_frame):
 
     def test_idxmax(self, float_frame, int_frame):
         frame = float_frame
-        frame.loc[5:10] = np.nan
-        frame.loc[15:20, -2:] = np.nan
+        frame.iloc[5:10] = np.nan
+        frame.iloc[15:20, -2:] = np.nan
         for skipna in [True, False]:
             for axis in [0, 1]:
                 for df in [frame, int_frame]:
diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py
@@ -339,7 +339,7 @@ def test_apply_yield_list(self, float_frame):
         tm.assert_frame_equal(result, float_frame)
 
     def test_apply_reduce_Series(self, float_frame):
-        float_frame.loc[::2, "A"] = np.nan
+        float_frame["A"].iloc[::2] = np.nan
         expected = float_frame.mean(1)
         result = float_frame.apply(np.mean, axis=1)
         tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py
@@ -478,7 +478,7 @@ def test_convert_objects(self, float_string_frame):
         length = len(float_string_frame)
         float_string_frame["J"] = "1."
         float_string_frame["K"] = "1"
-        float_string_frame.loc[0:5, ["J", "K"]] = "garbled"
+        float_string_frame.loc[float_string_frame.index[0:5], ["J", "K"]] = "garbled"
         converted = float_string_frame._convert(datetime=True, numeric=True)
         assert converted["H"].dtype == "float64"
         assert converted["I"].dtype == "int64"
diff --git a/pandas/tests/frame/test_cumulative.py b/pandas/tests/frame/test_cumulative.py
@@ -23,9 +23,9 @@ def test_cumsum_corner(self):
         result = dm.cumsum()  # noqa
 
     def test_cumsum(self, datetime_frame):
-        datetime_frame.loc[5:10, 0] = np.nan
-        datetime_frame.loc[10:15, 1] = np.nan
-        datetime_frame.loc[15:, 2] = np.nan
+        datetime_frame.iloc[5:10, 0] = np.nan
+        datetime_frame.iloc[10:15, 1] = np.nan
+        datetime_frame.iloc[15:, 2] = np.nan
 
         # axis = 0
         cumsum = datetime_frame.cumsum()
@@ -46,9 +46,9 @@ def test_cumsum(self, datetime_frame):
         assert np.shape(cumsum_xs) == np.shape(datetime_frame)
 
     def test_cumprod(self, datetime_frame):
-        datetime_frame.loc[5:10, 0] = np.nan
-        datetime_frame.loc[10:15, 1] = np.nan
-        datetime_frame.loc[15:, 2] = np.nan
+        datetime_frame.iloc[5:10, 0] = np.nan
+        datetime_frame.iloc[10:15, 1] = np.nan
+        datetime_frame.iloc[15:, 2] = np.nan
 
         # axis = 0
         cumprod = datetime_frame.cumprod()
@@ -80,9 +80,9 @@ def test_cumprod(self, datetime_frame):
         strict=False,
     )
     def test_cummin(self, datetime_frame):
-        datetime_frame.loc[5:10, 0] = np.nan
-        datetime_frame.loc[10:15, 1] = np.nan
-        datetime_frame.loc[15:, 2] = np.nan
+        datetime_frame.iloc[5:10, 0] = np.nan
+        datetime_frame.iloc[10:15, 1] = np.nan
+        datetime_frame.iloc[15:, 2] = np.nan
 
         # axis = 0
         cummin = datetime_frame.cummin()
@@ -108,9 +108,9 @@ def test_cummin(self, datetime_frame):
         strict=False,
     )
     def test_cummax(self, datetime_frame):
-        datetime_frame.loc[5:10, 0] = np.nan
-        datetime_frame.loc[10:15, 1] = np.nan
-        datetime_frame.loc[15:, 2] = np.nan
+        datetime_frame.iloc[5:10, 0] = np.nan
+        datetime_frame.iloc[10:15, 1] = np.nan
+        datetime_frame.iloc[15:, 2] = np.nan
 
         # axis = 0
         cummax = datetime_frame.cummax()
diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py
@@ -761,7 +761,7 @@ def create_cols(name):
         )
 
         # add in some nans
-        df_float.loc[30:50, 1:3] = np.nan
+        df_float.iloc[30:50, 1:3] = np.nan
 
         # ## this is a bug in read_csv right now ####
         # df_dt.loc[30:50,1:3] = np.nan
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
@@ -863,6 +863,7 @@ def test_loc_setitem_empty_append_raises(self):
 
         data = [1, 2]
         df = DataFrame(columns=["x", "y"])
+        df.index = df.index.astype(np.int64)
         msg = (
             r"None of \[Int64Index\(\[0, 1\], dtype='int64'\)\] "
             r"are in the \[index\]"
@@ -975,3 +976,42 @@ def test_loc_mixed_int_float():
 
     result = ser.loc[1]
     assert result == 0
+
+
+def test_loc_with_positional_slice_deprecation():
+    # GH#31840
+    ser = pd.Series(range(4), index=["A", "B", "C", "D"])
+
+    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+        ser.loc[:3] = 2
+
+    expected = pd.Series([2, 2, 2, 3], index=["A", "B", "C", "D"])
+    tm.assert_series_equal(ser, expected)
+
+
+def test_loc_slice_disallows_positional():
+    # GH#16121, GH#24612, GH#31810
+    dti = pd.date_range("2016-01-01", periods=3)
+    df = pd.DataFrame(np.random.random((3, 2)), index=dti)
+
+    ser = df[0]
+
+    msg = (
+        "cannot do slice indexing on DatetimeIndex with these "
+        r"indexers \[1\] of type int"
+    )
+
+    for obj in [df, ser]:
+        with pytest.raises(TypeError, match=msg):
+            obj.loc[1:3]
+
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            # GH#31840 deprecated incorrect behavior
+            obj.loc[1:3] = 1
+
+    with pytest.raises(TypeError, match=msg):
+        df.loc[1:3, 1]
+
+    with tm.assert_produces_warning(FutureWarning):
+        # GH#31840 deprecated incorrect behavior
+        df.loc[1:3, 1] = 2
diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py
@@ -342,7 +342,7 @@ def test_repr(self, setup_path):
             df["timestamp2"] = Timestamp("20010103")
             df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0)
             df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0)
-            df.loc[3:6, ["obj1"]] = np.nan
+            df.loc[df.index[3:6], ["obj1"]] = np.nan
             df = df._consolidate()._convert(datetime=True)
 
             with catch_warnings(record=True):
@@ -846,7 +846,7 @@ def test_put_mixed_type(self, setup_path):
         df["timestamp2"] = Timestamp("20010103")
         df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0)
         df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0)
-        df.loc[3:6, ["obj1"]] = np.nan
+        df.loc[df.index[3:6], ["obj1"]] = np.nan
         df = df._consolidate()._convert(datetime=True)
 
         with ensure_clean_store(setup_path) as store:
@@ -1372,11 +1372,11 @@ def check_col(key, name, size):
                 _maybe_remove(store, "df")
                 df = tm.makeTimeDataFrame()
                 df["string"] = "foo"
-                df.loc[1:4, "string"] = np.nan
+                df.loc[df.index[1:4], "string"] = np.nan
                 df["string2"] = "bar"
-                df.loc[4:8, "string2"] = np.nan
+                df.loc[df.index[4:8], "string2"] = np.nan
                 df["string3"] = "bah"
-                df.loc[1:, "string3"] = np.nan
+                df.loc[df.index[1:], "string3"] = np.nan
                 store.append("df", df)
                 result = store.select("df")
                 tm.assert_frame_equal(result, df)
@@ -1492,8 +1492,8 @@ def test_append_with_data_columns(self, setup_path):
             # data column selection with a string data_column
             df_new = df.copy()
             df_new["string"] = "foo"
-            df_new.loc[1:4, "string"] = np.nan
-            df_new.loc[5:6, "string"] = "bar"
+            df_new.loc[df_new.index[1:4], "string"] = np.nan
+            df_new.loc[df_new.index[5:6], "string"] = "bar"
             _maybe_remove(store, "df")
             store.append("df", df_new, data_columns=["string"])
             result = store.select("df", "string='foo'")
@@ -1574,12 +1574,12 @@ def check_col(key, name, size):
             # doc example
             df_dc = df.copy()
             df_dc["string"] = "foo"
-            df_dc.loc[4:6, "string"] = np.nan
-            df_dc.loc[7:9, "string"] = "bar"
+            df_dc.loc[df_dc.index[4:6], "string"] = np.nan
+            df_dc.loc[df_dc.index[7:9], "string"] = "bar"
             df_dc["string2"] = "cool"
             df_dc["datetime"] = Timestamp("20010102")
             df_dc = df_dc._convert(datetime=True)
-            df_dc.loc[3:5, ["A", "B", "datetime"]] = np.nan
+            df_dc.loc[df_dc.index[3:5], ["A", "B", "datetime"]] = np.nan
 
             _maybe_remove(store, "df_dc")
             store.append(
@@ -1602,8 +1602,8 @@ def check_col(key, name, size):
                 np.random.randn(8, 3), index=index, columns=["A", "B", "C"]
             )
             df_dc["string"] = "foo"
-            df_dc.loc[4:6, "string"] = np.nan
-            df_dc.loc[7:9, "string"] = "bar"
+            df_dc.loc[df_dc.index[4:6], "string"] = np.nan
+            df_dc.loc[df_dc.index[7:9], "string"] = "bar"
             df_dc.loc[:, ["B", "C"]] = df_dc.loc[:, ["B", "C"]].abs()
             df_dc["string2"] = "cool"
 
@@ -2024,7 +2024,7 @@ def test_table_mixed_dtypes(self, setup_path):
         df["timestamp2"] = Timestamp("20010103")
         df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0)
         df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0)
-        df.loc[3:6, ["obj1"]] = np.nan
+        df.loc[df.index[3:6], ["obj1"]] = np.nan
         df = df._consolidate()._convert(datetime=True)
 
         with ensure_clean_store(setup_path) as store:
@@ -2200,7 +2200,7 @@ def test_invalid_terms(self, setup_path):
 
                 df = tm.makeTimeDataFrame()
                 df["string"] = "foo"
-                df.loc[0:4, "string"] = "bar"
+                df.loc[df.index[0:4], "string"] = "bar"
 
                 store.put("df", df, format="table")
 
@@ -3343,7 +3343,7 @@ def test_string_select(self, setup_path):
 
             # test string ==/!=
             df["x"] = "none"
-            df.loc[2:7, "x"] = ""
+            df.loc[df.index[2:7], "x"] = ""
 
             store.append("df", df, data_columns=["x"])
 
@@ -3365,7 +3365,7 @@ def test_string_select(self, setup_path):
 
             # int ==/!=
             df["int"] = 1
-            df.loc[2:7, "int"] = 2
+            df.loc[df.index[2:7], "int"] = 2
 
             store.append("df3", df, data_columns=["int"])
 
@@ -3419,7 +3419,7 @@ def test_read_column(self, setup_path):
             # a data column with NaNs, result excludes the NaNs
             df3 = df.copy()
             df3["string"] = "foo"
-            df3.loc[4:6, "string"] = np.nan
+            df3.loc[df3.index[4:6], "string"] = np.nan
             store.append("df3", df3, data_columns=["string"])
             result = store.select_column("df3", "string")
             tm.assert_almost_equal(result.values, df3["string"].values)
diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py
diff --git a/pandas/tests/series/methods/test_asof.py b/pandas/tests/series/methods/test_asof.py
diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py

Original file line number	Diff line number	Diff line change
`@@ -172,7 +172,7 @@ Deprecations`
`172`	`172`	`~~~~~~~~~~~~`
`173`	`173`	- Lookups on a :class:`Series` with a single-item list containing a slice (e.g. ``ser[[slice(0, 4)]]``) are deprecated, will raise in a future version. Either convert the list to tuple, or pass the slice directly instead (:issue:`31333`)
`174`	`174`	- :meth:`DataFrame.mean` and :meth:`DataFrame.median` with ``numeric_only=None`` will include datetime64 and datetime64tz columns in a future version (:issue:`29941`)
`175`		`--`
	`175`	+- Setting values with ``.loc`` using a positional slice is deprecated and will raise in a future version. Use ``.loc`` with labels or ``.iloc`` with positions instead (:issue:`31840`)
`176`	`176`	`-`
`177`	`177`
`178`	`178`	`.. ---------------------------------------------------------------------------`
Original file line number	Diff line number	Diff line change
`@@ -761,7 +761,7 @@ def create_cols(name):`
`761`	`761`	`)`
`762`	`762`
`763`	`763`	`# add in some nans`
`764`		`- df_float.loc[30:50, 1:3] = np.nan`
	`764`	`+ df_float.iloc[30:50, 1:3] = np.nan`
`765`	`765`
`766`	`766`	`# ## this is a bug in read_csv right now ####`
`767`	`767`	`# df_dt.loc[30:50,1:3] = np.nan`