pandas-dev · MarcoGorelli · Jan 10, 2024 · Jan 9, 2024
diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
@@ -817,6 +817,7 @@ Conversion
 - Bug in :meth:`DataFrame.astype` when called with ``str`` on unpickled array - the array might change in-place (:issue:`54654`)
 - Bug in :meth:`DataFrame.astype` where ``errors="ignore"`` had no effect for extension types (:issue:`54654`)
 - Bug in :meth:`Series.convert_dtypes` not converting all NA column to ``null[pyarrow]`` (:issue:`55346`)
+- Bug in ``DataFrame.loc`` was not throwing "incompatible dtype warning" (see `PDEP6 <https://pandas.pydata.org/pdeps/0006-ban-upcasting.html>`_) when assigning a ``Series`` with a different dtype using a full column setter (e.g. ``df.loc[:, 'a'] = incompatible_value``) (:issue:`39584`)
 
 Strings
 ^^^^^^^

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -2141,6 +2141,26 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None:
                 # If we're setting an entire column and we can't do it inplace,
                 #  then we can use value's dtype (or inferred dtype)
                 #  instead of object
+                dtype = self.obj.dtypes.iloc[loc]
+                if dtype not in (np.void, object) and not self.obj.empty:
+                    # - Exclude np.void, as that is a special case for expansion.
+                    #   We want to warn for
+                    #       df = pd.DataFrame({'a': [1, 2]})
+                    #       df.loc[:, 'a'] = .3
+                    #   but not for
+                    #       df = pd.DataFrame({'a': [1, 2]})
+                    #       df.loc[:, 'b'] = .3
+                    # - Exclude `object`, as then no upcasting happens.
+                    # - Exclude empty initial object with enlargement,
+                    #   as then there's nothing to be inconsistent with.
+                    warnings.warn(
+                        f"Setting an item of incompatible dtype is deprecated "
+                        "and will raise in a future error of pandas. "
+                        f"Value '{value}' has dtype incompatible with {dtype}, "
+                        "please explicitly cast to a compatible dtype first.",
+                        FutureWarning,
+                        stacklevel=find_stack_level(),
+                    )
                 self.obj.isetitem(loc, value)
         else:
             # set value into the column (first attempting to operate inplace, then

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -499,6 +499,9 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block:
             and is_integer_dtype(self.values.dtype)
             and isna(other)
             and other is not NaT
+            and not (
+                isinstance(other, (np.datetime64, np.timedelta64)) and np.isnat(other)
+            )
         ):
             warn_on_upcast = False
         elif (

diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py
@@ -1144,11 +1144,16 @@ def test_set_value_copy_only_necessary_column(
     df_orig = df.copy()
     view = df[:]
 
-    if val == "a" and indexer[0] != slice(None):
+    if val == "a" and not warn_copy_on_write:
         with tm.assert_produces_warning(
             FutureWarning, match="Setting an item of incompatible dtype is deprecated"
         ):
             indexer_func(df)[indexer] = val
+    if val == "a" and warn_copy_on_write:
+        with tm.assert_produces_warning(
+            FutureWarning, match="incompatible dtype|Setting a value on a view"
+        ):
+            indexer_func(df)[indexer] = val
     else:
         with tm.assert_cow_warning(warn_copy_on_write and val == 100):
             indexer_func(df)[indexer] = val

diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
@@ -949,7 +949,8 @@ def test_setitem_frame_upcast(self):
         # needs upcasting
         df = DataFrame([[1, 2, "foo"], [3, 4, "bar"]], columns=["A", "B", "C"])
         df2 = df.copy()
-        df2.loc[:, ["A", "B"]] = df.loc[:, ["A", "B"]] + 0.5
+        with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
+            df2.loc[:, ["A", "B"]] = df.loc[:, ["A", "B"]] + 0.5
         expected = df.reindex(columns=["A", "B"])
         expected += 0.5
         expected["C"] = df["C"]
@@ -1387,20 +1388,20 @@ def test_loc_expand_empty_frame_keep_midx_names(self):
         tm.assert_frame_equal(df, expected)
 
     @pytest.mark.parametrize(
-        "val, idxr, warn",
+        "val, idxr",
         [
-            ("x", "a", None),  # TODO: this should warn as well
-            ("x", ["a"], None),  # TODO: this should warn as well
-            (1, "a", None),  # TODO: this should warn as well
-            (1, ["a"], FutureWarning),
+            ("x", "a"),
+            ("x", ["a"]),
+            (1, "a"),
+            (1, ["a"]),
         ],
     )
-    def test_loc_setitem_rhs_frame(self, idxr, val, warn):
+    def test_loc_setitem_rhs_frame(self, idxr, val):
         # GH#47578
         df = DataFrame({"a": [1, 2]})
 
         with tm.assert_produces_warning(
-            warn, match="Setting an item of incompatible dtype"
+            FutureWarning, match="Setting an item of incompatible dtype"
         ):
             df.loc[:, idxr] = DataFrame({"a": [val, 11]}, index=[1, 2])
         expected = DataFrame({"a": [np.nan, val]})
@@ -1996,7 +1997,7 @@ def _check_setitem_invalid(self, df, invalid, indexer, warn):
         np.datetime64("NaT"),
         np.timedelta64("NaT"),
     ]
-    _indexers = [0, [0], slice(0, 1), [True, False, False]]
+    _indexers = [0, [0], slice(0, 1), [True, False, False], slice(None, None, None)]
 
     @pytest.mark.parametrize(
         "invalid", _invalid_scalars + [1, 1.0, np.int64(1), np.float64(1)]
@@ -2010,7 +2011,7 @@ def test_setitem_validation_scalar_bool(self, invalid, indexer):
     @pytest.mark.parametrize("indexer", _indexers)
     def test_setitem_validation_scalar_int(self, invalid, any_int_numpy_dtype, indexer):
         df = DataFrame({"a": [1, 2, 3]}, dtype=any_int_numpy_dtype)
-        if isna(invalid) and invalid is not pd.NaT:
+        if isna(invalid) and invalid is not pd.NaT and not np.isnat(invalid):
             warn = None
         else:
             warn = FutureWarning

diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py
@@ -1381,3 +1381,23 @@ def test_frame_setitem_empty_dataframe(self):
             index=dti[:0],
         )
         tm.assert_frame_equal(df, expected)
+
+
+def test_full_setter_loc_incompatible_dtype():
+    # https://github.com/pandas-dev/pandas/issues/55791
+    df = DataFrame({"a": [1, 2]})
+    with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
+        df.loc[:, "a"] = True
+    expected = DataFrame({"a": [True, True]})
+    tm.assert_frame_equal(df, expected)
+
+    df = DataFrame({"a": [1, 2]})
+    with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
+        df.loc[:, "a"] = {0: 3.5, 1: 4.5}
+    expected = DataFrame({"a": [3.5, 4.5]})
+    tm.assert_frame_equal(df, expected)
+
+    df = DataFrame({"a": [1, 2]})
+    df.loc[:, "a"] = {0: 3, 1: 4}
+    expected = DataFrame({"a": [3, 4]})
+    tm.assert_frame_equal(df, expected)
diff --git a/pandas/tests/frame/methods/test_update.py b/pandas/tests/frame/methods/test_update.py
@@ -160,11 +160,8 @@ def test_update_with_different_dtype(self, using_copy_on_write):
         # GH#3217
         df = DataFrame({"a": [1, 3], "b": [np.nan, 2]})
         df["c"] = np.nan
-        if using_copy_on_write:
+        with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
             df.update({"c": Series(["foo"], index=[0])})
-        else:
-            with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
-                df["c"].update(Series(["foo"], index=[0]))
 
         expected = DataFrame(
             {

diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
@@ -2857,7 +2857,7 @@ def test_dict_data_arrow_column_expansion(self, key_val, col_vals, col_type):
         )
         result = DataFrame({key_val: [1, 2]}, columns=cols)
         expected = DataFrame([[1, np.nan], [2, np.nan]], columns=cols)
-        expected.iloc[:, 1] = expected.iloc[:, 1].astype(object)
+        expected.isetitem(1, expected.iloc[:, 1].astype(object))
         tm.assert_frame_equal(result, expected)
 
 

diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
@@ -535,7 +535,8 @@ def test_iloc_setitem_frame_duplicate_columns_multiple_blocks(
 
         # if the assigned values cannot be held by existing integer arrays,
         #  we cast
-        df.iloc[:, 0] = df.iloc[:, 0] + 0.5
+        with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
+            df.iloc[:, 0] = df.iloc[:, 0] + 0.5
         if not using_array_manager:
             assert len(df._mgr.blocks) == 2
 
@@ -1471,6 +1472,7 @@ def test_iloc_setitem_pure_position_based(self):
     def test_iloc_nullable_int64_size_1_nan(self):
         # GH 31861
         result = DataFrame({"a": ["test"], "b": [np.nan]})
-        result.loc[:, "b"] = result.loc[:, "b"].astype("Int64")
+        with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
+            result.loc[:, "b"] = result.loc[:, "b"].astype("Int64")
         expected = DataFrame({"a": ["test"], "b": array([NA], dtype="Int64")})
         tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
@@ -584,7 +584,8 @@ def test_loc_setitem_consistency(self, frame_for_consistency, val):
             }
         )
         df = frame_for_consistency.copy()
-        df.loc[:, "date"] = val
+        with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
+            df.loc[:, "date"] = val
         tm.assert_frame_equal(df, expected)
 
     def test_loc_setitem_consistency_dt64_to_str(self, frame_for_consistency):
@@ -598,7 +599,8 @@ def test_loc_setitem_consistency_dt64_to_str(self, frame_for_consistency):
             }
         )
         df = frame_for_consistency.copy()
-        df.loc[:, "date"] = "foo"
+        with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
+            df.loc[:, "date"] = "foo"
         tm.assert_frame_equal(df, expected)
 
     def test_loc_setitem_consistency_dt64_to_float(self, frame_for_consistency):
@@ -611,14 +613,16 @@ def test_loc_setitem_consistency_dt64_to_float(self, frame_for_consistency):
             }
         )
         df = frame_for_consistency.copy()
-        df.loc[:, "date"] = 1.0
+        with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
+            df.loc[:, "date"] = 1.0
         tm.assert_frame_equal(df, expected)
 
     def test_loc_setitem_consistency_single_row(self):
         # GH 15494
         # setting on frame with single row
         df = DataFrame({"date": Series([Timestamp("20180101")])})
-        df.loc[:, "date"] = "string"
+        with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
+            df.loc[:, "date"] = "string"
         expected = DataFrame({"date": Series(["string"])})
         tm.assert_frame_equal(df, expected)
 
@@ -678,9 +682,10 @@ def test_loc_setitem_consistency_slice_column_len(self):
 
         # timedelta64[m] -> float, so this cannot be done inplace, so
         #  no warning
-        df.loc[:, ("Respondent", "Duration")] = df.loc[
-            :, ("Respondent", "Duration")
-        ] / Timedelta(60_000_000_000)
+        with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
+            df.loc[:, ("Respondent", "Duration")] = df.loc[
+                :, ("Respondent", "Duration")
+            ] / Timedelta(60_000_000_000)
 
         expected = Series(
             [23.0, 12.0, 14.0, 36.0], index=df.index, name=("Respondent", "Duration")
@@ -1487,7 +1492,11 @@ def test_loc_setitem_datetimeindex_tz(self, idxer, tz_naive_fixture):
         # if result started off with object dtype, then the .loc.__setitem__
         #  below would retain object dtype
         result = DataFrame(index=idx, columns=["var"], dtype=np.float64)
-        result.loc[:, idxer] = expected
+        with tm.assert_produces_warning(
+            FutureWarning if idxer == "var" else None, match="incompatible dtype"
+        ):
+            # See https://github.com/pandas-dev/pandas/issues/56223
+            result.loc[:, idxer] = expected
         tm.assert_frame_equal(result, expected)
 
     def test_loc_setitem_time_key(self, using_array_manager):

diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
@@ -179,7 +179,7 @@ def test_frame_non_unique_columns(self, orient, data):
                 # in milliseconds; these are internally stored in nanosecond,
                 # so divide to get where we need
                 # TODO: a to_epoch method would also solve; see GH 14772
-                expected.iloc[:, 0] = expected.iloc[:, 0].astype(np.int64) // 1000000
+                expected.isetitem(0, expected.iloc[:, 0].astype(np.int64) // 1000000)
         elif orient == "split":
             expected = df
             expected.columns = ["x", "x.1"]

diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
@@ -2984,9 +2984,9 @@ def test_merge_empty_frames_column_order(left_empty, right_empty):
     if left_empty and right_empty:
         expected = expected.iloc[:0]
     elif left_empty:
-        expected.loc[:, "B"] = np.nan
+        expected["B"] = np.nan
     elif right_empty:
-        expected.loc[:, ["C", "D"]] = np.nan
+        expected[["C", "D"]] = np.nan
     tm.assert_frame_equal(result, expected)
 
 

diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py
@@ -491,7 +491,7 @@ def _check_setitem_invalid(self, ser, invalid, indexer, warn):
         np.datetime64("NaT"),
         np.timedelta64("NaT"),
     ]
-    _indexers = [0, [0], slice(0, 1), [True, False, False]]
+    _indexers = [0, [0], slice(0, 1), [True, False, False], slice(None, None, None)]
 
     @pytest.mark.parametrize(
         "invalid", _invalid_scalars + [1, 1.0, np.int64(1), np.float64(1)]
@@ -505,7 +505,7 @@ def test_setitem_validation_scalar_bool(self, invalid, indexer):
     @pytest.mark.parametrize("indexer", _indexers)
     def test_setitem_validation_scalar_int(self, invalid, any_int_numpy_dtype, indexer):
         ser = Series([1, 2, 3], dtype=any_int_numpy_dtype)
-        if isna(invalid) and invalid is not NaT:
+        if isna(invalid) and invalid is not NaT and not np.isnat(invalid):
             warn = None
         else:
             warn = FutureWarning