Skip to content

Commit 596ea0b

Browse files
authored
'Backport PR #56146: BUG raise pdep6 warning for loc full setter' (#56807)
1 parent 66df0bd commit 596ea0b

File tree

13 files changed

+89
-31
lines changed

13 files changed

+89
-31
lines changed

doc/source/whatsnew/v2.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -817,6 +817,7 @@ Conversion
817817
- Bug in :meth:`DataFrame.astype` when called with ``str`` on unpickled array - the array might change in-place (:issue:`54654`)
818818
- Bug in :meth:`DataFrame.astype` where ``errors="ignore"`` had no effect for extension types (:issue:`54654`)
819819
- Bug in :meth:`Series.convert_dtypes` not converting all NA column to ``null[pyarrow]`` (:issue:`55346`)
820+
- Bug in ``DataFrame.loc`` not raising the "incompatible dtype" warning (see `PDEP6 <https://pandas.pydata.org/pdeps/0006-ban-upcasting.html>`_) when assigning a ``Series`` with a different dtype using a full column setter (e.g. ``df.loc[:, 'a'] = incompatible_value``) (:issue:`39584`)
820821

821822
Strings
822823
^^^^^^^

pandas/core/indexing.py

+20
Original file line numberDiff line numberDiff line change
@@ -2141,6 +2141,26 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None:
21412141
# If we're setting an entire column and we can't do it inplace,
21422142
# then we can use value's dtype (or inferred dtype)
21432143
# instead of object
2144+
dtype = self.obj.dtypes.iloc[loc]
2145+
if dtype not in (np.void, object) and not self.obj.empty:
2146+
# - Exclude np.void, as that is a special case for expansion.
2147+
# We want to warn for
2148+
# df = pd.DataFrame({'a': [1, 2]})
2149+
# df.loc[:, 'a'] = .3
2150+
# but not for
2151+
# df = pd.DataFrame({'a': [1, 2]})
2152+
# df.loc[:, 'b'] = .3
2153+
# - Exclude `object`, as then no upcasting happens.
2154+
# - Exclude empty initial object with enlargement,
2155+
# as then there's nothing to be inconsistent with.
2156+
warnings.warn(
2157+
f"Setting an item of incompatible dtype is deprecated "
2158+
"and will raise in a future error of pandas. "
2159+
f"Value '{value}' has dtype incompatible with {dtype}, "
2160+
"please explicitly cast to a compatible dtype first.",
2161+
FutureWarning,
2162+
stacklevel=find_stack_level(),
2163+
)
21442164
self.obj.isetitem(loc, value)
21452165
else:
21462166
# set value into the column (first attempting to operate inplace, then

pandas/core/internals/blocks.py

+3
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,9 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block:
499499
and is_integer_dtype(self.values.dtype)
500500
and isna(other)
501501
and other is not NaT
502+
and not (
503+
isinstance(other, (np.datetime64, np.timedelta64)) and np.isnat(other)
504+
)
502505
):
503506
warn_on_upcast = False
504507
elif (

pandas/tests/copy_view/test_indexing.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -1144,11 +1144,16 @@ def test_set_value_copy_only_necessary_column(
11441144
df_orig = df.copy()
11451145
view = df[:]
11461146

1147-
if val == "a" and indexer[0] != slice(None):
1147+
if val == "a" and not warn_copy_on_write:
11481148
with tm.assert_produces_warning(
11491149
FutureWarning, match="Setting an item of incompatible dtype is deprecated"
11501150
):
11511151
indexer_func(df)[indexer] = val
1152+
if val == "a" and warn_copy_on_write:
1153+
with tm.assert_produces_warning(
1154+
FutureWarning, match="incompatible dtype|Setting a value on a view"
1155+
):
1156+
indexer_func(df)[indexer] = val
11521157
else:
11531158
with tm.assert_cow_warning(warn_copy_on_write and val == 100):
11541159
indexer_func(df)[indexer] = val

pandas/tests/frame/indexing/test_indexing.py

+11-10
Original file line numberDiff line numberDiff line change
@@ -949,7 +949,8 @@ def test_setitem_frame_upcast(self):
949949
# needs upcasting
950950
df = DataFrame([[1, 2, "foo"], [3, 4, "bar"]], columns=["A", "B", "C"])
951951
df2 = df.copy()
952-
df2.loc[:, ["A", "B"]] = df.loc[:, ["A", "B"]] + 0.5
952+
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
953+
df2.loc[:, ["A", "B"]] = df.loc[:, ["A", "B"]] + 0.5
953954
expected = df.reindex(columns=["A", "B"])
954955
expected += 0.5
955956
expected["C"] = df["C"]
@@ -1387,20 +1388,20 @@ def test_loc_expand_empty_frame_keep_midx_names(self):
13871388
tm.assert_frame_equal(df, expected)
13881389

13891390
@pytest.mark.parametrize(
1390-
"val, idxr, warn",
1391+
"val, idxr",
13911392
[
1392-
("x", "a", None), # TODO: this should warn as well
1393-
("x", ["a"], None), # TODO: this should warn as well
1394-
(1, "a", None), # TODO: this should warn as well
1395-
(1, ["a"], FutureWarning),
1393+
("x", "a"),
1394+
("x", ["a"]),
1395+
(1, "a"),
1396+
(1, ["a"]),
13961397
],
13971398
)
1398-
def test_loc_setitem_rhs_frame(self, idxr, val, warn):
1399+
def test_loc_setitem_rhs_frame(self, idxr, val):
13991400
# GH#47578
14001401
df = DataFrame({"a": [1, 2]})
14011402

14021403
with tm.assert_produces_warning(
1403-
warn, match="Setting an item of incompatible dtype"
1404+
FutureWarning, match="Setting an item of incompatible dtype"
14041405
):
14051406
df.loc[:, idxr] = DataFrame({"a": [val, 11]}, index=[1, 2])
14061407
expected = DataFrame({"a": [np.nan, val]})
@@ -1996,7 +1997,7 @@ def _check_setitem_invalid(self, df, invalid, indexer, warn):
19961997
np.datetime64("NaT"),
19971998
np.timedelta64("NaT"),
19981999
]
1999-
_indexers = [0, [0], slice(0, 1), [True, False, False]]
2000+
_indexers = [0, [0], slice(0, 1), [True, False, False], slice(None, None, None)]
20002001

20012002
@pytest.mark.parametrize(
20022003
"invalid", _invalid_scalars + [1, 1.0, np.int64(1), np.float64(1)]
@@ -2010,7 +2011,7 @@ def test_setitem_validation_scalar_bool(self, invalid, indexer):
20102011
@pytest.mark.parametrize("indexer", _indexers)
20112012
def test_setitem_validation_scalar_int(self, invalid, any_int_numpy_dtype, indexer):
20122013
df = DataFrame({"a": [1, 2, 3]}, dtype=any_int_numpy_dtype)
2013-
if isna(invalid) and invalid is not pd.NaT:
2014+
if isna(invalid) and invalid is not pd.NaT and not np.isnat(invalid):
20142015
warn = None
20152016
else:
20162017
warn = FutureWarning

pandas/tests/frame/indexing/test_setitem.py

+20
Original file line numberDiff line numberDiff line change
@@ -1381,3 +1381,23 @@ def test_frame_setitem_empty_dataframe(self):
13811381
index=dti[:0],
13821382
)
13831383
tm.assert_frame_equal(df, expected)
1384+
1385+
1386+
def test_full_setter_loc_incompatible_dtype():
1387+
# https://github.com/pandas-dev/pandas/issues/55791
1388+
df = DataFrame({"a": [1, 2]})
1389+
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
1390+
df.loc[:, "a"] = True
1391+
expected = DataFrame({"a": [True, True]})
1392+
tm.assert_frame_equal(df, expected)
1393+
1394+
df = DataFrame({"a": [1, 2]})
1395+
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
1396+
df.loc[:, "a"] = {0: 3.5, 1: 4.5}
1397+
expected = DataFrame({"a": [3.5, 4.5]})
1398+
tm.assert_frame_equal(df, expected)
1399+
1400+
df = DataFrame({"a": [1, 2]})
1401+
df.loc[:, "a"] = {0: 3, 1: 4}
1402+
expected = DataFrame({"a": [3, 4]})
1403+
tm.assert_frame_equal(df, expected)

pandas/tests/frame/methods/test_update.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -160,11 +160,8 @@ def test_update_with_different_dtype(self, using_copy_on_write):
160160
# GH#3217
161161
df = DataFrame({"a": [1, 3], "b": [np.nan, 2]})
162162
df["c"] = np.nan
163-
if using_copy_on_write:
163+
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
164164
df.update({"c": Series(["foo"], index=[0])})
165-
else:
166-
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
167-
df["c"].update(Series(["foo"], index=[0]))
168165

169166
expected = DataFrame(
170167
{

pandas/tests/frame/test_constructors.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2857,7 +2857,7 @@ def test_dict_data_arrow_column_expansion(self, key_val, col_vals, col_type):
28572857
)
28582858
result = DataFrame({key_val: [1, 2]}, columns=cols)
28592859
expected = DataFrame([[1, np.nan], [2, np.nan]], columns=cols)
2860-
expected.iloc[:, 1] = expected.iloc[:, 1].astype(object)
2860+
expected.isetitem(1, expected.iloc[:, 1].astype(object))
28612861
tm.assert_frame_equal(result, expected)
28622862

28632863

pandas/tests/indexing/test_iloc.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -535,7 +535,8 @@ def test_iloc_setitem_frame_duplicate_columns_multiple_blocks(
535535

536536
# if the assigned values cannot be held by existing integer arrays,
537537
# we cast
538-
df.iloc[:, 0] = df.iloc[:, 0] + 0.5
538+
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
539+
df.iloc[:, 0] = df.iloc[:, 0] + 0.5
539540
if not using_array_manager:
540541
assert len(df._mgr.blocks) == 2
541542

@@ -1471,6 +1472,7 @@ def test_iloc_setitem_pure_position_based(self):
14711472
def test_iloc_nullable_int64_size_1_nan(self):
14721473
# GH 31861
14731474
result = DataFrame({"a": ["test"], "b": [np.nan]})
1474-
result.loc[:, "b"] = result.loc[:, "b"].astype("Int64")
1475+
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
1476+
result.loc[:, "b"] = result.loc[:, "b"].astype("Int64")
14751477
expected = DataFrame({"a": ["test"], "b": array([NA], dtype="Int64")})
14761478
tm.assert_frame_equal(result, expected)

pandas/tests/indexing/test_loc.py

+17-8
Original file line numberDiff line numberDiff line change
@@ -584,7 +584,8 @@ def test_loc_setitem_consistency(self, frame_for_consistency, val):
584584
}
585585
)
586586
df = frame_for_consistency.copy()
587-
df.loc[:, "date"] = val
587+
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
588+
df.loc[:, "date"] = val
588589
tm.assert_frame_equal(df, expected)
589590

590591
def test_loc_setitem_consistency_dt64_to_str(self, frame_for_consistency):
@@ -598,7 +599,8 @@ def test_loc_setitem_consistency_dt64_to_str(self, frame_for_consistency):
598599
}
599600
)
600601
df = frame_for_consistency.copy()
601-
df.loc[:, "date"] = "foo"
602+
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
603+
df.loc[:, "date"] = "foo"
602604
tm.assert_frame_equal(df, expected)
603605

604606
def test_loc_setitem_consistency_dt64_to_float(self, frame_for_consistency):
@@ -611,14 +613,16 @@ def test_loc_setitem_consistency_dt64_to_float(self, frame_for_consistency):
611613
}
612614
)
613615
df = frame_for_consistency.copy()
614-
df.loc[:, "date"] = 1.0
616+
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
617+
df.loc[:, "date"] = 1.0
615618
tm.assert_frame_equal(df, expected)
616619

617620
def test_loc_setitem_consistency_single_row(self):
618621
# GH 15494
619622
# setting on frame with single row
620623
df = DataFrame({"date": Series([Timestamp("20180101")])})
621-
df.loc[:, "date"] = "string"
624+
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
625+
df.loc[:, "date"] = "string"
622626
expected = DataFrame({"date": Series(["string"])})
623627
tm.assert_frame_equal(df, expected)
624628

@@ -678,9 +682,10 @@ def test_loc_setitem_consistency_slice_column_len(self):
678682

679683
# timedelta64[m] -> float, so this cannot be done inplace, so
680684
# a FutureWarning about the incompatible dtype is raised
681-
df.loc[:, ("Respondent", "Duration")] = df.loc[
682-
:, ("Respondent", "Duration")
683-
] / Timedelta(60_000_000_000)
685+
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
686+
df.loc[:, ("Respondent", "Duration")] = df.loc[
687+
:, ("Respondent", "Duration")
688+
] / Timedelta(60_000_000_000)
684689

685690
expected = Series(
686691
[23.0, 12.0, 14.0, 36.0], index=df.index, name=("Respondent", "Duration")
@@ -1487,7 +1492,11 @@ def test_loc_setitem_datetimeindex_tz(self, idxer, tz_naive_fixture):
14871492
# if result started off with object dtype, then the .loc.__setitem__
14881493
# below would retain object dtype
14891494
result = DataFrame(index=idx, columns=["var"], dtype=np.float64)
1490-
result.loc[:, idxer] = expected
1495+
with tm.assert_produces_warning(
1496+
FutureWarning if idxer == "var" else None, match="incompatible dtype"
1497+
):
1498+
# See https://github.com/pandas-dev/pandas/issues/56223
1499+
result.loc[:, idxer] = expected
14911500
tm.assert_frame_equal(result, expected)
14921501

14931502
def test_loc_setitem_time_key(self, using_array_manager):

pandas/tests/io/json/test_pandas.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ def test_frame_non_unique_columns(self, orient, data):
179179
# in milliseconds; these are internally stored in nanosecond,
180180
# so divide to get where we need
181181
# TODO: a to_epoch method would also solve; see GH 14772
182-
expected.iloc[:, 0] = expected.iloc[:, 0].astype(np.int64) // 1000000
182+
expected.isetitem(0, expected.iloc[:, 0].astype(np.int64) // 1000000)
183183
elif orient == "split":
184184
expected = df
185185
expected.columns = ["x", "x.1"]

pandas/tests/reshape/merge/test_merge.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -2984,9 +2984,9 @@ def test_merge_empty_frames_column_order(left_empty, right_empty):
29842984
if left_empty and right_empty:
29852985
expected = expected.iloc[:0]
29862986
elif left_empty:
2987-
expected.loc[:, "B"] = np.nan
2987+
expected["B"] = np.nan
29882988
elif right_empty:
2989-
expected.loc[:, ["C", "D"]] = np.nan
2989+
expected[["C", "D"]] = np.nan
29902990
tm.assert_frame_equal(result, expected)
29912991

29922992

pandas/tests/series/indexing/test_indexing.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -491,7 +491,7 @@ def _check_setitem_invalid(self, ser, invalid, indexer, warn):
491491
np.datetime64("NaT"),
492492
np.timedelta64("NaT"),
493493
]
494-
_indexers = [0, [0], slice(0, 1), [True, False, False]]
494+
_indexers = [0, [0], slice(0, 1), [True, False, False], slice(None, None, None)]
495495

496496
@pytest.mark.parametrize(
497497
"invalid", _invalid_scalars + [1, 1.0, np.int64(1), np.float64(1)]
@@ -505,7 +505,7 @@ def test_setitem_validation_scalar_bool(self, invalid, indexer):
505505
@pytest.mark.parametrize("indexer", _indexers)
506506
def test_setitem_validation_scalar_int(self, invalid, any_int_numpy_dtype, indexer):
507507
ser = Series([1, 2, 3], dtype=any_int_numpy_dtype)
508-
if isna(invalid) and invalid is not NaT:
508+
if isna(invalid) and invalid is not NaT and not np.isnat(invalid):
509509
warn = None
510510
else:
511511
warn = FutureWarning

0 commit comments

Comments
 (0)