Skip to content

Commit f6d3e59

Browse files
MarcoGorelli authored and pmhatre1 committed
BUG raise pdep6 warning for loc full setter (pandas-dev#56146)
1 parent 9642eec commit f6d3e59

File tree

13 files changed

+89
-31
lines changed

13 files changed

+89
-31
lines changed

doc/source/whatsnew/v2.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -817,6 +817,7 @@ Conversion
817817
- Bug in :meth:`DataFrame.astype` when called with ``str`` on unpickled array - the array might change in-place (:issue:`54654`)
818818
- Bug in :meth:`DataFrame.astype` where ``errors="ignore"`` had no effect for extension types (:issue:`54654`)
819819
- Bug in :meth:`Series.convert_dtypes` not converting all NA column to ``null[pyarrow]`` (:issue:`55346`)
820+
- Bug in ``DataFrame.loc`` not raising the "incompatible dtype" warning (see `PDEP6 <https://pandas.pydata.org/pdeps/0006-ban-upcasting.html>`_) when assigning a ``Series`` with a different dtype using a full column setter (e.g. ``df.loc[:, 'a'] = incompatible_value``) (:issue:`39584`)
820821

821822
Strings
822823
^^^^^^^

pandas/core/indexing.py

+20
Original file line numberDiff line numberDiff line change
@@ -2143,6 +2143,26 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None:
21432143
# If we're setting an entire column and we can't do it inplace,
21442144
# then we can use value's dtype (or inferred dtype)
21452145
# instead of object
2146+
dtype = self.obj.dtypes.iloc[loc]
2147+
if dtype not in (np.void, object) and not self.obj.empty:
2148+
# - Exclude np.void, as that is a special case for expansion.
2149+
# We want to warn for
2150+
# df = pd.DataFrame({'a': [1, 2]})
2151+
# df.loc[:, 'a'] = .3
2152+
# but not for
2153+
# df = pd.DataFrame({'a': [1, 2]})
2154+
# df.loc[:, 'b'] = .3
2155+
# - Exclude `object`, as then no upcasting happens.
2156+
# - Exclude empty initial object with enlargement,
2157+
# as then there's nothing to be inconsistent with.
2158+
warnings.warn(
2159+
f"Setting an item of incompatible dtype is deprecated "
2160+
"and will raise in a future error of pandas. "
2161+
f"Value '{value}' has dtype incompatible with {dtype}, "
2162+
"please explicitly cast to a compatible dtype first.",
2163+
FutureWarning,
2164+
stacklevel=find_stack_level(),
2165+
)
21462166
self.obj.isetitem(loc, value)
21472167
else:
21482168
# set value into the column (first attempting to operate inplace, then

pandas/core/internals/blocks.py

+3
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,9 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block:
499499
and is_integer_dtype(self.values.dtype)
500500
and isna(other)
501501
and other is not NaT
502+
and not (
503+
isinstance(other, (np.datetime64, np.timedelta64)) and np.isnat(other)
504+
)
502505
):
503506
warn_on_upcast = False
504507
elif (

pandas/tests/copy_view/test_indexing.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -1103,11 +1103,16 @@ def test_set_value_copy_only_necessary_column(
11031103
df_orig = df.copy()
11041104
view = df[:]
11051105

1106-
if val == "a" and indexer[0] != slice(None):
1106+
if val == "a" and not warn_copy_on_write:
11071107
with tm.assert_produces_warning(
11081108
FutureWarning, match="Setting an item of incompatible dtype is deprecated"
11091109
):
11101110
indexer_func(df)[indexer] = val
1111+
if val == "a" and warn_copy_on_write:
1112+
with tm.assert_produces_warning(
1113+
FutureWarning, match="incompatible dtype|Setting a value on a view"
1114+
):
1115+
indexer_func(df)[indexer] = val
11111116
else:
11121117
with tm.assert_cow_warning(warn_copy_on_write and val == 100):
11131118
indexer_func(df)[indexer] = val

pandas/tests/frame/indexing/test_indexing.py

+11-10
Original file line numberDiff line numberDiff line change
@@ -945,7 +945,8 @@ def test_setitem_frame_upcast(self):
945945
# needs upcasting
946946
df = DataFrame([[1, 2, "foo"], [3, 4, "bar"]], columns=["A", "B", "C"])
947947
df2 = df.copy()
948-
df2.loc[:, ["A", "B"]] = df.loc[:, ["A", "B"]] + 0.5
948+
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
949+
df2.loc[:, ["A", "B"]] = df.loc[:, ["A", "B"]] + 0.5
949950
expected = df.reindex(columns=["A", "B"])
950951
expected += 0.5
951952
expected["C"] = df["C"]
@@ -1381,20 +1382,20 @@ def test_loc_expand_empty_frame_keep_midx_names(self):
13811382
tm.assert_frame_equal(df, expected)
13821383

13831384
@pytest.mark.parametrize(
1384-
"val, idxr, warn",
1385+
"val, idxr",
13851386
[
1386-
("x", "a", None), # TODO: this should warn as well
1387-
("x", ["a"], None), # TODO: this should warn as well
1388-
(1, "a", None), # TODO: this should warn as well
1389-
(1, ["a"], FutureWarning),
1387+
("x", "a"),
1388+
("x", ["a"]),
1389+
(1, "a"),
1390+
(1, ["a"]),
13901391
],
13911392
)
1392-
def test_loc_setitem_rhs_frame(self, idxr, val, warn):
1393+
def test_loc_setitem_rhs_frame(self, idxr, val):
13931394
# GH#47578
13941395
df = DataFrame({"a": [1, 2]})
13951396

13961397
with tm.assert_produces_warning(
1397-
warn, match="Setting an item of incompatible dtype"
1398+
FutureWarning, match="Setting an item of incompatible dtype"
13981399
):
13991400
df.loc[:, idxr] = DataFrame({"a": [val, 11]}, index=[1, 2])
14001401
expected = DataFrame({"a": [np.nan, val]})
@@ -1968,7 +1969,7 @@ def _check_setitem_invalid(self, df, invalid, indexer, warn):
19681969
np.datetime64("NaT"),
19691970
np.timedelta64("NaT"),
19701971
]
1971-
_indexers = [0, [0], slice(0, 1), [True, False, False]]
1972+
_indexers = [0, [0], slice(0, 1), [True, False, False], slice(None, None, None)]
19721973

19731974
@pytest.mark.parametrize(
19741975
"invalid", _invalid_scalars + [1, 1.0, np.int64(1), np.float64(1)]
@@ -1982,7 +1983,7 @@ def test_setitem_validation_scalar_bool(self, invalid, indexer):
19821983
@pytest.mark.parametrize("indexer", _indexers)
19831984
def test_setitem_validation_scalar_int(self, invalid, any_int_numpy_dtype, indexer):
19841985
df = DataFrame({"a": [1, 2, 3]}, dtype=any_int_numpy_dtype)
1985-
if isna(invalid) and invalid is not pd.NaT:
1986+
if isna(invalid) and invalid is not pd.NaT and not np.isnat(invalid):
19861987
warn = None
19871988
else:
19881989
warn = FutureWarning

pandas/tests/frame/indexing/test_setitem.py

+20
Original file line numberDiff line numberDiff line change
@@ -1369,3 +1369,23 @@ def test_frame_setitem_empty_dataframe(self):
13691369
index=dti[:0],
13701370
)
13711371
tm.assert_frame_equal(df, expected)
1372+
1373+
1374+
def test_full_setter_loc_incompatible_dtype():
1375+
# https://github.com/pandas-dev/pandas/issues/55791
1376+
df = DataFrame({"a": [1, 2]})
1377+
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
1378+
df.loc[:, "a"] = True
1379+
expected = DataFrame({"a": [True, True]})
1380+
tm.assert_frame_equal(df, expected)
1381+
1382+
df = DataFrame({"a": [1, 2]})
1383+
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
1384+
df.loc[:, "a"] = {0: 3.5, 1: 4.5}
1385+
expected = DataFrame({"a": [3.5, 4.5]})
1386+
tm.assert_frame_equal(df, expected)
1387+
1388+
df = DataFrame({"a": [1, 2]})
1389+
df.loc[:, "a"] = {0: 3, 1: 4}
1390+
expected = DataFrame({"a": [3, 4]})
1391+
tm.assert_frame_equal(df, expected)

pandas/tests/frame/methods/test_update.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -158,11 +158,8 @@ def test_update_with_different_dtype(self, using_copy_on_write):
158158
# GH#3217
159159
df = DataFrame({"a": [1, 3], "b": [np.nan, 2]})
160160
df["c"] = np.nan
161-
if using_copy_on_write:
161+
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
162162
df.update({"c": Series(["foo"], index=[0])})
163-
else:
164-
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
165-
df["c"].update(Series(["foo"], index=[0]))
166163

167164
expected = DataFrame(
168165
{

pandas/tests/frame/test_constructors.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2815,7 +2815,7 @@ def test_dict_data_arrow_column_expansion(self, key_val, col_vals, col_type):
28152815
)
28162816
result = DataFrame({key_val: [1, 2]}, columns=cols)
28172817
expected = DataFrame([[1, np.nan], [2, np.nan]], columns=cols)
2818-
expected.iloc[:, 1] = expected.iloc[:, 1].astype(object)
2818+
expected.isetitem(1, expected.iloc[:, 1].astype(object))
28192819
tm.assert_frame_equal(result, expected)
28202820

28212821

pandas/tests/indexing/test_iloc.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -534,7 +534,8 @@ def test_iloc_setitem_frame_duplicate_columns_multiple_blocks(self):
534534

535535
# if the assigned values cannot be held by existing integer arrays,
536536
# we cast
537-
df.iloc[:, 0] = df.iloc[:, 0] + 0.5
537+
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
538+
df.iloc[:, 0] = df.iloc[:, 0] + 0.5
538539
assert len(df._mgr.blocks) == 2
539540

540541
expected = df.copy()
@@ -1468,6 +1469,7 @@ def test_iloc_setitem_pure_position_based(self):
14681469
def test_iloc_nullable_int64_size_1_nan(self):
14691470
# GH 31861
14701471
result = DataFrame({"a": ["test"], "b": [np.nan]})
1471-
result.loc[:, "b"] = result.loc[:, "b"].astype("Int64")
1472+
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
1473+
result.loc[:, "b"] = result.loc[:, "b"].astype("Int64")
14721474
expected = DataFrame({"a": ["test"], "b": array([NA], dtype="Int64")})
14731475
tm.assert_frame_equal(result, expected)

pandas/tests/indexing/test_loc.py

+17-8
Original file line numberDiff line numberDiff line change
@@ -578,7 +578,8 @@ def test_loc_setitem_consistency(self, frame_for_consistency, val):
578578
}
579579
)
580580
df = frame_for_consistency.copy()
581-
df.loc[:, "date"] = val
581+
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
582+
df.loc[:, "date"] = val
582583
tm.assert_frame_equal(df, expected)
583584

584585
def test_loc_setitem_consistency_dt64_to_str(self, frame_for_consistency):
@@ -592,7 +593,8 @@ def test_loc_setitem_consistency_dt64_to_str(self, frame_for_consistency):
592593
}
593594
)
594595
df = frame_for_consistency.copy()
595-
df.loc[:, "date"] = "foo"
596+
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
597+
df.loc[:, "date"] = "foo"
596598
tm.assert_frame_equal(df, expected)
597599

598600
def test_loc_setitem_consistency_dt64_to_float(self, frame_for_consistency):
@@ -605,14 +607,16 @@ def test_loc_setitem_consistency_dt64_to_float(self, frame_for_consistency):
605607
}
606608
)
607609
df = frame_for_consistency.copy()
608-
df.loc[:, "date"] = 1.0
610+
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
611+
df.loc[:, "date"] = 1.0
609612
tm.assert_frame_equal(df, expected)
610613

611614
def test_loc_setitem_consistency_single_row(self):
612615
# GH 15494
613616
# setting on frame with single row
614617
df = DataFrame({"date": Series([Timestamp("20180101")])})
615-
df.loc[:, "date"] = "string"
618+
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
619+
df.loc[:, "date"] = "string"
616620
expected = DataFrame({"date": Series(["string"])})
617621
tm.assert_frame_equal(df, expected)
618622

@@ -672,9 +676,10 @@ def test_loc_setitem_consistency_slice_column_len(self):
672676

673677
# timedelta64[m] -> float, so this cannot be done inplace, so
674678
# the incompatible-dtype FutureWarning is emitted
675-
df.loc[:, ("Respondent", "Duration")] = df.loc[
676-
:, ("Respondent", "Duration")
677-
] / Timedelta(60_000_000_000)
679+
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
680+
df.loc[:, ("Respondent", "Duration")] = df.loc[
681+
:, ("Respondent", "Duration")
682+
] / Timedelta(60_000_000_000)
678683

679684
expected = Series(
680685
[23.0, 12.0, 14.0, 36.0], index=df.index, name=("Respondent", "Duration")
@@ -1481,7 +1486,11 @@ def test_loc_setitem_datetimeindex_tz(self, idxer, tz_naive_fixture):
14811486
# if result started off with object dtype, then the .loc.__setitem__
14821487
# below would retain object dtype
14831488
result = DataFrame(index=idx, columns=["var"], dtype=np.float64)
1484-
result.loc[:, idxer] = expected
1489+
with tm.assert_produces_warning(
1490+
FutureWarning if idxer == "var" else None, match="incompatible dtype"
1491+
):
1492+
# See https://github.com/pandas-dev/pandas/issues/56223
1493+
result.loc[:, idxer] = expected
14851494
tm.assert_frame_equal(result, expected)
14861495

14871496
def test_loc_setitem_time_key(self):

pandas/tests/io/json/test_pandas.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ def test_frame_non_unique_columns(self, orient, data):
168168
# in milliseconds; these are internally stored in nanosecond,
169169
# so divide to get where we need
170170
# TODO: a to_epoch method would also solve; see GH 14772
171-
expected.iloc[:, 0] = expected.iloc[:, 0].astype(np.int64) // 1000000
171+
expected.isetitem(0, expected.iloc[:, 0].astype(np.int64) // 1000000)
172172
elif orient == "split":
173173
expected = df
174174
expected.columns = ["x", "x.1"]

pandas/tests/reshape/merge/test_merge.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -2964,9 +2964,9 @@ def test_merge_empty_frames_column_order(left_empty, right_empty):
29642964
if left_empty and right_empty:
29652965
expected = expected.iloc[:0]
29662966
elif left_empty:
2967-
expected.loc[:, "B"] = np.nan
2967+
expected["B"] = np.nan
29682968
elif right_empty:
2969-
expected.loc[:, ["C", "D"]] = np.nan
2969+
expected[["C", "D"]] = np.nan
29702970
tm.assert_frame_equal(result, expected)
29712971

29722972

pandas/tests/series/indexing/test_indexing.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -491,7 +491,7 @@ def _check_setitem_invalid(self, ser, invalid, indexer, warn):
491491
np.datetime64("NaT"),
492492
np.timedelta64("NaT"),
493493
]
494-
_indexers = [0, [0], slice(0, 1), [True, False, False]]
494+
_indexers = [0, [0], slice(0, 1), [True, False, False], slice(None, None, None)]
495495

496496
@pytest.mark.parametrize(
497497
"invalid", _invalid_scalars + [1, 1.0, np.int64(1), np.float64(1)]
@@ -505,7 +505,7 @@ def test_setitem_validation_scalar_bool(self, invalid, indexer):
505505
@pytest.mark.parametrize("indexer", _indexers)
506506
def test_setitem_validation_scalar_int(self, invalid, any_int_numpy_dtype, indexer):
507507
ser = Series([1, 2, 3], dtype=any_int_numpy_dtype)
508-
if isna(invalid) and invalid is not NaT:
508+
if isna(invalid) and invalid is not NaT and not np.isnat(invalid):
509509
warn = None
510510
else:
511511
warn = FutureWarning

0 commit comments

Comments
 (0)