Skip to content

'Backport PR #56146: BUG raise pdep6 warning for loc full setter' #56807

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -817,6 +817,7 @@ Conversion
- Bug in :meth:`DataFrame.astype` when called with ``str`` on unpickled array - the array might change in-place (:issue:`54654`)
- Bug in :meth:`DataFrame.astype` where ``errors="ignore"`` had no effect for extension types (:issue:`54654`)
- Bug in :meth:`Series.convert_dtypes` not converting all NA column to ``null[pyarrow]`` (:issue:`55346`)
- Bug in ``DataFrame.loc`` was not throwing "incompatible dtype warning" (see `PDEP6 <https://pandas.pydata.org/pdeps/0006-ban-upcasting.html>`_) when assigning a ``Series`` with a different dtype using a full column setter (e.g. ``df.loc[:, 'a'] = incompatible_value``) (:issue:`39584`)

Strings
^^^^^^^
Expand Down
20 changes: 20 additions & 0 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2141,6 +2141,26 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None:
# If we're setting an entire column and we can't do it inplace,
# then we can use value's dtype (or inferred dtype)
# instead of object
dtype = self.obj.dtypes.iloc[loc]
if dtype not in (np.void, object) and not self.obj.empty:
# - Exclude np.void, as that is a special case for expansion.
# We want to warn for
# df = pd.DataFrame({'a': [1, 2]})
# df.loc[:, 'a'] = .3
# but not for
# df = pd.DataFrame({'a': [1, 2]})
# df.loc[:, 'b'] = .3
# - Exclude `object`, as then no upcasting happens.
# - Exclude empty initial object with enlargement,
# as then there's nothing to be inconsistent with.
warnings.warn(
f"Setting an item of incompatible dtype is deprecated "
"and will raise in a future error of pandas. "
f"Value '{value}' has dtype incompatible with {dtype}, "
"please explicitly cast to a compatible dtype first.",
FutureWarning,
stacklevel=find_stack_level(),
)
self.obj.isetitem(loc, value)
else:
# set value into the column (first attempting to operate inplace, then
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,9 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block:
and is_integer_dtype(self.values.dtype)
and isna(other)
and other is not NaT
and not (
isinstance(other, (np.datetime64, np.timedelta64)) and np.isnat(other)
)
):
warn_on_upcast = False
elif (
Expand Down
7 changes: 6 additions & 1 deletion pandas/tests/copy_view/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1144,11 +1144,16 @@ def test_set_value_copy_only_necessary_column(
df_orig = df.copy()
view = df[:]

if val == "a" and indexer[0] != slice(None):
if val == "a" and not warn_copy_on_write:
with tm.assert_produces_warning(
FutureWarning, match="Setting an item of incompatible dtype is deprecated"
):
indexer_func(df)[indexer] = val
if val == "a" and warn_copy_on_write:
with tm.assert_produces_warning(
FutureWarning, match="incompatible dtype|Setting a value on a view"
):
indexer_func(df)[indexer] = val
else:
with tm.assert_cow_warning(warn_copy_on_write and val == 100):
indexer_func(df)[indexer] = val
Expand Down
21 changes: 11 additions & 10 deletions pandas/tests/frame/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -949,7 +949,8 @@ def test_setitem_frame_upcast(self):
# needs upcasting
df = DataFrame([[1, 2, "foo"], [3, 4, "bar"]], columns=["A", "B", "C"])
df2 = df.copy()
df2.loc[:, ["A", "B"]] = df.loc[:, ["A", "B"]] + 0.5
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
df2.loc[:, ["A", "B"]] = df.loc[:, ["A", "B"]] + 0.5
expected = df.reindex(columns=["A", "B"])
expected += 0.5
expected["C"] = df["C"]
Expand Down Expand Up @@ -1387,20 +1388,20 @@ def test_loc_expand_empty_frame_keep_midx_names(self):
tm.assert_frame_equal(df, expected)

@pytest.mark.parametrize(
"val, idxr, warn",
"val, idxr",
[
("x", "a", None), # TODO: this should warn as well
("x", ["a"], None), # TODO: this should warn as well
(1, "a", None), # TODO: this should warn as well
(1, ["a"], FutureWarning),
("x", "a"),
("x", ["a"]),
(1, "a"),
(1, ["a"]),
],
)
def test_loc_setitem_rhs_frame(self, idxr, val, warn):
def test_loc_setitem_rhs_frame(self, idxr, val):
# GH#47578
df = DataFrame({"a": [1, 2]})

with tm.assert_produces_warning(
warn, match="Setting an item of incompatible dtype"
FutureWarning, match="Setting an item of incompatible dtype"
):
df.loc[:, idxr] = DataFrame({"a": [val, 11]}, index=[1, 2])
expected = DataFrame({"a": [np.nan, val]})
Expand Down Expand Up @@ -1996,7 +1997,7 @@ def _check_setitem_invalid(self, df, invalid, indexer, warn):
np.datetime64("NaT"),
np.timedelta64("NaT"),
]
_indexers = [0, [0], slice(0, 1), [True, False, False]]
_indexers = [0, [0], slice(0, 1), [True, False, False], slice(None, None, None)]

@pytest.mark.parametrize(
"invalid", _invalid_scalars + [1, 1.0, np.int64(1), np.float64(1)]
Expand All @@ -2010,7 +2011,7 @@ def test_setitem_validation_scalar_bool(self, invalid, indexer):
@pytest.mark.parametrize("indexer", _indexers)
def test_setitem_validation_scalar_int(self, invalid, any_int_numpy_dtype, indexer):
df = DataFrame({"a": [1, 2, 3]}, dtype=any_int_numpy_dtype)
if isna(invalid) and invalid is not pd.NaT:
if isna(invalid) and invalid is not pd.NaT and not np.isnat(invalid):
warn = None
else:
warn = FutureWarning
Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -1381,3 +1381,23 @@ def test_frame_setitem_empty_dataframe(self):
index=dti[:0],
)
tm.assert_frame_equal(df, expected)


def test_full_setter_loc_incompatible_dtype():
# https://github.com/pandas-dev/pandas/issues/55791
df = DataFrame({"a": [1, 2]})
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
df.loc[:, "a"] = True
expected = DataFrame({"a": [True, True]})
tm.assert_frame_equal(df, expected)

df = DataFrame({"a": [1, 2]})
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
df.loc[:, "a"] = {0: 3.5, 1: 4.5}
expected = DataFrame({"a": [3.5, 4.5]})
tm.assert_frame_equal(df, expected)

df = DataFrame({"a": [1, 2]})
df.loc[:, "a"] = {0: 3, 1: 4}
expected = DataFrame({"a": [3, 4]})
tm.assert_frame_equal(df, expected)
5 changes: 1 addition & 4 deletions pandas/tests/frame/methods/test_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,11 +160,8 @@ def test_update_with_different_dtype(self, using_copy_on_write):
# GH#3217
df = DataFrame({"a": [1, 3], "b": [np.nan, 2]})
df["c"] = np.nan
if using_copy_on_write:
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
df.update({"c": Series(["foo"], index=[0])})
else:
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
df["c"].update(Series(["foo"], index=[0]))

expected = DataFrame(
{
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2857,7 +2857,7 @@ def test_dict_data_arrow_column_expansion(self, key_val, col_vals, col_type):
)
result = DataFrame({key_val: [1, 2]}, columns=cols)
expected = DataFrame([[1, np.nan], [2, np.nan]], columns=cols)
expected.iloc[:, 1] = expected.iloc[:, 1].astype(object)
expected.isetitem(1, expected.iloc[:, 1].astype(object))
tm.assert_frame_equal(result, expected)


Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/indexing/test_iloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,8 @@ def test_iloc_setitem_frame_duplicate_columns_multiple_blocks(

# if the assigned values cannot be held by existing integer arrays,
# we cast
df.iloc[:, 0] = df.iloc[:, 0] + 0.5
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
df.iloc[:, 0] = df.iloc[:, 0] + 0.5
if not using_array_manager:
assert len(df._mgr.blocks) == 2

Expand Down Expand Up @@ -1471,6 +1472,7 @@ def test_iloc_setitem_pure_position_based(self):
def test_iloc_nullable_int64_size_1_nan(self):
# GH 31861
result = DataFrame({"a": ["test"], "b": [np.nan]})
result.loc[:, "b"] = result.loc[:, "b"].astype("Int64")
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
result.loc[:, "b"] = result.loc[:, "b"].astype("Int64")
expected = DataFrame({"a": ["test"], "b": array([NA], dtype="Int64")})
tm.assert_frame_equal(result, expected)
25 changes: 17 additions & 8 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,8 @@ def test_loc_setitem_consistency(self, frame_for_consistency, val):
}
)
df = frame_for_consistency.copy()
df.loc[:, "date"] = val
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
df.loc[:, "date"] = val
tm.assert_frame_equal(df, expected)

def test_loc_setitem_consistency_dt64_to_str(self, frame_for_consistency):
Expand All @@ -598,7 +599,8 @@ def test_loc_setitem_consistency_dt64_to_str(self, frame_for_consistency):
}
)
df = frame_for_consistency.copy()
df.loc[:, "date"] = "foo"
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
df.loc[:, "date"] = "foo"
tm.assert_frame_equal(df, expected)

def test_loc_setitem_consistency_dt64_to_float(self, frame_for_consistency):
Expand All @@ -611,14 +613,16 @@ def test_loc_setitem_consistency_dt64_to_float(self, frame_for_consistency):
}
)
df = frame_for_consistency.copy()
df.loc[:, "date"] = 1.0
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
df.loc[:, "date"] = 1.0
tm.assert_frame_equal(df, expected)

def test_loc_setitem_consistency_single_row(self):
# GH 15494
# setting on frame with single row
df = DataFrame({"date": Series([Timestamp("20180101")])})
df.loc[:, "date"] = "string"
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
df.loc[:, "date"] = "string"
expected = DataFrame({"date": Series(["string"])})
tm.assert_frame_equal(df, expected)

Expand Down Expand Up @@ -678,9 +682,10 @@ def test_loc_setitem_consistency_slice_column_len(self):

# timedelta64[m] -> float, so this cannot be done inplace, so
# no warning
df.loc[:, ("Respondent", "Duration")] = df.loc[
:, ("Respondent", "Duration")
] / Timedelta(60_000_000_000)
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
df.loc[:, ("Respondent", "Duration")] = df.loc[
:, ("Respondent", "Duration")
] / Timedelta(60_000_000_000)

expected = Series(
[23.0, 12.0, 14.0, 36.0], index=df.index, name=("Respondent", "Duration")
Expand Down Expand Up @@ -1487,7 +1492,11 @@ def test_loc_setitem_datetimeindex_tz(self, idxer, tz_naive_fixture):
# if result started off with object dtype, then the .loc.__setitem__
# below would retain object dtype
result = DataFrame(index=idx, columns=["var"], dtype=np.float64)
result.loc[:, idxer] = expected
with tm.assert_produces_warning(
FutureWarning if idxer == "var" else None, match="incompatible dtype"
):
# See https://github.com/pandas-dev/pandas/issues/56223
result.loc[:, idxer] = expected
tm.assert_frame_equal(result, expected)

def test_loc_setitem_time_key(self, using_array_manager):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ def test_frame_non_unique_columns(self, orient, data):
# in milliseconds; these are internally stored in nanosecond,
# so divide to get where we need
# TODO: a to_epoch method would also solve; see GH 14772
expected.iloc[:, 0] = expected.iloc[:, 0].astype(np.int64) // 1000000
expected.isetitem(0, expected.iloc[:, 0].astype(np.int64) // 1000000)
elif orient == "split":
expected = df
expected.columns = ["x", "x.1"]
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -2984,9 +2984,9 @@ def test_merge_empty_frames_column_order(left_empty, right_empty):
if left_empty and right_empty:
expected = expected.iloc[:0]
elif left_empty:
expected.loc[:, "B"] = np.nan
expected["B"] = np.nan
elif right_empty:
expected.loc[:, ["C", "D"]] = np.nan
expected[["C", "D"]] = np.nan
tm.assert_frame_equal(result, expected)


Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/series/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,7 @@ def _check_setitem_invalid(self, ser, invalid, indexer, warn):
np.datetime64("NaT"),
np.timedelta64("NaT"),
]
_indexers = [0, [0], slice(0, 1), [True, False, False]]
_indexers = [0, [0], slice(0, 1), [True, False, False], slice(None, None, None)]

@pytest.mark.parametrize(
"invalid", _invalid_scalars + [1, 1.0, np.int64(1), np.float64(1)]
Expand All @@ -505,7 +505,7 @@ def test_setitem_validation_scalar_bool(self, invalid, indexer):
@pytest.mark.parametrize("indexer", _indexers)
def test_setitem_validation_scalar_int(self, invalid, any_int_numpy_dtype, indexer):
ser = Series([1, 2, 3], dtype=any_int_numpy_dtype)
if isna(invalid) and invalid is not NaT:
if isna(invalid) and invalid is not NaT and not np.isnat(invalid):
warn = None
else:
warn = FutureWarning
Expand Down