diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst index e486235f044f5..61ecbff96ac7d 100644 --- a/doc/source/user_guide/categorical.rst +++ b/doc/source/user_guide/categorical.rst @@ -779,6 +779,7 @@ Setting values by assigning categorical data will also check that the ``categori Assigning a ``Categorical`` to parts of a column of other types will use the values: .. ipython:: python + :okwarning: df = pd.DataFrame({"a": [1, 1, 1, 1, 1], "b": ["a", "a", "a", "a", "a"]}) df.loc[1:2, "a"] = pd.Categorical(["b", "b"], categories=["a", "b"]) diff --git a/doc/source/whatsnew/v0.21.0.rst b/doc/source/whatsnew/v0.21.0.rst index 1bbbbdc7e5410..1dae2e8463c27 100644 --- a/doc/source/whatsnew/v0.21.0.rst +++ b/doc/source/whatsnew/v0.21.0.rst @@ -425,13 +425,12 @@ Note that this also changes the sum of an empty ``Series``. Previously this alwa In [1]: pd.Series([]).sum() Out[1]: 0 -but for consistency with the all-NaN case, this was changed to return NaN as well: +but for consistency with the all-NaN case, this was changed to return 0 as well: -.. ipython:: python - :okwarning: - - pd.Series([]).sum() +.. code-block:: ipython + In [2]: pd.Series([]).sum() + Out[2]: 0 .. _whatsnew_0210.api_breaking.loc: @@ -755,10 +754,16 @@ Previously assignments, ``.where()`` and ``.fillna()`` with a ``bool`` assignmen New behavior -.. ipython:: python +.. code-block:: ipython - s[1] = True - s + In [7]: s[1] = True + + In [8]: s + Out[8]: + 0 1 + 1 True + 2 3 + Length: 3, dtype: object Previously, as assignment to a datetimelike with a non-datetimelike would coerce the non-datetime-like item being assigned (:issue:`14145`). 
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 260213e2ae760..17aab87b93f8e 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -501,13 +501,17 @@ Consistent casting with setting into Boolean Series Setting non-boolean values into a :class:`Series` with ``dtype=bool`` now consistently casts to ``dtype=object`` (:issue:`38709`) -.. ipython:: python +.. code-block:: ipython + + In [1]: orig = pd.Series([True, False]) + + In [2]: ser = orig.copy() + + In [3]: ser.iloc[1] = np.nan - orig = pd.Series([True, False]) - ser = orig.copy() - ser.iloc[1] = np.nan - ser2 = orig.copy() - ser2.iloc[1] = 2.0 + In [4]: ser2 = orig.copy() + + In [5]: ser2.iloc[1] = 2.0 *Previous behavior*: @@ -527,10 +531,19 @@ casts to ``dtype=object`` (:issue:`38709`) *New behavior*: -.. ipython:: python +.. code-block:: ipython + + In [1]: ser + Out[1]: + 0 True + 1 NaN + dtype: object - ser - ser2 + In [2]: ser2 + Out[2]: + 0 True + 1 2.0 + dtype: object .. 
_whatsnew_130.notable_bug_fixes.rolling_groupby_column: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 2d102de879df0..66d07f1ab9cf9 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -10,6 +10,7 @@ cast, final, ) +import warnings import numpy as np @@ -41,6 +42,7 @@ ) from pandas.errors import AbstractMethodError from pandas.util._decorators import cache_readonly +from pandas.util._exceptions import find_stack_level from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.astype import ( @@ -441,7 +443,7 @@ def split_and_operate(self, func, *args, **kwargs) -> list[Block]: # Up/Down-casting @final - def coerce_to_target_dtype(self, other) -> Block: + def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block: """ coerce the current block to a dtype compat for other we will return a block, possibly object, and not raise @@ -450,7 +452,21 @@ def coerce_to_target_dtype(self, other) -> Block: and will receive the same block """ new_dtype = find_result_type(self.values.dtype, other) - + if warn_on_upcast: + warnings.warn( + f"Setting an item of incompatible dtype is deprecated " + "and will raise in a future version of pandas. " + f"Value '{other}' has dtype incompatible with {self.values.dtype}, " + "please explicitly cast to a compatible dtype first.", + FutureWarning, + stacklevel=find_stack_level(), + ) + if self.dtype == new_dtype: + raise AssertionError( + f"Did not expect new dtype {new_dtype} to equal self.dtype " + f"{self.values.dtype}. Please report a bug at " + "https://github.com/pandas-dev/pandas/issues." 
+ ) return self.astype(new_dtype, copy=False) @final @@ -1109,7 +1125,7 @@ def setitem(self, indexer, value, using_cow: bool = False) -> Block: casted = np_can_hold_element(values.dtype, value) except LossySetitemError: # current dtype cannot store value, coerce to common dtype - nb = self.coerce_to_target_dtype(value) + nb = self.coerce_to_target_dtype(value, warn_on_upcast=True) return nb.setitem(indexer, value) else: if self.dtype == _dtype_obj: @@ -1175,7 +1191,9 @@ def putmask(self, mask, new, using_cow: bool = False) -> list[Block]: if not is_list_like(new): # using just new[indexer] can't save us the need to cast - return self.coerce_to_target_dtype(new).putmask(mask, new) + return self.coerce_to_target_dtype( + new, warn_on_upcast=True + ).putmask(mask, new) else: indexer = mask.nonzero()[0] nb = self.setitem(indexer, new[indexer], using_cow=using_cow) diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index 5ef0abbb423ac..c58aeecad22e3 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -925,11 +925,20 @@ def test_column_as_series_set_with_upcast( s[0] = "foo" expected = Series([1, 2, 3], name="a") elif using_copy_on_write or using_array_manager: - s[0] = "foo" + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + s[0] = "foo" expected = Series(["foo", 2, 3], dtype=object, name="a") else: with pd.option_context("chained_assignment", "warn"): - with tm.assert_produces_warning(SettingWithCopyWarning): + msg = "|".join( + [ + "A value is trying to be set on a copy of a slice from a DataFrame", + "Setting an item of incompatible dtype is deprecated", + ] + ) + with tm.assert_produces_warning( + (SettingWithCopyWarning, FutureWarning), match=msg + ): s[0] = "foo" expected = Series(["foo", 2, 3], dtype=object, name="a") @@ -1020,7 +1029,10 @@ def test_dataframe_add_column_from_series(backend, using_copy_on_write): ], ) def 
test_set_value_copy_only_necessary_column( - using_copy_on_write, indexer_func, indexer, val + using_copy_on_write, + indexer_func, + indexer, + val, ): # When setting inplace, only copy column that is modified instead of the whole # block (by splitting the block) @@ -1029,7 +1041,13 @@ def test_set_value_copy_only_necessary_column( df_orig = df.copy() view = df[:] - indexer_func(df)[indexer] = val + if val == "a" and indexer[0] != slice(None): + with tm.assert_produces_warning( + FutureWarning, match="Setting an item of incompatible dtype is deprecated" + ): + indexer_func(df)[indexer] = val + else: + indexer_func(df)[indexer] = val if using_copy_on_write: assert np.shares_memory(get_array(df, "b"), get_array(view, "b")) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index bd5895bc5d970..f5dc8d7ee0f80 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -1405,15 +1405,18 @@ def test_putmask_aligns_rhs_no_reference(using_copy_on_write, dtype): assert np.shares_memory(arr_a, get_array(df, "a")) -@pytest.mark.parametrize("val, exp", [(5.5, True), (5, False)]) -def test_putmask_dont_copy_some_blocks(using_copy_on_write, val, exp): +@pytest.mark.parametrize( + "val, exp, warn", [(5.5, True, FutureWarning), (5, False, None)] +) +def test_putmask_dont_copy_some_blocks(using_copy_on_write, val, exp, warn): df = DataFrame({"a": [1, 2], "b": 1, "c": 1.5}) view = df[:] df_orig = df.copy() indexer = DataFrame( [[True, False, False], [True, False, False]], columns=list("abc") ) - df[indexer] = val + with tm.assert_produces_warning(warn, match="incompatible dtype"): + df[indexer] = val if using_copy_on_write: assert not np.shares_memory(get_array(view, "a"), get_array(df, "a")) diff --git a/pandas/tests/frame/indexing/test_coercion.py b/pandas/tests/frame/indexing/test_coercion.py index 0e154d0e1d42b..ba0d8613b6228 100644 --- a/pandas/tests/frame/indexing/test_coercion.py +++ 
b/pandas/tests/frame/indexing/test_coercion.py @@ -51,19 +51,31 @@ def test_37477(): expected = DataFrame({"A": [1, 2, 3], "B": [3, 1.2, 5]}) df = orig.copy() - df.at[1, "B"] = 1.2 + with tm.assert_produces_warning( + FutureWarning, match="Setting an item of incompatible dtype" + ): + df.at[1, "B"] = 1.2 tm.assert_frame_equal(df, expected) df = orig.copy() - df.loc[1, "B"] = 1.2 + with tm.assert_produces_warning( + FutureWarning, match="Setting an item of incompatible dtype" + ): + df.loc[1, "B"] = 1.2 tm.assert_frame_equal(df, expected) df = orig.copy() - df.iat[1, 1] = 1.2 + with tm.assert_produces_warning( + FutureWarning, match="Setting an item of incompatible dtype" + ): + df.iat[1, 1] = 1.2 tm.assert_frame_equal(df, expected) df = orig.copy() - df.iloc[1, 1] = 1.2 + with tm.assert_produces_warning( + FutureWarning, match="Setting an item of incompatible dtype" + ): + df.iloc[1, 1] = 1.2 tm.assert_frame_equal(df, expected) @@ -94,11 +106,17 @@ def test_26395(indexer_al): expected = DataFrame({"D": [0, 0, 2]}, index=["A", "B", "C"], dtype=np.int64) tm.assert_frame_equal(df, expected) - indexer_al(df)["C", "D"] = 44.5 + with tm.assert_produces_warning( + FutureWarning, match="Setting an item of incompatible dtype" + ): + indexer_al(df)["C", "D"] = 44.5 expected = DataFrame({"D": [0, 0, 44.5]}, index=["A", "B", "C"], dtype=np.float64) tm.assert_frame_equal(df, expected) - indexer_al(df)["C", "D"] = "hello" + with tm.assert_produces_warning( + FutureWarning, match="Setting an item of incompatible dtype" + ): + indexer_al(df)["C", "D"] = "hello" expected = DataFrame({"D": [0, 0, "hello"]}, index=["A", "B", "C"], dtype=object) tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 722f61de3f43a..e62d35ade149d 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -335,12 +335,18 @@ def test_setitem(self, float_frame, 
using_copy_on_write): def test_setitem2(self): # dtype changing GH4204 df = DataFrame([[0, 0]]) - df.iloc[0] = np.nan + with tm.assert_produces_warning( + FutureWarning, match="Setting an item of incompatible dtype" + ): + df.iloc[0] = np.nan expected = DataFrame([[np.nan, np.nan]]) tm.assert_frame_equal(df, expected) df = DataFrame([[0, 0]]) - df.loc[0] = np.nan + with tm.assert_produces_warning( + FutureWarning, match="Setting an item of incompatible dtype" + ): + df.loc[0] = np.nan tm.assert_frame_equal(df, expected) def test_setitem_boolean(self, float_frame): @@ -1332,12 +1338,22 @@ def test_loc_expand_empty_frame_keep_midx_names(self): ) tm.assert_frame_equal(df, expected) - @pytest.mark.parametrize("val", ["x", 1]) - @pytest.mark.parametrize("idxr", ["a", ["a"]]) - def test_loc_setitem_rhs_frame(self, idxr, val): + @pytest.mark.parametrize( + "val, idxr, warn", + [ + ("x", "a", None), # TODO: this should warn as well + ("x", ["a"], None), # TODO: this should warn as well + (1, "a", None), # TODO: this should warn as well + (1, ["a"], FutureWarning), + ], + ) + def test_loc_setitem_rhs_frame(self, idxr, val, warn): # GH#47578 df = DataFrame({"a": [1, 2]}) - with tm.assert_produces_warning(None): + + with tm.assert_produces_warning( + warn, match="Setting an item of incompatible dtype" + ): df.loc[:, idxr] = DataFrame({"a": [val, 11]}, index=[1, 2]) expected = DataFrame({"a": [np.nan, val]}) tm.assert_frame_equal(df, expected) @@ -1537,8 +1553,11 @@ def test_setitem(self, uint64_frame): # With NaN: because uint64 has no NaN element, # the column should be cast to object. 
df2 = df.copy() - df2.iloc[1, 1] = pd.NaT - df2.iloc[1, 2] = pd.NaT + with tm.assert_produces_warning( + FutureWarning, match="Setting an item of incompatible dtype" + ): + df2.iloc[1, 1] = pd.NaT + df2.iloc[1, 2] = pd.NaT result = df2["B"] tm.assert_series_equal(notna(result), Series([True, False, True], name="B")) tm.assert_series_equal( @@ -1851,3 +1870,54 @@ def test_setitem_dict_and_set_disallowed_multiindex(self, key): ) with pytest.raises(TypeError, match="as an indexer is not supported"): df.loc[key] = 1 + + +class TestSetitemValidation: + # This is adapted from pandas/tests/arrays/masked/test_indexing.py + # but checks for warnings instead of errors. + def _check_setitem_invalid(self, df, invalid, indexer): + msg = "Setting an item of incompatible dtype is deprecated" + msg = re.escape(msg) + + orig_df = df.copy() + + # iloc + with tm.assert_produces_warning(FutureWarning, match=msg): + df.iloc[indexer, 0] = invalid + df = orig_df.copy() + + # loc + with tm.assert_produces_warning(FutureWarning, match=msg): + df.loc[indexer, "a"] = invalid + df = orig_df.copy() + + _invalid_scalars = [ + 1 + 2j, + "True", + "1", + "1.0", + pd.NaT, + np.datetime64("NaT"), + np.timedelta64("NaT"), + ] + _indexers = [0, [0], slice(0, 1), [True, False, False]] + + @pytest.mark.parametrize( + "invalid", _invalid_scalars + [1, 1.0, np.int64(1), np.float64(1)] + ) + @pytest.mark.parametrize("indexer", _indexers) + def test_setitem_validation_scalar_bool(self, invalid, indexer): + df = DataFrame({"a": [True, False, False]}, dtype="bool") + self._check_setitem_invalid(df, invalid, indexer) + + @pytest.mark.parametrize("invalid", _invalid_scalars + [True, 1.5, np.float64(1.5)]) + @pytest.mark.parametrize("indexer", _indexers) + def test_setitem_validation_scalar_int(self, invalid, any_int_numpy_dtype, indexer): + df = DataFrame({"a": [1, 2, 3]}, dtype=any_int_numpy_dtype) + self._check_setitem_invalid(df, invalid, indexer) + + @pytest.mark.parametrize("invalid", _invalid_scalars + 
[True]) + @pytest.mark.parametrize("indexer", _indexers) + def test_setitem_validation_scalar_float(self, invalid, float_numpy_dtype, indexer): + df = DataFrame({"a": [1, 2, None]}, dtype=float_numpy_dtype) + self._check_setitem_invalid(df, invalid, indexer) diff --git a/pandas/tests/frame/indexing/test_set_value.py b/pandas/tests/frame/indexing/test_set_value.py index 8d7a5cbcc08e0..ff6b445d19dce 100644 --- a/pandas/tests/frame/indexing/test_set_value.py +++ b/pandas/tests/frame/indexing/test_set_value.py @@ -6,6 +6,7 @@ DataFrame, isna, ) +import pandas._testing as tm class TestSetValue: @@ -25,11 +26,17 @@ def test_set_value_resize(self, float_frame): assert float_frame._get_value("foobar", "qux") == 0 res = float_frame.copy() - res._set_value("foobar", "baz", "sam") + with tm.assert_produces_warning( + FutureWarning, match="Setting an item of incompatible dtype" + ): + res._set_value("foobar", "baz", "sam") assert res["baz"].dtype == np.object_ res = float_frame.copy() - res._set_value("foobar", "baz", True) + with tm.assert_produces_warning( + FutureWarning, match="Setting an item of incompatible dtype" + ): + res._set_value("foobar", "baz", True) assert res["baz"].dtype == np.object_ res = float_frame.copy() @@ -37,7 +44,10 @@ def test_set_value_resize(self, float_frame): assert is_float_dtype(res["baz"]) assert isna(res["baz"].drop(["foobar"])).all() - res._set_value("foobar", "baz", "sam") + with tm.assert_produces_warning( + FutureWarning, match="Setting an item of incompatible dtype" + ): + res._set_value("foobar", "baz", "sam") assert res.loc["foobar", "baz"] == "sam" def test_set_value_with_index_dtype_change(self): diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index 562f2fbe55c25..e7eb5f78ba066 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -164,7 +164,7 @@ def test_where_invalid(self): with pytest.raises(ValueError, match=msg): df.mask(0) - 
def test_where_set(self, where_frame, float_string_frame): + def test_where_set(self, where_frame, float_string_frame, mixed_int_frame): # where inplace def _check_set(df, cond, check_dtypes=True): @@ -189,6 +189,8 @@ def _check_set(df, cond, check_dtypes=True): with pytest.raises(TypeError, match=msg): df > 0 return + if df is mixed_int_frame: + df = df.astype("float64") cond = df > 0 _check_set(df, cond) @@ -503,7 +505,8 @@ def test_where_axis_with_upcast(self): tm.assert_frame_equal(result, expected) result = df.copy() - return_value = result.where(mask, ser, axis="index", inplace=True) + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + return_value = result.where(mask, ser, axis="index", inplace=True) assert return_value is None tm.assert_frame_equal(result, expected) @@ -518,7 +521,8 @@ def test_where_axis_with_upcast(self): } ) result = df.copy() - return_value = result.where(mask, ser, axis="columns", inplace=True) + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + return_value = result.where(mask, ser, axis="columns", inplace=True) assert return_value is None tm.assert_frame_equal(result, expected) @@ -569,11 +573,13 @@ def test_where_axis_multiple_dtypes(self): result = df.where(mask, d1, axis="index") tm.assert_frame_equal(result, expected) result = df.copy() - return_value = result.where(mask, d1, inplace=True) + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + return_value = result.where(mask, d1, inplace=True) assert return_value is None tm.assert_frame_equal(result, expected) result = df.copy() - return_value = result.where(mask, d1, inplace=True, axis="index") + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + return_value = result.where(mask, d1, inplace=True, axis="index") assert return_value is None tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_update.py 
b/pandas/tests/frame/methods/test_update.py index e8a9c418b1d98..5738a25f26fcb 100644 --- a/pandas/tests/frame/methods/test_update.py +++ b/pandas/tests/frame/methods/test_update.py @@ -147,7 +147,8 @@ def test_update_with_different_dtype(self, using_copy_on_write): if using_copy_on_write: df.update({"c": Series(["foo"], index=[0])}) else: - df["c"].update(Series(["foo"], index=[0])) + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + df["c"].update(Series(["foo"], index=[0])) expected = DataFrame({"a": [1, 3], "b": [np.nan, 2], "c": ["foo", np.nan]}) tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index c71a0dd5f92b2..31f404258a9bb 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -23,6 +23,7 @@ import pytest import pytz +from pandas._libs import lib from pandas.errors import IntCastingNaNError import pandas.util._test_decorators as td @@ -2548,8 +2549,14 @@ def check_views(c_only: bool = False): check_views() # TODO: most of the rest of this test belongs in indexing tests - df.iloc[0, 0] = 0 - df.iloc[0, 1] = 0 + # TODO: 'm' and 'M' should warn + if lib.is_np_dtype(df.dtypes.iloc[0], "fciuOmM"): + warn = None + else: + warn = FutureWarning + with tm.assert_produces_warning(warn, match="incompatible dtype"): + df.iloc[0, 0] = 0 + df.iloc[0, 1] = 0 if not copy: check_views(True) diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index e6f44359a1a62..716368f7e35c5 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -217,7 +217,10 @@ def test_multiindex_assignment_single_dtype(self, using_copy_on_write): tm.assert_numpy_array_equal(view, exp.values) # arr + 0.5 cannot be cast losslessly to int, so we upcast - df.loc[4, "c"] = arr + 0.5 + with tm.assert_produces_warning( + 
FutureWarning, match="item of incompatible dtype" + ): + df.loc[4, "c"] = arr + 0.5 result = df.loc[4, "c"] exp = exp + 0.5 tm.assert_series_equal(result, exp) diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index d2224988b70fc..6488ff2e64ea7 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -457,7 +457,7 @@ def test_detect_chained_assignment_changing_dtype( df.loc[2]["D"] = "foo" with tm.raises_chained_assignment_error(): df.loc[2]["C"] = "foo" - with tm.raises_chained_assignment_error(): + with tm.raises_chained_assignment_error(extra_warnings=(FutureWarning,)): df["C"][2] = "foo" tm.assert_frame_equal(df, df_original) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 21036598f46df..50e7e8633c5c4 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -184,7 +184,10 @@ def test_setitem_dtype_upcast(self): df["c"] = np.nan assert df["c"].dtype == np.float64 - df.loc[0, "c"] = "foo" + with tm.assert_produces_warning( + FutureWarning, match="item of incompatible dtype" + ): + df.loc[0, "c"] = "foo" expected = DataFrame( [{"a": 1, "b": np.nan, "c": "foo"}, {"a": 3, "b": 2, "c": np.nan}] ) @@ -200,7 +203,10 @@ def test_setitem_dtype_upcast2(self, val): ) left = df.copy() - left.loc["a", "bar"] = val + with tm.assert_produces_warning( + FutureWarning, match="item of incompatible dtype" + ): + left.loc["a", "bar"] = val right = DataFrame( [[0, val, 2], [3, 4, 5]], index=list("ab"), @@ -217,7 +223,10 @@ def test_setitem_dtype_upcast3(self): index=list("ab"), columns=["foo", "bar", "baz"], ) - left.loc["a", "bar"] = "wxyz" + with tm.assert_produces_warning( + FutureWarning, match="item of incompatible dtype" + ): + left.loc["a", "bar"] = "wxyz" right = DataFrame( [[0, "wxyz", 0.2], [0.3, 0.4, 0.5]], @@ -696,7 +705,8 @@ def run_tests(df, 
rhs, right_loc, right_iloc): frame["jolie"] = frame["jolie"].map(lambda x: f"@{x}") right_iloc["joe"] = [1.0, "@-28", "@-20", "@-12", 17.0] right_iloc["jolie"] = ["@2", -26.0, -18.0, -10.0, "@18"] - run_tests(df, rhs, right_loc, right_iloc) + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + run_tests(df, rhs, right_loc, right_iloc) @pytest.mark.parametrize( "idx", [_mklbl("A", 20), np.arange(20) + 100, np.linspace(100, 150, 20)] @@ -789,43 +799,49 @@ def test_label_indexing_on_nan(self, nulls_fixture): class TestDataframeNoneCoercion: EXPECTED_SINGLE_ROW_RESULTS = [ # For numeric series, we should coerce to NaN. - ([1, 2, 3], [np.nan, 2, 3]), - ([1.0, 2.0, 3.0], [np.nan, 2.0, 3.0]), + ([1, 2, 3], [np.nan, 2, 3], FutureWarning), + ([1.0, 2.0, 3.0], [np.nan, 2.0, 3.0], None), # For datetime series, we should coerce to NaT. ( [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], [NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)], + None, ), # For objects, we should preserve the None value. 
- (["foo", "bar", "baz"], [None, "bar", "baz"]), + (["foo", "bar", "baz"], [None, "bar", "baz"], None), ] @pytest.mark.parametrize("expected", EXPECTED_SINGLE_ROW_RESULTS) def test_coercion_with_loc(self, expected): - start_data, expected_result = expected + start_data, expected_result, warn = expected start_dataframe = DataFrame({"foo": start_data}) - start_dataframe.loc[0, ["foo"]] = None + with tm.assert_produces_warning(warn, match="incompatible dtype"): + start_dataframe.loc[0, ["foo"]] = None expected_dataframe = DataFrame({"foo": expected_result}) tm.assert_frame_equal(start_dataframe, expected_dataframe) @pytest.mark.parametrize("expected", EXPECTED_SINGLE_ROW_RESULTS) def test_coercion_with_setitem_and_dataframe(self, expected): - start_data, expected_result = expected + start_data, expected_result, warn = expected start_dataframe = DataFrame({"foo": start_data}) - start_dataframe[start_dataframe["foo"] == start_dataframe["foo"][0]] = None + with tm.assert_produces_warning(warn, match="incompatible dtype"): + start_dataframe[start_dataframe["foo"] == start_dataframe["foo"][0]] = None expected_dataframe = DataFrame({"foo": expected_result}) tm.assert_frame_equal(start_dataframe, expected_dataframe) @pytest.mark.parametrize("expected", EXPECTED_SINGLE_ROW_RESULTS) def test_none_coercion_loc_and_dataframe(self, expected): - start_data, expected_result = expected + start_data, expected_result, warn = expected start_dataframe = DataFrame({"foo": start_data}) - start_dataframe.loc[start_dataframe["foo"] == start_dataframe["foo"][0]] = None + with tm.assert_produces_warning(warn, match="incompatible dtype"): + start_dataframe.loc[ + start_dataframe["foo"] == start_dataframe["foo"][0] + ] = None expected_dataframe = DataFrame({"foo": expected_result}) tm.assert_frame_equal(start_dataframe, expected_dataframe) @@ -839,7 +855,10 @@ def test_none_coercion_mixed_dtypes(self): "d": ["a", "b", "c"], } ) - start_dataframe.iloc[0] = None + with tm.assert_produces_warning( 
+ FutureWarning, match="item of incompatible dtype" + ): + start_dataframe.iloc[0] = None exp = DataFrame( { diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 4017a0e3a2f80..a4c50058103ed 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -383,7 +383,10 @@ def test_loc_setitem_slice(self): df2 = DataFrame({"a": [0, 1, 1], "b": [100, 200, 300]}, dtype="uint64") ix = df1["a"] == 1 newb2 = df2.loc[ix, "b"] - df1.loc[ix, "b"] = newb2 + with tm.assert_produces_warning( + FutureWarning, match="item of incompatible dtype" + ): + df1.loc[ix, "b"] = newb2 expected = DataFrame({"a": [0, 1, 1], "b": [100, 200, 300]}, dtype="uint64") tm.assert_frame_equal(df2, expected) @@ -1416,8 +1419,11 @@ def test_loc_setitem_categorical_values_partial_column_slice(self): # the Categorical df = DataFrame({"a": [1, 1, 1, 1, 1], "b": list("aaaaa")}) exp = DataFrame({"a": [1, "b", "b", 1, 1], "b": list("aabba")}) - df.loc[1:2, "a"] = Categorical(["b", "b"], categories=["a", "b"]) - df.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"]) + with tm.assert_produces_warning( + FutureWarning, match="item of incompatible dtype" + ): + df.loc[1:2, "a"] = Categorical(["b", "b"], categories=["a", "b"]) + df.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"]) tm.assert_frame_equal(df, exp) def test_loc_setitem_single_row_categorical(self): @@ -1609,7 +1615,10 @@ def test_loc_setitem_cast2(self): # dtype conversion on setting df = DataFrame(np.random.rand(30, 3), columns=tuple("ABC")) df["event"] = np.nan - df.loc[10, "event"] = "foo" + with tm.assert_produces_warning( + FutureWarning, match="item of incompatible dtype" + ): + df.loc[10, "event"] = "foo" result = df.dtypes expected = Series( [np.dtype("float64")] * 3 + [np.dtype("object")], @@ -2929,7 +2938,8 @@ def test_loc_setitem_uint8_upcast(value): # GH#26049 df = DataFrame([1, 2, 3, 4], columns=["col1"], dtype="uint8") - df.loc[2, "col1"] = value # 
value that can't be held in uint8 + with tm.assert_produces_warning(FutureWarning, match="item of incompatible dtype"): + df.loc[2, "col1"] = value # value that can't be held in uint8 expected = DataFrame([1, 2, 300, 4], columns=["col1"], dtype="uint16") tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index dfc8afbdf3acb..aacbcc59068bd 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -13,6 +13,7 @@ Index, IndexSlice, MultiIndex, + NaT, Series, Timedelta, Timestamp, @@ -446,3 +447,59 @@ def test_setitem_dict_and_set_disallowed_multiindex(self, key): ser = Series([1, 2], index=MultiIndex.from_tuples([(1, 2), (3, 4)])) with pytest.raises(TypeError, match="as an indexer is not supported"): ser.loc[key] = 1 + + +class TestSetitemValidation: + # This is adapted from pandas/tests/arrays/masked/test_indexing.py + # but checks for warnings instead of errors. 
+ def _check_setitem_invalid(self, ser, invalid, indexer): + msg = "Setting an item of incompatible dtype is deprecated" + msg = re.escape(msg) + + orig_ser = ser.copy() + + with tm.assert_produces_warning(FutureWarning, match=msg): + ser[indexer] = invalid + ser = orig_ser.copy() + + with tm.assert_produces_warning(FutureWarning, match=msg): + ser.iloc[indexer] = invalid + ser = orig_ser.copy() + + with tm.assert_produces_warning(FutureWarning, match=msg): + ser.loc[indexer] = invalid + ser = orig_ser.copy() + + with tm.assert_produces_warning(FutureWarning, match=msg): + ser[:] = invalid + + _invalid_scalars = [ + 1 + 2j, + "True", + "1", + "1.0", + NaT, + np.datetime64("NaT"), + np.timedelta64("NaT"), + ] + _indexers = [0, [0], slice(0, 1), [True, False, False]] + + @pytest.mark.parametrize( + "invalid", _invalid_scalars + [1, 1.0, np.int64(1), np.float64(1)] + ) + @pytest.mark.parametrize("indexer", _indexers) + def test_setitem_validation_scalar_bool(self, invalid, indexer): + ser = Series([True, False, False], dtype="bool") + self._check_setitem_invalid(ser, invalid, indexer) + + @pytest.mark.parametrize("invalid", _invalid_scalars + [True, 1.5, np.float64(1.5)]) + @pytest.mark.parametrize("indexer", _indexers) + def test_setitem_validation_scalar_int(self, invalid, any_int_numpy_dtype, indexer): + ser = Series([1, 2, 3], dtype=any_int_numpy_dtype) + self._check_setitem_invalid(ser, invalid, indexer) + + @pytest.mark.parametrize("invalid", _invalid_scalars + [True]) + @pytest.mark.parametrize("indexer", _indexers) + def test_setitem_validation_scalar_float(self, invalid, float_numpy_dtype, indexer): + ser = Series([1, 2, None], dtype=float_numpy_dtype) + self._check_setitem_invalid(ser, invalid, indexer) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index e87a968dee323..3020348ff2a07 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -191,11 
+191,14 @@ def test_setitem_series_object_dtype(self, indexer, ser_index): expected = Series([Series([42], index=[ser_index]), 0], dtype="object") tm.assert_series_equal(ser, expected) - @pytest.mark.parametrize("index, exp_value", [(0, 42), (1, np.nan)]) - def test_setitem_series(self, index, exp_value): + @pytest.mark.parametrize( + "index, exp_value, warn", [(0, 42, None), (1, np.nan, FutureWarning)] + ) + def test_setitem_series(self, index, exp_value, warn): # GH#38303 ser = Series([0, 0]) - ser.loc[0] = Series([42], index=[index]) + with tm.assert_produces_warning(warn, match="item of incompatible dtype"): + ser.loc[0] = Series([42], index=[index]) expected = Series([exp_value, 0]) tm.assert_series_equal(ser, expected) @@ -259,7 +262,10 @@ def test_setitem_mask_align_and_promote(self): mask = ts > 0 left = ts.copy() right = ts[mask].copy().map(str) - left[mask] = right + with tm.assert_produces_warning( + FutureWarning, match="item of incompatible dtype" + ): + left[mask] = right expected = ts.map(lambda t: str(t) if t > 0 else t) tm.assert_series_equal(left, expected) @@ -267,7 +273,10 @@ def test_setitem_mask_promote_strs(self): ser = Series([0, 1, 2, 0]) mask = ser > 0 ser2 = ser[mask].map(str) - ser[mask] = ser2 + with tm.assert_produces_warning( + FutureWarning, match="item of incompatible dtype" + ): + ser[mask] = ser2 expected = Series([0, "1", "2", 0]) tm.assert_series_equal(ser, expected) @@ -359,7 +368,10 @@ def test_setitem_with_bool_mask_and_values_matching_n_trues_in_length(self): def test_setitem_nan_with_bool(self): # GH 13034 result = Series([True, False, True]) - result[0] = np.nan + with tm.assert_produces_warning( + FutureWarning, match="item of incompatible dtype" + ): + result[0] = np.nan expected = Series([np.nan, False, True], dtype=object) tm.assert_series_equal(result, expected) @@ -370,12 +382,18 @@ def test_setitem_mask_smallint_upcast(self): mask = np.array([True, False, True]) ser = orig.copy() - ser[mask] = Series(alt) + with 
tm.assert_produces_warning( + FutureWarning, match="item of incompatible dtype" + ): + ser[mask] = Series(alt) expected = Series([999, 2, 1001]) tm.assert_series_equal(ser, expected) ser2 = orig.copy() - ser2.mask(mask, alt, inplace=True) + with tm.assert_produces_warning( + FutureWarning, match="item of incompatible dtype" + ): + ser2.mask(mask, alt, inplace=True) tm.assert_series_equal(ser2, expected) ser3 = orig.copy() @@ -548,22 +566,30 @@ def test_setitem_keep_precision(self, any_numeric_ea_dtype): expected = Series([1, 2, 10], dtype=any_numeric_ea_dtype) tm.assert_series_equal(ser, expected) - @pytest.mark.parametrize("indexer", [1, 2]) @pytest.mark.parametrize( - "na, target_na, dtype, target_dtype", + "na, target_na, dtype, target_dtype, indexer, warn", [ - (NA, NA, "Int64", "Int64"), - (NA, np.nan, "int64", "float64"), - (NaT, NaT, "int64", "object"), - (np.nan, NA, "Int64", "Int64"), - (np.nan, NA, "Float64", "Float64"), - (np.nan, np.nan, "int64", "float64"), + (NA, NA, "Int64", "Int64", 1, None), + (NA, NA, "Int64", "Int64", 2, None), + (NA, np.nan, "int64", "float64", 1, FutureWarning), + (NA, np.nan, "int64", "float64", 2, None), + (NaT, NaT, "int64", "object", 1, FutureWarning), + (NaT, NaT, "int64", "object", 2, None), + (np.nan, NA, "Int64", "Int64", 1, None), + (np.nan, NA, "Int64", "Int64", 2, None), + (np.nan, NA, "Float64", "Float64", 1, None), + (np.nan, NA, "Float64", "Float64", 2, None), + (np.nan, np.nan, "int64", "float64", 1, FutureWarning), + (np.nan, np.nan, "int64", "float64", 2, None), ], ) - def test_setitem_enlarge_with_na(self, na, target_na, dtype, target_dtype, indexer): + def test_setitem_enlarge_with_na( + self, na, target_na, dtype, target_dtype, indexer, warn + ): # GH#32346 ser = Series([1, 2], dtype=dtype) - ser[indexer] = na + with tm.assert_produces_warning(warn, match="item of incompatible dtype"): + ser[indexer] = na expected_values = [1, target_na] if indexer == 1 else [1, 2, target_na] expected = 
Series(expected_values, dtype=target_dtype) tm.assert_series_equal(ser, expected) @@ -652,7 +678,8 @@ def test_setitem_non_bool_into_bool(self, val, indexer_sli, unique): if not unique: ser.index = [1, 1] - indexer_sli(ser)[1] = val + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + indexer_sli(ser)[1] = val assert type(ser.iloc[1]) == type(val) expected = Series([True, val], dtype=object, index=ser.index) @@ -669,7 +696,8 @@ def test_setitem_boolean_array_into_npbool(self): ser[:2] = arr[:2] # no NAs -> can set inplace assert ser._values is values - ser[1:] = arr[1:] # has an NA -> cast to boolean dtype + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + ser[1:] = arr[1:] # has an NA -> cast to boolean dtype expected = Series(arr) tm.assert_series_equal(ser, expected) @@ -715,52 +743,64 @@ def _check_inplace(self, is_inplace, orig, arr, obj): # otherwise original array should be unchanged tm.assert_equal(arr, orig._values) - def test_int_key(self, obj, key, expected, val, indexer_sli, is_inplace): + def test_int_key(self, obj, key, expected, warn, val, indexer_sli, is_inplace): if not isinstance(key, int): return - self.check_indexer(obj, key, expected, val, indexer_sli, is_inplace) + with tm.assert_produces_warning(warn, match="incompatible dtype"): + self.check_indexer(obj, key, expected, val, indexer_sli, is_inplace) if indexer_sli is tm.loc: - self.check_indexer(obj, key, expected, val, tm.at, is_inplace) + with tm.assert_produces_warning(warn, match="incompatible dtype"): + self.check_indexer(obj, key, expected, val, tm.at, is_inplace) elif indexer_sli is tm.iloc: - self.check_indexer(obj, key, expected, val, tm.iat, is_inplace) + with tm.assert_produces_warning(warn, match="incompatible dtype"): + self.check_indexer(obj, key, expected, val, tm.iat, is_inplace) rng = range(key, key + 1) - self.check_indexer(obj, rng, expected, val, indexer_sli, is_inplace) + with tm.assert_produces_warning(warn, 
match="incompatible dtype"): + self.check_indexer(obj, rng, expected, val, indexer_sli, is_inplace) if indexer_sli is not tm.loc: # Note: no .loc because that handles slice edges differently slc = slice(key, key + 1) - self.check_indexer(obj, slc, expected, val, indexer_sli, is_inplace) + with tm.assert_produces_warning(warn, match="incompatible dtype"): + self.check_indexer(obj, slc, expected, val, indexer_sli, is_inplace) ilkey = [key] - self.check_indexer(obj, ilkey, expected, val, indexer_sli, is_inplace) + with tm.assert_produces_warning(warn, match="incompatible dtype"): + self.check_indexer(obj, ilkey, expected, val, indexer_sli, is_inplace) indkey = np.array(ilkey) - self.check_indexer(obj, indkey, expected, val, indexer_sli, is_inplace) + with tm.assert_produces_warning(warn, match="incompatible dtype"): + self.check_indexer(obj, indkey, expected, val, indexer_sli, is_inplace) genkey = (x for x in [key]) - self.check_indexer(obj, genkey, expected, val, indexer_sli, is_inplace) + with tm.assert_produces_warning(warn, match="incompatible dtype"): + self.check_indexer(obj, genkey, expected, val, indexer_sli, is_inplace) - def test_slice_key(self, obj, key, expected, val, indexer_sli, is_inplace): + def test_slice_key(self, obj, key, expected, warn, val, indexer_sli, is_inplace): if not isinstance(key, slice): return if indexer_sli is not tm.loc: # Note: no .loc because that handles slice edges differently - self.check_indexer(obj, key, expected, val, indexer_sli, is_inplace) + with tm.assert_produces_warning(warn, match="incompatible dtype"): + self.check_indexer(obj, key, expected, val, indexer_sli, is_inplace) ilkey = list(range(len(obj)))[key] - self.check_indexer(obj, ilkey, expected, val, indexer_sli, is_inplace) + with tm.assert_produces_warning(warn, match="incompatible dtype"): + self.check_indexer(obj, ilkey, expected, val, indexer_sli, is_inplace) indkey = np.array(ilkey) - self.check_indexer(obj, indkey, expected, val, indexer_sli, is_inplace) + 
with tm.assert_produces_warning(warn, match="incompatible dtype"): + self.check_indexer(obj, indkey, expected, val, indexer_sli, is_inplace) genkey = (x for x in indkey) - self.check_indexer(obj, genkey, expected, val, indexer_sli, is_inplace) + with tm.assert_produces_warning(warn, match="incompatible dtype"): + self.check_indexer(obj, genkey, expected, val, indexer_sli, is_inplace) - def test_mask_key(self, obj, key, expected, val, indexer_sli): + def test_mask_key(self, obj, key, expected, warn, val, indexer_sli): # setitem with boolean mask mask = np.zeros(obj.shape, dtype=bool) mask[key] = True @@ -773,10 +813,11 @@ def test_mask_key(self, obj, key, expected, val, indexer_sli): indexer_sli(obj)[mask] = val return - indexer_sli(obj)[mask] = val + with tm.assert_produces_warning(warn, match="incompatible dtype"): + indexer_sli(obj)[mask] = val tm.assert_series_equal(obj, expected) - def test_series_where(self, obj, key, expected, val, is_inplace): + def test_series_where(self, obj, key, expected, warn, val, is_inplace): mask = np.zeros(obj.shape, dtype=bool) mask[key] = True @@ -803,7 +844,7 @@ def test_series_where(self, obj, key, expected, val, is_inplace): self._check_inplace(is_inplace, orig, arr, obj) - def test_index_where(self, obj, key, expected, val): + def test_index_where(self, obj, key, expected, warn, val): mask = np.zeros(obj.shape, dtype=bool) mask[key] = True @@ -811,7 +852,7 @@ def test_index_where(self, obj, key, expected, val): expected_idx = Index(expected, dtype=expected.dtype) tm.assert_index_equal(res, expected_idx) - def test_index_putmask(self, obj, key, expected, val): + def test_index_putmask(self, obj, key, expected, warn, val): mask = np.zeros(obj.shape, dtype=bool) mask[key] = True @@ -820,7 +861,7 @@ def test_index_putmask(self, obj, key, expected, val): @pytest.mark.parametrize( - "obj,expected,key", + "obj,expected,key,warn", [ pytest.param( # GH#45568 setting a valid NA value into IntervalDtype[int] should @@ -831,6 +872,7 @@ 
def test_index_putmask(self, obj, key, expected, val): dtype="interval[float64]", ), 1, + None, id="interval_int_na_value", ), pytest.param( @@ -838,12 +880,14 @@ def test_index_putmask(self, obj, key, expected, val): Series([2, 3, 4, 5, 6, 7, 8, 9, 10]), Series([np.nan, 3, np.nan, 5, np.nan, 7, np.nan, 9, np.nan]), slice(None, None, 2), + FutureWarning, id="int_series_slice_key_step", ), pytest.param( Series([True, True, False, False]), Series([np.nan, True, np.nan, False], dtype=object), slice(None, None, 2), + FutureWarning, id="bool_series_slice_key_step", ), pytest.param( @@ -851,6 +895,7 @@ def test_index_putmask(self, obj, key, expected, val): Series(np.arange(10)), Series([np.nan, np.nan, np.nan, np.nan, np.nan, 5, 6, 7, 8, 9]), slice(None, 5), + FutureWarning, id="int_series_slice_key", ), pytest.param( @@ -858,6 +903,7 @@ def test_index_putmask(self, obj, key, expected, val): Series([1, 2, 3]), Series([np.nan, 2, 3]), 0, + FutureWarning, id="int_series_int_key", ), pytest.param( @@ -866,6 +912,7 @@ def test_index_putmask(self, obj, key, expected, val): Series([np.nan], dtype=object), # TODO: maybe go to float64 since we are changing the _whole_ Series? 
0, + FutureWarning, id="bool_series_int_key_change_all", ), pytest.param( @@ -873,6 +920,7 @@ def test_index_putmask(self, obj, key, expected, val): Series([False, True]), Series([np.nan, True], dtype=object), 0, + FutureWarning, id="bool_series_int_key", ), ], @@ -921,6 +969,10 @@ def expected(self, dtype): def key(self): return 0 + @pytest.fixture + def warn(self): + return FutureWarning + class TestSetitemDT64IntoInt(SetitemCastingEquivalents): # GH#39619 dont cast dt64 to int when doing this setitem @@ -957,6 +1009,10 @@ def val(self, scalar, request): return scalar return box([scalar, scalar]) + @pytest.fixture + def warn(self): + return FutureWarning + class TestSetitemNAPeriodDtype(SetitemCastingEquivalents): # Setting compatible NA values into Series with PeriodDtype @@ -980,6 +1036,10 @@ def key(self, request): def val(self, request): return request.param + @pytest.fixture + def warn(self): + return None + class TestSetitemNADatetimeLikeDtype(SetitemCastingEquivalents): # some nat-like values should be cast to datetime64/timedelta64 when @@ -1029,6 +1089,10 @@ def expected(self, obj, val, is_inplace): def key(self): return 0 + @pytest.fixture + def warn(self): + return None + class TestSetitemMismatchedTZCastsToObject(SetitemCastingEquivalents): # GH#24024 @@ -1057,20 +1121,25 @@ def expected(self, obj, val): ) return expected + @pytest.fixture + def warn(self): + return None + @pytest.mark.parametrize( - "obj,expected", + "obj,expected,warn", [ # For numeric series, we should coerce to NaN. - (Series([1, 2, 3]), Series([np.nan, 2, 3])), - (Series([1.0, 2.0, 3.0]), Series([np.nan, 2.0, 3.0])), + (Series([1, 2, 3]), Series([np.nan, 2, 3]), FutureWarning), + (Series([1.0, 2.0, 3.0]), Series([np.nan, 2.0, 3.0]), None), # For datetime series, we should coerce to NaT. 
( Series([datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)]), Series([NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)]), + None, ), # For objects, we should preserve the None value. - (Series(["foo", "bar", "baz"]), Series([None, "bar", "baz"])), + (Series(["foo", "bar", "baz"]), Series([None, "bar", "baz"]), None), ], ) class TestSeriesNoneCoercion(SetitemCastingEquivalents): @@ -1114,6 +1183,10 @@ def expected(self, obj, val): idx = IntervalIndex(data, dtype="Interval[float64]") return Series(idx) + @pytest.fixture + def warn(self): + return None + class TestSetitemRangeIntoIntegerSeries(SetitemCastingEquivalents): # GH#44261 Setting a range with sufficiently-small integers into @@ -1139,13 +1212,20 @@ def expected(self, any_int_numpy_dtype): exp = Series([2, 3, 2, 3, 4], dtype=dtype) return exp + @pytest.fixture + def warn(self): + return None + @pytest.mark.parametrize( - "val", + "val, warn", [ - np.array([2.0, 3.0]), - np.array([2.5, 3.5]), - np.array([2**65, 2**65 + 1], dtype=np.float64), # all ints, but can't cast + (np.array([2.0, 3.0]), None), + (np.array([2.5, 3.5]), FutureWarning), + ( + np.array([2**65, 2**65 + 1], dtype=np.float64), + FutureWarning, + ), # all ints, but can't cast ], ) class TestSetitemFloatNDarrayIntoIntegerSeries(SetitemCastingEquivalents): @@ -1183,6 +1263,10 @@ def key(self): def expected(self): return Series([1, 512, 3], dtype=np.int16) + @pytest.fixture + def warn(self): + return FutureWarning + @pytest.mark.parametrize("val", [2**33 + 1.0, 2**33 + 1.1, 2**62]) class TestSmallIntegerSetitemUpcast(SetitemCastingEquivalents): @@ -1203,6 +1287,10 @@ def expected(self, val): dtype = "i8" return Series([val, 2, 3], dtype=dtype) + @pytest.fixture + def warn(self): + return FutureWarning + class CoercionTest(SetitemCastingEquivalents): # Tests ported from tests.indexing.test_coercion @@ -1219,7 +1307,8 @@ def expected(self, obj, key, val, exp_dtype): @pytest.mark.parametrize( - "val,exp_dtype", [(np.int32(1), np.int8), 
(np.int16(2**9), np.int16)] + "val,exp_dtype,warn", + [(np.int32(1), np.int8, None), (np.int16(2**9), np.int16, FutureWarning)], ) class TestCoercionInt8(CoercionTest): # previously test_setitem_series_int8 in tests.indexing.test_coercion @@ -1236,10 +1325,19 @@ class TestCoercionObject(CoercionTest): def obj(self): return Series(["a", "b", "c", "d"], dtype=object) + @pytest.fixture + def warn(self): + return None + @pytest.mark.parametrize( - "val,exp_dtype", - [(1, np.complex128), (1.1, np.complex128), (1 + 1j, np.complex128), (True, object)], + "val,exp_dtype,warn", + [ + (1, np.complex128, None), + (1.1, np.complex128, None), + (1 + 1j, np.complex128, None), + (True, object, FutureWarning), + ], ) class TestCoercionComplex(CoercionTest): # previously test_setitem_series_complex128 in tests.indexing.test_coercion @@ -1249,14 +1347,14 @@ def obj(self): @pytest.mark.parametrize( - "val,exp_dtype", + "val,exp_dtype,warn", [ - (1, object), - ("3", object), - (3, object), - (1.1, object), - (1 + 1j, object), - (True, bool), + (1, object, FutureWarning), + ("3", object, FutureWarning), + (3, object, FutureWarning), + (1.1, object, FutureWarning), + (1 + 1j, object, FutureWarning), + (True, bool, None), ], ) class TestCoercionBool(CoercionTest): @@ -1267,8 +1365,13 @@ def obj(self): @pytest.mark.parametrize( - "val,exp_dtype", - [(1, np.int64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)], + "val,exp_dtype,warn", + [ + (1, np.int64, None), + (1.1, np.float64, FutureWarning), + (1 + 1j, np.complex128, FutureWarning), + (True, object, FutureWarning), + ], ) class TestCoercionInt64(CoercionTest): # previously test_setitem_series_int64 in tests.indexing.test_coercion @@ -1278,8 +1381,13 @@ def obj(self): @pytest.mark.parametrize( - "val,exp_dtype", - [(1, np.float64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)], + "val,exp_dtype,warn", + [ + (1, np.float64, None), + (1.1, np.float64, None), + (1 + 1j, np.complex128, FutureWarning), + (True, 
object, FutureWarning), + ], ) class TestCoercionFloat64(CoercionTest): # previously test_setitem_series_float64 in tests.indexing.test_coercion @@ -1289,27 +1397,28 @@ def obj(self): @pytest.mark.parametrize( - "val,exp_dtype", + "val,exp_dtype,warn", [ - (1, np.float32), + (1, np.float32, None), pytest.param( 1.1, np.float32, + None, marks=pytest.mark.xfail( reason="np.float32(1.1) ends up as 1.100000023841858, so " "np_can_hold_element raises and we cast to float64", ), ), - (1 + 1j, np.complex128), - (True, object), - (np.uint8(2), np.float32), - (np.uint32(2), np.float32), + (1 + 1j, np.complex128, FutureWarning), + (True, object, FutureWarning), + (np.uint8(2), np.float32, None), + (np.uint32(2), np.float32, None), # float32 cannot hold np.iinfo(np.uint32).max exactly # (closest it can hold is 4294967300.0 which off by 5.0), so # we cast to float64 - (np.uint32(np.iinfo(np.uint32).max), np.float64), - (np.uint64(2), np.float32), - (np.int64(2), np.float32), + (np.uint32(np.iinfo(np.uint32).max), np.float64, FutureWarning), + (np.uint64(2), np.float32, None), + (np.int64(2), np.float32, None), ], ) class TestCoercionFloat32(CoercionTest): @@ -1317,8 +1426,8 @@ class TestCoercionFloat32(CoercionTest): def obj(self): return Series([1.1, 2.2, 3.3, 4.4], dtype=np.float32) - def test_slice_key(self, obj, key, expected, val, indexer_sli, is_inplace): - super().test_slice_key(obj, key, expected, val, indexer_sli, is_inplace) + def test_slice_key(self, obj, key, expected, warn, val, indexer_sli, is_inplace): + super().test_slice_key(obj, key, expected, warn, val, indexer_sli, is_inplace) if type(val) is float: # the xfail would xpass bc test_slice_key short-circuits @@ -1336,6 +1445,10 @@ class TestCoercionDatetime64(CoercionTest): def obj(self): return Series(date_range("2011-01-01", freq="D", periods=4)) + @pytest.fixture + def warn(self): + return None + @pytest.mark.parametrize( "val,exp_dtype", @@ -1354,6 +1467,10 @@ def obj(self): tz = "US/Eastern" return 
Series(date_range("2011-01-01", freq="D", periods=4, tz=tz)) + @pytest.fixture + def warn(self): + return None + @pytest.mark.parametrize( "val,exp_dtype", @@ -1365,6 +1482,10 @@ class TestCoercionTimedelta64(CoercionTest): def obj(self): return Series(timedelta_range("1 day", periods=4)) + @pytest.fixture + def warn(self): + return None + @pytest.mark.parametrize( "val", ["foo", Period("2016", freq="Y"), Interval(1, 2, closed="both")] @@ -1381,6 +1502,10 @@ class TestPeriodIntervalCoercion(CoercionTest): def obj(self, request): return Series(request.param) + @pytest.fixture + def warn(self): + return None + def test_20643(): # closed by GH#45121 @@ -1389,42 +1514,51 @@ def test_20643(): expected = Series([0, 2.7, 2], index=["a", "b", "c"]) ser = orig.copy() - ser.at["b"] = 2.7 + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + ser.at["b"] = 2.7 tm.assert_series_equal(ser, expected) ser = orig.copy() - ser.loc["b"] = 2.7 + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + ser.loc["b"] = 2.7 tm.assert_series_equal(ser, expected) ser = orig.copy() - ser["b"] = 2.7 + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + ser["b"] = 2.7 tm.assert_series_equal(ser, expected) ser = orig.copy() - ser.iat[1] = 2.7 + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + ser.iat[1] = 2.7 tm.assert_series_equal(ser, expected) ser = orig.copy() - ser.iloc[1] = 2.7 + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + ser.iloc[1] = 2.7 tm.assert_series_equal(ser, expected) orig_df = orig.to_frame("A") expected_df = expected.to_frame("A") df = orig_df.copy() - df.at["b", "A"] = 2.7 + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + df.at["b", "A"] = 2.7 tm.assert_frame_equal(df, expected_df) df = orig_df.copy() - df.loc["b", "A"] = 2.7 + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + 
df.loc["b", "A"] = 2.7 tm.assert_frame_equal(df, expected_df) df = orig_df.copy() - df.iloc[1, 0] = 2.7 + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + df.iloc[1, 0] = 2.7 tm.assert_frame_equal(df, expected_df) df = orig_df.copy() - df.iat[1, 0] = 2.7 + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + df.iat[1, 0] = 2.7 tm.assert_frame_equal(df, expected_df) @@ -1435,11 +1569,13 @@ def test_20643_comment(): expected = Series([np.nan, 1, 2], index=["a", "b", "c"]) ser = orig.copy() - ser.iat[0] = None + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + ser.iat[0] = None tm.assert_series_equal(ser, expected) ser = orig.copy() - ser.iloc[0] = None + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + ser.iloc[0] = None tm.assert_series_equal(ser, expected) @@ -1447,28 +1583,34 @@ def test_15413(): # fixed by GH#45121 ser = Series([1, 2, 3]) - ser[ser == 2] += 0.5 + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + ser[ser == 2] += 0.5 expected = Series([1, 2.5, 3]) tm.assert_series_equal(ser, expected) ser = Series([1, 2, 3]) - ser[1] += 0.5 + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + ser[1] += 0.5 tm.assert_series_equal(ser, expected) ser = Series([1, 2, 3]) - ser.loc[1] += 0.5 + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + ser.loc[1] += 0.5 tm.assert_series_equal(ser, expected) ser = Series([1, 2, 3]) - ser.iloc[1] += 0.5 + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + ser.iloc[1] += 0.5 tm.assert_series_equal(ser, expected) ser = Series([1, 2, 3]) - ser.iat[1] += 0.5 + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + ser.iat[1] += 0.5 tm.assert_series_equal(ser, expected) ser = Series([1, 2, 3]) - ser.at[1] += 0.5 + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + 
ser.at[1] += 0.5 tm.assert_series_equal(ser, expected) @@ -1477,7 +1619,8 @@ def test_32878_int_itemsize(): arr = np.arange(5).astype("i4") ser = Series(arr) val = np.int64(np.iinfo(np.int64).max) - ser[0] = val + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + ser[0] = val expected = Series([val, 1, 2, 3, 4], dtype=np.int64) tm.assert_series_equal(ser, expected) @@ -1489,7 +1632,8 @@ def test_32878_complex_itemsize(): val = val.astype("c16") # GH#32878 used to coerce val to inf+0.000000e+00j - ser[0] = val + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + ser[0] = val assert ser[0] == val expected = Series([val, 1, 2, 3, 4], dtype="c16") tm.assert_series_equal(ser, expected) @@ -1498,7 +1642,8 @@ def test_32878_complex_itemsize(): def test_37692(indexer_al): # GH#37692 ser = Series([1, 2, 3], index=["a", "b", "c"]) - indexer_al(ser)["b"] = "test" + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + indexer_al(ser)["b"] = "test" expected = Series([1, "test", 3], index=["a", "b", "c"], dtype=object) tm.assert_series_equal(ser, expected) @@ -1510,11 +1655,13 @@ def test_setitem_bool_int_float_consistency(indexer_sli): # as the setitem can be done losslessly for dtype in [np.float64, np.int64]: ser = Series(0, index=range(3), dtype=dtype) - indexer_sli(ser)[0] = True + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + indexer_sli(ser)[0] = True assert ser.dtype == object ser = Series(0, index=range(3), dtype=bool) - ser[0] = dtype(1) + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + ser[0] = dtype(1) assert ser.dtype == object # 1.0 can be held losslessly, so no casting diff --git a/pandas/tests/series/indexing/test_where.py b/pandas/tests/series/indexing/test_where.py index 0c8cb493141b7..4de3b64294504 100644 --- a/pandas/tests/series/indexing/test_where.py +++ b/pandas/tests/series/indexing/test_where.py @@ -54,7 +54,13 @@ 
def test_where_unsafe_upcast(dtype, expected_dtype): values = [2.5, 3.5, 4.5, 5.5, 6.5] mask = s < 5 expected = Series(values + list(range(5, 10)), dtype=expected_dtype) - s[mask] = values + warn = ( + None + if np.dtype(dtype).kind == np.dtype(expected_dtype).kind == "f" + else FutureWarning + ) + with tm.assert_produces_warning(warn, match="incompatible dtype"): + s[mask] = values tm.assert_series_equal(s, expected) @@ -66,7 +72,8 @@ def test_where_unsafe(): mask = s > 5 expected = Series(list(range(6)) + values, dtype="float64") - s[mask] = values + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + s[mask] = values tm.assert_series_equal(s, expected) # see gh-3235 diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index f2ac5f1086625..408747ab19b24 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -206,7 +206,20 @@ def test_convert_dtypes( # Test that it is a copy copy = series.copy(deep=True) - result[result.notna()] = np.nan + if result.notna().sum() > 0 and result.dtype in [ + "int8", + "uint8", + "int16", + "uint16", + "int32", + "uint32", + "int64", + "uint64", + ]: + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + result[result.notna()] = np.nan + else: + result[result.notna()] = np.nan # Make sure original not changed tm.assert_series_equal(series, copy) diff --git a/pandas/tests/series/methods/test_update.py b/pandas/tests/series/methods/test_update.py index 5bf134fbeeb86..c38b2400f0f4e 100644 --- a/pandas/tests/series/methods/test_update.py +++ b/pandas/tests/series/methods/test_update.py @@ -41,34 +41,35 @@ def test_update(self, using_copy_on_write): tm.assert_frame_equal(df, expected) @pytest.mark.parametrize( - "other, dtype, expected", + "other, dtype, expected, warn", [ # other is int - ([61, 63], "int32", Series([10, 61, 12], dtype="int32")), - ([61, 
63], "int64", Series([10, 61, 12])), - ([61, 63], float, Series([10.0, 61.0, 12.0])), - ([61, 63], object, Series([10, 61, 12], dtype=object)), + ([61, 63], "int32", Series([10, 61, 12], dtype="int32"), None), + ([61, 63], "int64", Series([10, 61, 12]), None), + ([61, 63], float, Series([10.0, 61.0, 12.0]), None), + ([61, 63], object, Series([10, 61, 12], dtype=object), None), # other is float, but can be cast to int - ([61.0, 63.0], "int32", Series([10, 61, 12], dtype="int32")), - ([61.0, 63.0], "int64", Series([10, 61, 12])), - ([61.0, 63.0], float, Series([10.0, 61.0, 12.0])), - ([61.0, 63.0], object, Series([10, 61.0, 12], dtype=object)), + ([61.0, 63.0], "int32", Series([10, 61, 12], dtype="int32"), None), + ([61.0, 63.0], "int64", Series([10, 61, 12]), None), + ([61.0, 63.0], float, Series([10.0, 61.0, 12.0]), None), + ([61.0, 63.0], object, Series([10, 61.0, 12], dtype=object), None), # others is float, cannot be cast to int - ([61.1, 63.1], "int32", Series([10.0, 61.1, 12.0])), - ([61.1, 63.1], "int64", Series([10.0, 61.1, 12.0])), - ([61.1, 63.1], float, Series([10.0, 61.1, 12.0])), - ([61.1, 63.1], object, Series([10, 61.1, 12], dtype=object)), + ([61.1, 63.1], "int32", Series([10.0, 61.1, 12.0]), FutureWarning), + ([61.1, 63.1], "int64", Series([10.0, 61.1, 12.0]), FutureWarning), + ([61.1, 63.1], float, Series([10.0, 61.1, 12.0]), None), + ([61.1, 63.1], object, Series([10, 61.1, 12], dtype=object), None), # other is object, cannot be cast - ([(61,), (63,)], "int32", Series([10, (61,), 12])), - ([(61,), (63,)], "int64", Series([10, (61,), 12])), - ([(61,), (63,)], float, Series([10.0, (61,), 12.0])), - ([(61,), (63,)], object, Series([10, (61,), 12])), + ([(61,), (63,)], "int32", Series([10, (61,), 12]), FutureWarning), + ([(61,), (63,)], "int64", Series([10, (61,), 12]), FutureWarning), + ([(61,), (63,)], float, Series([10.0, (61,), 12.0]), FutureWarning), + ([(61,), (63,)], object, Series([10, (61,), 12]), None), ], ) - def test_update_dtypes(self, 
other, dtype, expected): + def test_update_dtypes(self, other, dtype, expected, warn): ser = Series([10, 11, 12], dtype=dtype) other = Series(other, index=[1, 3]) - ser.update(other) + with tm.assert_produces_warning(warn, match="item of incompatible dtype"): + ser.update(other) tm.assert_series_equal(ser, expected)