diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi
index 32641319a6b96..15bd5a7379105 100644
--- a/pandas/_libs/lib.pyi
+++ b/pandas/_libs/lib.pyi
@@ -195,6 +195,7 @@ def array_equivalent_object(
     right: npt.NDArray[np.object_],
 ) -> bool: ...
 def has_infs(arr: np.ndarray) -> bool: ...  # const floating[:]
+def has_only_ints_or_nan(arr: np.ndarray) -> bool: ...  # const floating[:]
 def get_reverse_indexer(
     indexer: np.ndarray,  # const intp_t[:]
     length: int,
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 859cb8e5ebead..0c0610f72044e 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -530,6 +530,27 @@ def has_infs(floating[:] arr) -> bool:
     return ret
 
 
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def has_only_ints_or_nan(floating[:] arr) -> bool:
+    """
+    Return True if every element of ``arr`` is NaN or a whole number.
+    """
+    cdef:
+        floating val
+        intp_t i
+
+    for i in range(len(arr)):
+        val = arr[i]
+        # ``val != val`` is the NaN check; comparing against the int64 cast
+        # accepts only values that survive the round trip unchanged.
+        if (val != val) or (val == <int64_t>val):
+            continue
+        else:
+            return False
+    return True
+
+
 def maybe_indices_to_slice(ndarray[intp_t, ndim=1] indices, int max_len):
     cdef:
         Py_ssize_t i, n = len(indices)
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index ffaeef14e42a5..bffc1251a0729 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -18,6 +18,7 @@
 from pandas._config import using_copy_on_write
 
 from pandas._libs import (
+    NaT,
     internals as libinternals,
     lib,
     writers,
@@ -59,7 +60,10 @@
 from pandas.core.dtypes.common import (
     ensure_platform_int,
     is_1d_only_ea_dtype,
+    is_float_dtype,
+    is_integer_dtype,
     is_list_like,
+    is_scalar,
     is_string_dtype,
 )
 from pandas.core.dtypes.dtypes import (
@@ -453,6 +457,25 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block:
             and will receive the same block
         """
         new_dtype = find_result_type(self.values.dtype, other)
+
+        # In a future version of pandas, the default will be that
+        # setting `nan` into an integer series won't raise.
+ if ( + is_scalar(other) + and is_integer_dtype(self.values.dtype) + and isna(other) + and other is not NaT + ): + warn_on_upcast = False + elif ( + isinstance(other, np.ndarray) + and other.ndim == 1 + and is_integer_dtype(self.values.dtype) + and is_float_dtype(other.dtype) + and lib.has_only_ints_or_nan(other) + ): + warn_on_upcast = False + if warn_on_upcast: warnings.warn( f"Setting an item of incompatible dtype is deprecated " diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 288aa1af746b6..b324291bab31e 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -337,18 +337,12 @@ def test_setitem(self, float_frame, using_copy_on_write): def test_setitem2(self): # dtype changing GH4204 df = DataFrame([[0, 0]]) - with tm.assert_produces_warning( - FutureWarning, match="Setting an item of incompatible dtype" - ): - df.iloc[0] = np.nan + df.iloc[0] = np.nan expected = DataFrame([[np.nan, np.nan]]) tm.assert_frame_equal(df, expected) df = DataFrame([[0, 0]]) - with tm.assert_produces_warning( - FutureWarning, match="Setting an item of incompatible dtype" - ): - df.loc[0] = np.nan + df.loc[0] = np.nan tm.assert_frame_equal(df, expected) def test_setitem_boolean(self, float_frame): @@ -1579,9 +1573,7 @@ def test_setitem(self, uint64_frame): # With NaN: because uint64 has no NaN element, # the column should be cast to object. df2 = df.copy() - with tm.assert_produces_warning( - FutureWarning, match="Setting an item of incompatible dtype" - ): + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): df2.iloc[1, 1] = pd.NaT df2.iloc[1, 2] = pd.NaT result = df2["B"] @@ -1901,19 +1893,19 @@ def test_setitem_dict_and_set_disallowed_multiindex(self, key): class TestSetitemValidation: # This is adapted from pandas/tests/arrays/masked/test_indexing.py # but checks for warnings instead of errors. 
- def _check_setitem_invalid(self, df, invalid, indexer): + def _check_setitem_invalid(self, df, invalid, indexer, warn): msg = "Setting an item of incompatible dtype is deprecated" msg = re.escape(msg) orig_df = df.copy() # iloc - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(warn, match=msg): df.iloc[indexer, 0] = invalid df = orig_df.copy() # loc - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(warn, match=msg): df.loc[indexer, "a"] = invalid df = orig_df.copy() @@ -1934,16 +1926,20 @@ def _check_setitem_invalid(self, df, invalid, indexer): @pytest.mark.parametrize("indexer", _indexers) def test_setitem_validation_scalar_bool(self, invalid, indexer): df = DataFrame({"a": [True, False, False]}, dtype="bool") - self._check_setitem_invalid(df, invalid, indexer) + self._check_setitem_invalid(df, invalid, indexer, FutureWarning) @pytest.mark.parametrize("invalid", _invalid_scalars + [True, 1.5, np.float64(1.5)]) @pytest.mark.parametrize("indexer", _indexers) def test_setitem_validation_scalar_int(self, invalid, any_int_numpy_dtype, indexer): df = DataFrame({"a": [1, 2, 3]}, dtype=any_int_numpy_dtype) - self._check_setitem_invalid(df, invalid, indexer) + if isna(invalid) and invalid is not pd.NaT: + warn = None + else: + warn = FutureWarning + self._check_setitem_invalid(df, invalid, indexer, warn) @pytest.mark.parametrize("invalid", _invalid_scalars + [True]) @pytest.mark.parametrize("indexer", _indexers) def test_setitem_validation_scalar_float(self, invalid, float_numpy_dtype, indexer): df = DataFrame({"a": [1, 2, None]}, dtype=float_numpy_dtype) - self._check_setitem_invalid(df, invalid, indexer) + self._check_setitem_invalid(df, invalid, indexer, FutureWarning) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index d6d8a63797bb6..e10913b41bda5 100644 --- a/pandas/tests/indexing/test_indexing.py +++ 
b/pandas/tests/indexing/test_indexing.py @@ -830,8 +830,7 @@ def test_coercion_with_loc(self, expected): start_data, expected_result, warn = expected start_dataframe = DataFrame({"foo": start_data}) - with tm.assert_produces_warning(warn, match="incompatible dtype"): - start_dataframe.loc[0, ["foo"]] = None + start_dataframe.loc[0, ["foo"]] = None expected_dataframe = DataFrame({"foo": expected_result}) tm.assert_frame_equal(start_dataframe, expected_dataframe) @@ -841,8 +840,7 @@ def test_coercion_with_setitem_and_dataframe(self, expected): start_data, expected_result, warn = expected start_dataframe = DataFrame({"foo": start_data}) - with tm.assert_produces_warning(warn, match="incompatible dtype"): - start_dataframe[start_dataframe["foo"] == start_dataframe["foo"][0]] = None + start_dataframe[start_dataframe["foo"] == start_dataframe["foo"][0]] = None expected_dataframe = DataFrame({"foo": expected_result}) tm.assert_frame_equal(start_dataframe, expected_dataframe) @@ -852,10 +850,7 @@ def test_none_coercion_loc_and_dataframe(self, expected): start_data, expected_result, warn = expected start_dataframe = DataFrame({"foo": start_data}) - with tm.assert_produces_warning(warn, match="incompatible dtype"): - start_dataframe.loc[ - start_dataframe["foo"] == start_dataframe["foo"][0] - ] = None + start_dataframe.loc[start_dataframe["foo"] == start_dataframe["foo"][0]] = None expected_dataframe = DataFrame({"foo": expected_result}) tm.assert_frame_equal(start_dataframe, expected_dataframe) @@ -869,10 +864,7 @@ def test_none_coercion_mixed_dtypes(self): "d": ["a", "b", "c"], } ) - with tm.assert_produces_warning( - FutureWarning, match="item of incompatible dtype" - ): - start_dataframe.iloc[0] = None + start_dataframe.iloc[0] = None exp = DataFrame( { diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 7b857a487db78..0fa28920d41bd 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ 
b/pandas/tests/series/indexing/test_indexing.py @@ -19,6 +19,7 @@ Timestamp, concat, date_range, + isna, period_range, timedelta_range, ) @@ -456,25 +457,25 @@ def test_setitem_dict_and_set_disallowed_multiindex(self, key): class TestSetitemValidation: # This is adapted from pandas/tests/arrays/masked/test_indexing.py # but checks for warnings instead of errors. - def _check_setitem_invalid(self, ser, invalid, indexer): + def _check_setitem_invalid(self, ser, invalid, indexer, warn): msg = "Setting an item of incompatible dtype is deprecated" msg = re.escape(msg) orig_ser = ser.copy() - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(warn, match=msg): ser[indexer] = invalid ser = orig_ser.copy() - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(warn, match=msg): ser.iloc[indexer] = invalid ser = orig_ser.copy() - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(warn, match=msg): ser.loc[indexer] = invalid ser = orig_ser.copy() - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(warn, match=msg): ser[:] = invalid _invalid_scalars = [ @@ -494,16 +495,20 @@ def _check_setitem_invalid(self, ser, invalid, indexer): @pytest.mark.parametrize("indexer", _indexers) def test_setitem_validation_scalar_bool(self, invalid, indexer): ser = Series([True, False, False], dtype="bool") - self._check_setitem_invalid(ser, invalid, indexer) + self._check_setitem_invalid(ser, invalid, indexer, FutureWarning) @pytest.mark.parametrize("invalid", _invalid_scalars + [True, 1.5, np.float64(1.5)]) @pytest.mark.parametrize("indexer", _indexers) def test_setitem_validation_scalar_int(self, invalid, any_int_numpy_dtype, indexer): ser = Series([1, 2, 3], dtype=any_int_numpy_dtype) - self._check_setitem_invalid(ser, invalid, indexer) + if isna(invalid) and invalid is not NaT: + warn = None + else: + warn = FutureWarning + 
self._check_setitem_invalid(ser, invalid, indexer, warn) @pytest.mark.parametrize("invalid", _invalid_scalars + [True]) @pytest.mark.parametrize("indexer", _indexers) def test_setitem_validation_scalar_float(self, invalid, float_numpy_dtype, indexer): ser = Series([1, 2, None], dtype=float_numpy_dtype) - self._check_setitem_invalid(ser, invalid, indexer) + self._check_setitem_invalid(ser, invalid, indexer, FutureWarning) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 1e091db21ff83..f419ff9384042 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -191,14 +191,11 @@ def test_setitem_series_object_dtype(self, indexer, ser_index): expected = Series([Series([42], index=[ser_index]), 0], dtype="object") tm.assert_series_equal(ser, expected) - @pytest.mark.parametrize( - "index, exp_value, warn", [(0, 42, None), (1, np.nan, FutureWarning)] - ) - def test_setitem_series(self, index, exp_value, warn): + @pytest.mark.parametrize("index, exp_value", [(0, 42), (1, np.nan)]) + def test_setitem_series(self, index, exp_value): # GH#38303 ser = Series([0, 0]) - with tm.assert_produces_warning(warn, match="item of incompatible dtype"): - ser.loc[0] = Series([42], index=[index]) + ser.loc[0] = Series([42], index=[index]) expected = Series([exp_value, 0]) tm.assert_series_equal(ser, expected) @@ -575,7 +572,7 @@ def test_setitem_keep_precision(self, any_numeric_ea_dtype): [ (NA, NA, "Int64", "Int64", 1, None), (NA, NA, "Int64", "Int64", 2, None), - (NA, np.nan, "int64", "float64", 1, FutureWarning), + (NA, np.nan, "int64", "float64", 1, None), (NA, np.nan, "int64", "float64", 2, None), (NaT, NaT, "int64", "object", 1, FutureWarning), (NaT, NaT, "int64", "object", 2, None), @@ -583,7 +580,7 @@ def test_setitem_keep_precision(self, any_numeric_ea_dtype): (np.nan, NA, "Int64", "Int64", 2, None), (np.nan, NA, "Float64", "Float64", 1, None), (np.nan, NA, "Float64", 
"Float64", 2, None), - (np.nan, np.nan, "int64", "float64", 1, FutureWarning), + (np.nan, np.nan, "int64", "float64", 1, None), (np.nan, np.nan, "int64", "float64", 2, None), ], ) @@ -592,7 +589,7 @@ def test_setitem_enlarge_with_na( ): # GH#32346 ser = Series([1, 2], dtype=dtype) - with tm.assert_produces_warning(warn, match="item of incompatible dtype"): + with tm.assert_produces_warning(warn, match="incompatible dtype"): ser[indexer] = na expected_values = [1, target_na] if indexer == 1 else [1, 2, target_na] expected = Series(expected_values, dtype=target_dtype) @@ -884,7 +881,7 @@ def test_index_putmask(self, obj, key, expected, warn, val): Series([2, 3, 4, 5, 6, 7, 8, 9, 10]), Series([np.nan, 3, np.nan, 5, np.nan, 7, np.nan, 9, np.nan]), slice(None, None, 2), - FutureWarning, + None, id="int_series_slice_key_step", ), pytest.param( @@ -899,7 +896,7 @@ def test_index_putmask(self, obj, key, expected, warn, val): Series(np.arange(10)), Series([np.nan, np.nan, np.nan, np.nan, np.nan, 5, 6, 7, 8, 9]), slice(None, 5), - FutureWarning, + None, id="int_series_slice_key", ), pytest.param( @@ -907,7 +904,7 @@ def test_index_putmask(self, obj, key, expected, warn, val): Series([1, 2, 3]), Series([np.nan, 2, 3]), 0, - FutureWarning, + None, id="int_series_int_key", ), pytest.param( @@ -1134,7 +1131,7 @@ def warn(self): "obj,expected,warn", [ # For numeric series, we should coerce to NaN. - (Series([1, 2, 3]), Series([np.nan, 2, 3]), FutureWarning), + (Series([1, 2, 3]), Series([np.nan, 2, 3]), None), (Series([1.0, 2.0, 3.0]), Series([np.nan, 2.0, 3.0]), None), # For datetime series, we should coerce to NaT. 
( @@ -1584,13 +1581,11 @@ def test_20643_comment(): expected = Series([np.nan, 1, 2], index=["a", "b", "c"]) ser = orig.copy() - with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): - ser.iat[0] = None + ser.iat[0] = None tm.assert_series_equal(ser, expected) ser = orig.copy() - with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): - ser.iloc[0] = None + ser.iloc[0] = None tm.assert_series_equal(ser, expected) diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index 9cd0ce250b5df..d1c79d0f00365 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -206,17 +206,7 @@ def test_convert_dtypes( # Test that it is a copy copy = series.copy(deep=True) - if result.notna().sum() > 0 and result.dtype in [ - "int8", - "uint8", - "int16", - "uint16", - "int32", - "uint32", - "int64", - "uint64", - "interval[int64, right]", - ]: + if result.notna().sum() > 0 and result.dtype in ["interval[int64, right]"]: with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): result[result.notna()] = np.nan else: