From 9be06becef5306b619f9f0b18f61ccc52ba59123 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 13 Aug 2023 11:09:13 +0100 Subject: [PATCH 01/10] wip --- pandas/core/internals/blocks.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index ecb9cd47d7995..a26adaa371c53 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -60,6 +60,7 @@ from pandas.core.dtypes.common import ( ensure_platform_int, is_1d_only_ea_dtype, + is_integer_dtype, is_list_like, is_string_dtype, ) @@ -453,6 +454,10 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block: we can also safely try to coerce to the same dtype and will receive the same block """ + if isna(other) and is_integer_dtype(self.values.dtype): + # In a future version of pandas, the default will be that + # setting `nan` into an integer series won't raise. + warn_on_upcast = False new_dtype = find_result_type(self.values.dtype, other) if warn_on_upcast: warnings.warn( From 1bfd932611ae1aff458de7faf5c930c2f7679a8c Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 21 Aug 2023 15:53:02 +0200 Subject: [PATCH 02/10] add has_only_ints_or_nan cython helper --- pandas/_libs/lib.pyi | 1 + pandas/_libs/lib.pyx | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index 32641319a6b96..15bd5a7379105 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -195,6 +195,7 @@ def array_equivalent_object( right: npt.NDArray[np.object_], ) -> bool: ... def has_infs(arr: np.ndarray) -> bool: ... # const floating[:] +def has_only_ints_or_nan(arr: np.ndarray) -> bool: ... 
# const floating[:] def get_reverse_indexer( indexer: np.ndarray, # const intp_t[:] length: int, diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index a96152ccdf3cc..1b386858a305d 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -530,6 +530,22 @@ def has_infs(floating[:] arr) -> bool: return ret +@cython.boundscheck(False) +@cython.wraparound(False) +def has_only_ints_or_nan(floating[:] arr) -> bool: + cdef: + floating val + intp_t i + + for i in range(len(arr)): + val = arr[i] + if (val != val) or (val == <int64_t>val): + continue + else: + return False + return True + + def maybe_indices_to_slice(ndarray[intp_t, ndim=1] indices, int max_len): cdef: Py_ssize_t i, n = len(indices) From 26bd53c294130a17720e38025ce3f15f981f646b Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 21 Aug 2023 14:47:56 +0100 Subject: [PATCH 03/10] wip --- pandas/core/internals/blocks.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index a26adaa371c53..ff79c044724b6 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -60,8 +60,10 @@ from pandas.core.dtypes.common import ( ensure_platform_int, is_1d_only_ea_dtype, + is_float_dtype, is_integer_dtype, is_list_like, + is_scalar, is_string_dtype, ) from pandas.core.dtypes.dtypes import ( @@ -454,11 +456,19 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block: we can also safely try to coerce to the same dtype and will receive the same block """ - if isna(other) and is_integer_dtype(self.values.dtype): + new_dtype = find_result_type(self.values.dtype, other) + + if ( + is_scalar(other) and isna(other) and is_integer_dtype(self.values.dtype) + ) or ( + is_integer_dtype(self.values.dtype) + and is_float_dtype(other.dtype) + and lib.has_only_ints_or_nan(other) + ): # In a future version of pandas, the default will be
that # setting `nan` into an integer series won't raise. warn_on_upcast = False - new_dtype = find_result_type(self.values.dtype, other) + if warn_on_upcast: warnings.warn( f"Setting an item of incompatible dtype is deprecated " From d4e989a36faadc173acf245fbc69977b86b52dcc Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 21 Aug 2023 15:35:26 +0100 Subject: [PATCH 04/10] take care of nat --- pandas/core/internals/blocks.py | 23 +++++++++++++++----- pandas/tests/frame/indexing/test_indexing.py | 10 ++------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index b23c011bd1bc4..05e0ad3748a57 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -458,15 +458,26 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block: """ new_dtype = find_result_type(self.values.dtype, other) - if ( - is_scalar(other) and isna(other) and is_integer_dtype(self.values.dtype) - ) or ( - is_integer_dtype(self.values.dtype) + # In a future version of pandas, the default will be that + # setting `nan` into an integer series won't raise. + if is_scalar(other) and is_integer_dtype(self.values.dtype): + try: + is_nan = np.isnan(other) + except TypeError: + is_nan = False + try: + is_nat = np.isnat(other) + except TypeError: + is_nat = False + if is_nan and not is_nat: + warn_on_upcast = False + elif ( + isinstance(other, np.ndarray) + and other.ndim == 1 + and is_integer_dtype(self.values.dtype) and is_float_dtype(other.dtype) and lib.has_only_ints_or_nan(other) ): - # In a future version of pandas, the default will be that - # setting `nan` into an integer series won't raise. 
warn_on_upcast = False if warn_on_upcast: diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 288aa1af746b6..adfd472a9e5cf 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -337,18 +337,12 @@ def test_setitem(self, float_frame, using_copy_on_write): def test_setitem2(self): # dtype changing GH4204 df = DataFrame([[0, 0]]) - with tm.assert_produces_warning( - FutureWarning, match="Setting an item of incompatible dtype" - ): - df.iloc[0] = np.nan + df.iloc[0] = np.nan expected = DataFrame([[np.nan, np.nan]]) tm.assert_frame_equal(df, expected) df = DataFrame([[0, 0]]) - with tm.assert_produces_warning( - FutureWarning, match="Setting an item of incompatible dtype" - ): - df.loc[0] = np.nan + df.loc[0] = np.nan tm.assert_frame_equal(df, expected) def test_setitem_boolean(self, float_frame): From 99767c064d797ea78dd2812cc148d960c8ab66fd Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 21 Aug 2023 16:30:41 +0100 Subject: [PATCH 05/10] fixup more tests --- pandas/core/internals/blocks.py | 8 +++++- pandas/tests/indexing/test_indexing.py | 16 +++-------- pandas/tests/series/indexing/test_setitem.py | 27 ++++++++----------- .../series/methods/test_convert_dtypes.py | 16 +---------- 4 files changed, 23 insertions(+), 44 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 05e0ad3748a57..cd3fddcb789f4 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -460,7 +460,13 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block: # In a future version of pandas, the default will be that # setting `nan` into an integer series won't raise. 
- if is_scalar(other) and is_integer_dtype(self.values.dtype): + if is_scalar(other) and ( + is_integer_dtype(self.values.dtype) + or ( + isinstance(self.values.dtype, IntervalDtype) + and is_integer_dtype(self.values.dtype.subtype) + ) + ): try: is_nan = np.isnan(other) except TypeError: diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index d6d8a63797bb6..e10913b41bda5 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -830,8 +830,7 @@ def test_coercion_with_loc(self, expected): start_data, expected_result, warn = expected start_dataframe = DataFrame({"foo": start_data}) - with tm.assert_produces_warning(warn, match="incompatible dtype"): - start_dataframe.loc[0, ["foo"]] = None + start_dataframe.loc[0, ["foo"]] = None expected_dataframe = DataFrame({"foo": expected_result}) tm.assert_frame_equal(start_dataframe, expected_dataframe) @@ -841,8 +840,7 @@ def test_coercion_with_setitem_and_dataframe(self, expected): start_data, expected_result, warn = expected start_dataframe = DataFrame({"foo": start_data}) - with tm.assert_produces_warning(warn, match="incompatible dtype"): - start_dataframe[start_dataframe["foo"] == start_dataframe["foo"][0]] = None + start_dataframe[start_dataframe["foo"] == start_dataframe["foo"][0]] = None expected_dataframe = DataFrame({"foo": expected_result}) tm.assert_frame_equal(start_dataframe, expected_dataframe) @@ -852,10 +850,7 @@ def test_none_coercion_loc_and_dataframe(self, expected): start_data, expected_result, warn = expected start_dataframe = DataFrame({"foo": start_data}) - with tm.assert_produces_warning(warn, match="incompatible dtype"): - start_dataframe.loc[ - start_dataframe["foo"] == start_dataframe["foo"][0] - ] = None + start_dataframe.loc[start_dataframe["foo"] == start_dataframe["foo"][0]] = None expected_dataframe = DataFrame({"foo": expected_result}) tm.assert_frame_equal(start_dataframe, expected_dataframe) @@ -869,10 
+864,7 @@ def test_none_coercion_mixed_dtypes(self): "d": ["a", "b", "c"], } ) - with tm.assert_produces_warning( - FutureWarning, match="item of incompatible dtype" - ): - start_dataframe.iloc[0] = None + start_dataframe.iloc[0] = None exp = DataFrame( { diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 1e091db21ff83..d08cfd6f29841 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -191,14 +191,11 @@ def test_setitem_series_object_dtype(self, indexer, ser_index): expected = Series([Series([42], index=[ser_index]), 0], dtype="object") tm.assert_series_equal(ser, expected) - @pytest.mark.parametrize( - "index, exp_value, warn", [(0, 42, None), (1, np.nan, FutureWarning)] - ) - def test_setitem_series(self, index, exp_value, warn): + @pytest.mark.parametrize("index, exp_value", [(0, 42), (1, np.nan)]) + def test_setitem_series(self, index, exp_value): # GH#38303 ser = Series([0, 0]) - with tm.assert_produces_warning(warn, match="item of incompatible dtype"): - ser.loc[0] = Series([42], index=[index]) + ser.loc[0] = Series([42], index=[index]) expected = Series([exp_value, 0]) tm.assert_series_equal(ser, expected) @@ -575,7 +572,7 @@ def test_setitem_keep_precision(self, any_numeric_ea_dtype): [ (NA, NA, "Int64", "Int64", 1, None), (NA, NA, "Int64", "Int64", 2, None), - (NA, np.nan, "int64", "float64", 1, FutureWarning), + (NA, np.nan, "int64", "float64", 1, None), (NA, np.nan, "int64", "float64", 2, None), (NaT, NaT, "int64", "object", 1, FutureWarning), (NaT, NaT, "int64", "object", 2, None), @@ -583,7 +580,7 @@ def test_setitem_keep_precision(self, any_numeric_ea_dtype): (np.nan, NA, "Int64", "Int64", 2, None), (np.nan, NA, "Float64", "Float64", 1, None), (np.nan, NA, "Float64", "Float64", 2, None), - (np.nan, np.nan, "int64", "float64", 1, FutureWarning), + (np.nan, np.nan, "int64", "float64", 1, None), (np.nan, np.nan, "int64", "float64", 2, 
None), ], ) @@ -884,7 +881,7 @@ def test_index_putmask(self, obj, key, expected, warn, val): Series([2, 3, 4, 5, 6, 7, 8, 9, 10]), Series([np.nan, 3, np.nan, 5, np.nan, 7, np.nan, 9, np.nan]), slice(None, None, 2), - FutureWarning, + None, id="int_series_slice_key_step", ), pytest.param( @@ -899,7 +896,7 @@ def test_index_putmask(self, obj, key, expected, warn, val): Series(np.arange(10)), Series([np.nan, np.nan, np.nan, np.nan, np.nan, 5, 6, 7, 8, 9]), slice(None, 5), - FutureWarning, + None, id="int_series_slice_key", ), pytest.param( @@ -907,7 +904,7 @@ def test_index_putmask(self, obj, key, expected, warn, val): Series([1, 2, 3]), Series([np.nan, 2, 3]), 0, - FutureWarning, + None, id="int_series_int_key", ), pytest.param( @@ -1134,7 +1131,7 @@ def warn(self): "obj,expected,warn", [ # For numeric series, we should coerce to NaN. - (Series([1, 2, 3]), Series([np.nan, 2, 3]), FutureWarning), + (Series([1, 2, 3]), Series([np.nan, 2, 3]), None), (Series([1.0, 2.0, 3.0]), Series([np.nan, 2.0, 3.0]), None), # For datetime series, we should coerce to NaT. 
( @@ -1584,13 +1581,11 @@ def test_20643_comment(): expected = Series([np.nan, 1, 2], index=["a", "b", "c"]) ser = orig.copy() - with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): - ser.iat[0] = None + ser.iat[0] = None tm.assert_series_equal(ser, expected) ser = orig.copy() - with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): - ser.iloc[0] = None + ser.iloc[0] = None tm.assert_series_equal(ser, expected) diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index 9cd0ce250b5df..f2ac5f1086625 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -206,21 +206,7 @@ def test_convert_dtypes( # Test that it is a copy copy = series.copy(deep=True) - if result.notna().sum() > 0 and result.dtype in [ - "int8", - "uint8", - "int16", - "uint16", - "int32", - "uint32", - "int64", - "uint64", - "interval[int64, right]", - ]: - with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): - result[result.notna()] = np.nan - else: - result[result.notna()] = np.nan + result[result.notna()] = np.nan # Make sure original not changed tm.assert_series_equal(series, copy) From 5f99673679d7f55d02e61ab29370606e70c481ee Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 21 Aug 2023 19:41:51 +0100 Subject: [PATCH 06/10] catch interval[int64, right] warning --- pandas/core/internals/blocks.py | 13 +++++-------- pandas/tests/series/methods/test_convert_dtypes.py | 6 +++++- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index cd3fddcb789f4..b90e81933df6b 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -460,20 +460,17 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block: # In a future version of 
pandas, the default will be that # setting `nan` into an integer series won't raise. - if is_scalar(other) and ( - is_integer_dtype(self.values.dtype) - or ( - isinstance(self.values.dtype, IntervalDtype) - and is_integer_dtype(self.values.dtype.subtype) - ) - ): + if is_scalar(other) and is_integer_dtype(self.values.dtype): try: - is_nan = np.isnan(other) + is_nan = bool(np.isnan(other)) except TypeError: + # ufunc 'isnan' not supported for the input types + # boolean value of NA is ambiguous is_nan = False try: is_nat = np.isnat(other) except TypeError: + # ufunc 'isnat' is only defined for np.datetime64 and np.timedelta64 is_nat = False if is_nan and not is_nat: warn_on_upcast = False diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index f2ac5f1086625..d1c79d0f00365 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -206,7 +206,11 @@ def test_convert_dtypes( # Test that it is a copy copy = series.copy(deep=True) - result[result.notna()] = np.nan + if result.notna().sum() > 0 and result.dtype in ["interval[int64, right]"]: + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + result[result.notna()] = np.nan + else: + result[result.notna()] = np.nan # Make sure original not changed tm.assert_series_equal(series, copy) From 4f6869f6a8c4da81368a4cf21570d20a46534999 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 22 Aug 2023 11:41:56 +0100 Subject: [PATCH 07/10] just use isna --- pandas/core/internals/blocks.py | 16 ++------- pandas/tests/series/indexing/test_indexing.py | 21 +++++++----- pandas/tests/series/indexing/test_setitem.py | 33 +++++++++---------- 3 files changed, 30 insertions(+), 40 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index b90e81933df6b..4526ae0999d2b 100644 --- 
a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -460,20 +460,8 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block: # In a future version of pandas, the default will be that # setting `nan` into an integer series won't raise. - if is_scalar(other) and is_integer_dtype(self.values.dtype): - try: - is_nan = bool(np.isnan(other)) - except TypeError: - # ufunc 'isnan' not supported for the input types - # boolean value of NA is ambiguous - is_nan = False - try: - is_nat = np.isnat(other) - except TypeError: - # ufunc 'isnat' is only defined for np.datetime64 and np.timedelta64 - is_nat = False - if is_nan and not is_nat: - warn_on_upcast = False + if is_scalar(other) and is_integer_dtype(self.values.dtype) and isna(other): + warn_on_upcast = False elif ( isinstance(other, np.ndarray) and other.ndim == 1 diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 7b857a487db78..55ed2bc7b6c4c 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -19,6 +19,7 @@ Timestamp, concat, date_range, + isna, period_range, timedelta_range, ) @@ -456,25 +457,25 @@ def test_setitem_dict_and_set_disallowed_multiindex(self, key): class TestSetitemValidation: # This is adapted from pandas/tests/arrays/masked/test_indexing.py # but checks for warnings instead of errors. 
- def _check_setitem_invalid(self, ser, invalid, indexer): + def _check_setitem_invalid(self, ser, invalid, indexer, warn): msg = "Setting an item of incompatible dtype is deprecated" msg = re.escape(msg) orig_ser = ser.copy() - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(warn, match=msg): ser[indexer] = invalid ser = orig_ser.copy() - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(warn, match=msg): ser.iloc[indexer] = invalid ser = orig_ser.copy() - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(warn, match=msg): ser.loc[indexer] = invalid ser = orig_ser.copy() - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(warn, match=msg): ser[:] = invalid _invalid_scalars = [ @@ -494,16 +495,20 @@ def _check_setitem_invalid(self, ser, invalid, indexer): @pytest.mark.parametrize("indexer", _indexers) def test_setitem_validation_scalar_bool(self, invalid, indexer): ser = Series([True, False, False], dtype="bool") - self._check_setitem_invalid(ser, invalid, indexer) + self._check_setitem_invalid(ser, invalid, indexer, FutureWarning) @pytest.mark.parametrize("invalid", _invalid_scalars + [True, 1.5, np.float64(1.5)]) @pytest.mark.parametrize("indexer", _indexers) def test_setitem_validation_scalar_int(self, invalid, any_int_numpy_dtype, indexer): ser = Series([1, 2, 3], dtype=any_int_numpy_dtype) - self._check_setitem_invalid(ser, invalid, indexer) + if isna(invalid): + warn = None + else: + warn = FutureWarning + self._check_setitem_invalid(ser, invalid, indexer, warn) @pytest.mark.parametrize("invalid", _invalid_scalars + [True]) @pytest.mark.parametrize("indexer", _indexers) def test_setitem_validation_scalar_float(self, invalid, float_numpy_dtype, indexer): ser = Series([1, 2, None], dtype=float_numpy_dtype) - self._check_setitem_invalid(ser, invalid, indexer) + 
self._check_setitem_invalid(ser, invalid, indexer, FutureWarning) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index d08cfd6f29841..86e40014eb72c 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -568,29 +568,26 @@ def test_setitem_keep_precision(self, any_numeric_ea_dtype): tm.assert_series_equal(ser, expected) @pytest.mark.parametrize( - "na, target_na, dtype, target_dtype, indexer, warn", + "na, target_na, dtype, target_dtype, indexer", [ - (NA, NA, "Int64", "Int64", 1, None), - (NA, NA, "Int64", "Int64", 2, None), - (NA, np.nan, "int64", "float64", 1, None), - (NA, np.nan, "int64", "float64", 2, None), - (NaT, NaT, "int64", "object", 1, FutureWarning), - (NaT, NaT, "int64", "object", 2, None), - (np.nan, NA, "Int64", "Int64", 1, None), - (np.nan, NA, "Int64", "Int64", 2, None), - (np.nan, NA, "Float64", "Float64", 1, None), - (np.nan, NA, "Float64", "Float64", 2, None), - (np.nan, np.nan, "int64", "float64", 1, None), - (np.nan, np.nan, "int64", "float64", 2, None), + (NA, NA, "Int64", "Int64", 1), + (NA, NA, "Int64", "Int64", 2), + (NA, np.nan, "int64", "float64", 1), + (NA, np.nan, "int64", "float64", 2), + (NaT, NaT, "int64", "object", 1), + (NaT, NaT, "int64", "object", 2), + (np.nan, NA, "Int64", "Int64", 1), + (np.nan, NA, "Int64", "Int64", 2), + (np.nan, NA, "Float64", "Float64", 1), + (np.nan, NA, "Float64", "Float64", 2), + (np.nan, np.nan, "int64", "float64", 1), + (np.nan, np.nan, "int64", "float64", 2), ], ) - def test_setitem_enlarge_with_na( - self, na, target_na, dtype, target_dtype, indexer, warn - ): + def test_setitem_enlarge_with_na(self, na, target_na, dtype, target_dtype, indexer): # GH#32346 ser = Series([1, 2], dtype=dtype) - with tm.assert_produces_warning(warn, match="item of incompatible dtype"): - ser[indexer] = na + ser[indexer] = na expected_values = [1, target_na] if indexer == 1 else [1, 2, target_na] expected 
= Series(expected_values, dtype=target_dtype) tm.assert_series_equal(ser, expected) From f88bcc1a657fb48c0545b2aabe7260d0c754ff0e Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 22 Aug 2023 11:48:13 +0100 Subject: [PATCH 08/10] fixup tests --- pandas/tests/frame/indexing/test_indexing.py | 23 ++++++++++---------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index adfd472a9e5cf..a7485139ff540 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1573,11 +1573,8 @@ def test_setitem(self, uint64_frame): # With NaN: because uint64 has no NaN element, # the column should be cast to object. df2 = df.copy() - with tm.assert_produces_warning( - FutureWarning, match="Setting an item of incompatible dtype" - ): - df2.iloc[1, 1] = pd.NaT - df2.iloc[1, 2] = pd.NaT + df2.iloc[1, 1] = pd.NaT + df2.iloc[1, 2] = pd.NaT result = df2["B"] tm.assert_series_equal(notna(result), Series([True, False, True], name="B")) tm.assert_series_equal( @@ -1895,19 +1892,19 @@ def test_setitem_dict_and_set_disallowed_multiindex(self, key): class TestSetitemValidation: # This is adapted from pandas/tests/arrays/masked/test_indexing.py # but checks for warnings instead of errors. 
- def _check_setitem_invalid(self, df, invalid, indexer): + def _check_setitem_invalid(self, df, invalid, indexer, warn): msg = "Setting an item of incompatible dtype is deprecated" msg = re.escape(msg) orig_df = df.copy() # iloc - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(warn, match=msg): df.iloc[indexer, 0] = invalid df = orig_df.copy() # loc - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(warn, match=msg): df.loc[indexer, "a"] = invalid df = orig_df.copy() @@ -1928,16 +1925,20 @@ def _check_setitem_invalid(self, df, invalid, indexer): @pytest.mark.parametrize("indexer", _indexers) def test_setitem_validation_scalar_bool(self, invalid, indexer): df = DataFrame({"a": [True, False, False]}, dtype="bool") - self._check_setitem_invalid(df, invalid, indexer) + self._check_setitem_invalid(df, invalid, indexer, FutureWarning) @pytest.mark.parametrize("invalid", _invalid_scalars + [True, 1.5, np.float64(1.5)]) @pytest.mark.parametrize("indexer", _indexers) def test_setitem_validation_scalar_int(self, invalid, any_int_numpy_dtype, indexer): df = DataFrame({"a": [1, 2, 3]}, dtype=any_int_numpy_dtype) - self._check_setitem_invalid(df, invalid, indexer) + if isna(invalid): + warn = None + else: + warn = FutureWarning + self._check_setitem_invalid(df, invalid, indexer, warn) @pytest.mark.parametrize("invalid", _invalid_scalars + [True]) @pytest.mark.parametrize("indexer", _indexers) def test_setitem_validation_scalar_float(self, invalid, float_numpy_dtype, indexer): df = DataFrame({"a": [1, 2, None]}, dtype=float_numpy_dtype) - self._check_setitem_invalid(df, invalid, indexer) + self._check_setitem_invalid(df, invalid, indexer, FutureWarning) From 02cf661f31c3730a380cb89ea9c6ec1832a1e69d Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 22 Aug 2023 12:06:05 +0100 Subject: [PATCH 09/10] noop From 
a2b5144dee310291a1fb534fc49cbc1d4e9bcfc6 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 25 Aug 2023 11:43:15 +0100 Subject: [PATCH 10/10] exclude NaT --- pandas/core/internals/blocks.py | 8 ++++- pandas/tests/frame/indexing/test_indexing.py | 7 ++-- pandas/tests/series/indexing/test_indexing.py | 2 +- pandas/tests/series/indexing/test_setitem.py | 33 ++++++++++--------- 4 files changed, 30 insertions(+), 20 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 4e6da85019826..bffc1251a0729 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -18,6 +18,7 @@ from pandas._config import using_copy_on_write from pandas._libs import ( + NaT, internals as libinternals, lib, writers, @@ -459,7 +460,12 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block: # In a future version of pandas, the default will be that # setting `nan` into an integer series won't raise. - if is_scalar(other) and is_integer_dtype(self.values.dtype) and isna(other): + if ( + is_scalar(other) + and is_integer_dtype(self.values.dtype) + and isna(other) + and other is not NaT + ): warn_on_upcast = False elif ( isinstance(other, np.ndarray) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index a7485139ff540..b324291bab31e 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1573,8 +1573,9 @@ def test_setitem(self, uint64_frame): # With NaN: because uint64 has no NaN element, # the column should be cast to object. 
df2 = df.copy() - df2.iloc[1, 1] = pd.NaT - df2.iloc[1, 2] = pd.NaT + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + df2.iloc[1, 1] = pd.NaT + df2.iloc[1, 2] = pd.NaT result = df2["B"] tm.assert_series_equal(notna(result), Series([True, False, True], name="B")) tm.assert_series_equal( @@ -1931,7 +1932,7 @@ def test_setitem_validation_scalar_bool(self, invalid, indexer): @pytest.mark.parametrize("indexer", _indexers) def test_setitem_validation_scalar_int(self, invalid, any_int_numpy_dtype, indexer): df = DataFrame({"a": [1, 2, 3]}, dtype=any_int_numpy_dtype) - if isna(invalid): + if isna(invalid) and invalid is not pd.NaT: warn = None else: warn = FutureWarning diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 55ed2bc7b6c4c..0fa28920d41bd 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -501,7 +501,7 @@ def test_setitem_validation_scalar_bool(self, invalid, indexer): @pytest.mark.parametrize("indexer", _indexers) def test_setitem_validation_scalar_int(self, invalid, any_int_numpy_dtype, indexer): ser = Series([1, 2, 3], dtype=any_int_numpy_dtype) - if isna(invalid): + if isna(invalid) and invalid is not NaT: warn = None else: warn = FutureWarning diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 86e40014eb72c..f419ff9384042 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -568,26 +568,29 @@ def test_setitem_keep_precision(self, any_numeric_ea_dtype): tm.assert_series_equal(ser, expected) @pytest.mark.parametrize( - "na, target_na, dtype, target_dtype, indexer", + "na, target_na, dtype, target_dtype, indexer, warn", [ - (NA, NA, "Int64", "Int64", 1), - (NA, NA, "Int64", "Int64", 2), - (NA, np.nan, "int64", "float64", 1), - (NA, np.nan, "int64", "float64", 2), - (NaT, NaT, "int64", 
"object", 1), - (NaT, NaT, "int64", "object", 2), - (np.nan, NA, "Int64", "Int64", 1), - (np.nan, NA, "Int64", "Int64", 2), - (np.nan, NA, "Float64", "Float64", 1), - (np.nan, NA, "Float64", "Float64", 2), - (np.nan, np.nan, "int64", "float64", 1), - (np.nan, np.nan, "int64", "float64", 2), + (NA, NA, "Int64", "Int64", 1, None), + (NA, NA, "Int64", "Int64", 2, None), + (NA, np.nan, "int64", "float64", 1, None), + (NA, np.nan, "int64", "float64", 2, None), + (NaT, NaT, "int64", "object", 1, FutureWarning), + (NaT, NaT, "int64", "object", 2, None), + (np.nan, NA, "Int64", "Int64", 1, None), + (np.nan, NA, "Int64", "Int64", 2, None), + (np.nan, NA, "Float64", "Float64", 1, None), + (np.nan, NA, "Float64", "Float64", 2, None), + (np.nan, np.nan, "int64", "float64", 1, None), + (np.nan, np.nan, "int64", "float64", 2, None), ], ) - def test_setitem_enlarge_with_na(self, na, target_na, dtype, target_dtype, indexer): + def test_setitem_enlarge_with_na( + self, na, target_na, dtype, target_dtype, indexer, warn + ): # GH#32346 ser = Series([1, 2], dtype=dtype) - ser[indexer] = na + with tm.assert_produces_warning(warn, match="incompatible dtype"): + ser[indexer] = na expected_values = [1, target_na] if indexer == 1 else [1, 2, target_na] expected = Series(expected_values, dtype=target_dtype) tm.assert_series_equal(ser, expected)