From 4644c00cc7207105c74012c0fe6047dff41fe828 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 24 Nov 2023 11:35:31 +0000 Subject: [PATCH 01/35] raise pdep6 warning for loc full setter --- doc/source/whatsnew/v2.2.0.rst | 1 + pandas/core/frame.py | 6 +++--- pandas/core/indexing.py | 2 +- pandas/core/internals/managers.py | 9 +++++++++ pandas/tests/frame/indexing/test_indexing.py | 16 +++++++++------ pandas/tests/frame/indexing/test_setitem.py | 20 +++++++++++++++++++ pandas/tests/series/indexing/test_indexing.py | 2 +- 7 files changed, 45 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 8893fe0ecd398..ee77a6f6fc37d 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -487,6 +487,7 @@ Conversion ^^^^^^^^^^ - Bug in :func:`astype` when called with ``str`` on unpickled array - the array might change in-place (:issue:`54654`) - Bug in :meth:`Series.convert_dtypes` not converting all NA column to ``null[pyarrow]`` (:issue:`55346`) +- Bug in ``DataFrame.loc`` was not throwing "incompatible dtype warning" (see PDEP6) when assigning a ``Series`` with a different dtype using a full column setter (e.g. ``df.loc[:, 'a'] = incompatible_value``) (:issue:`55791`) - Strings diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5d05983529fba..3fa823b5267d8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4157,7 +4157,7 @@ def _get_value(self, index, col, takeable: bool = False) -> Scalar: loc = engine.get_loc(index) return series._values[loc] - def isetitem(self, loc, value) -> None: + def isetitem(self, loc, value, *, inplace: bool = False) -> None: """ Set the given value in the column with position `loc`. 
@@ -4193,11 +4193,11 @@ def isetitem(self, loc, value) -> None: for i, idx in enumerate(loc): arraylike, refs = self._sanitize_column(value.iloc[:, i]) - self._iset_item_mgr(idx, arraylike, inplace=False, refs=refs) + self._iset_item_mgr(idx, arraylike, inplace=inplace, refs=refs) return arraylike, refs = self._sanitize_column(value) - self._iset_item_mgr(loc, arraylike, inplace=False, refs=refs) + self._iset_item_mgr(loc, arraylike, inplace=inplace, refs=refs) def __setitem__(self, key, value) -> None: if not PYPY and using_copy_on_write(): diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 13756dd5a70e4..1800bdad2b94c 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2111,7 +2111,7 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None: # If we're setting an entire column and we can't do it inplace, # then we can use value's dtype (or inferred dtype) # instead of object - self.obj.isetitem(loc, value) + self.obj.isetitem(loc, value, inplace=True) else: # set value into the column (first attempting to operate inplace, then # falling back to casting if necessary) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 843441e4865c7..1997c86582cc8 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1326,6 +1326,15 @@ def _iset_single( iloc = self.blklocs[loc] blk.set_inplace(slice(iloc, iloc + 1), value, copy=copy) return + elif inplace: + warnings.warn( + f"Setting an item of incompatible dtype is deprecated " + "and will raise in a future error of pandas. 
" + f"Value '{value}' has dtype incompatible with {blk.dtype}, " + "please explicitly cast to a compatible dtype first.", + FutureWarning, + stacklevel=find_stack_level(), + ) nb = new_block_2d(value, placement=blk._mgr_locs, refs=refs) old_blocks = self.blocks diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index b2d94ff5ffbd1..5b564e33867c9 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1944,14 +1944,18 @@ def _check_setitem_invalid(self, df, invalid, indexer, warn): orig_df = df.copy() - # iloc + # loc with tm.assert_produces_warning(warn, match=msg): - df.iloc[indexer, 0] = invalid + df.loc[indexer, "a"] = invalid df = orig_df.copy() - # loc + # iloc + if indexer is slice(None, None, None): + # This is only inplace for the `.loc` case, + # so doesn't need to warn for the `.iloc` case. + warn = None with tm.assert_produces_warning(warn, match=msg): - df.loc[indexer, "a"] = invalid + df.iloc[indexer, 0] = invalid df = orig_df.copy() _invalid_scalars = [ @@ -1963,7 +1967,7 @@ def _check_setitem_invalid(self, df, invalid, indexer, warn): np.datetime64("NaT"), np.timedelta64("NaT"), ] - _indexers = [0, [0], slice(0, 1), [True, False, False]] + _indexers = [0, [0], slice(0, 1), [True, False, False], slice(None, None, None)] @pytest.mark.parametrize( "invalid", _invalid_scalars + [1, 1.0, np.int64(1), np.float64(1)] @@ -1977,7 +1981,7 @@ def test_setitem_validation_scalar_bool(self, invalid, indexer): @pytest.mark.parametrize("indexer", _indexers) def test_setitem_validation_scalar_int(self, invalid, any_int_numpy_dtype, indexer): df = DataFrame({"a": [1, 2, 3]}, dtype=any_int_numpy_dtype) - if isna(invalid) and invalid is not pd.NaT: + if isna(invalid) and invalid is not pd.NaT and not np.isnat(invalid): warn = None else: warn = FutureWarning diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py 
index bc632209ff7e1..c8d706a252a3f 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -1366,3 +1366,23 @@ def test_frame_setitem_empty_dataframe(self): index=Index([], dtype="datetime64[ns]", name="date"), ) tm.assert_frame_equal(df, expected) + + +def test_full_setter_loc_incompatible_dtype(): + # https://github.com/pandas-dev/pandas/issues/55791 + df = DataFrame({"a": [1, 2]}) + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + df.loc[:, "a"] = True + expected = DataFrame({"a": [True, True]}) + tm.assert_frame_equal(df, expected) + + df = DataFrame({"a": [1, 2]}) + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + df.loc[:, "a"] = {0: 3.5, 1: 4.5} + expected = DataFrame({"a": [3.5, 4.5]}) + tm.assert_frame_equal(df, expected) + + df = DataFrame({"a": [1, 2]}) + df.loc[:, "a"] = {0: 3, 1: 4} + expected = DataFrame({"a": [3, 4]}) + tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index b108ec24732ac..5c3b57a1dc50c 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -489,7 +489,7 @@ def _check_setitem_invalid(self, ser, invalid, indexer, warn): np.datetime64("NaT"), np.timedelta64("NaT"), ] - _indexers = [0, [0], slice(0, 1), [True, False, False]] + _indexers = [0, [0], slice(0, 1), [True, False, False], slice(None, None, None)] @pytest.mark.parametrize( "invalid", _invalid_scalars + [1, 1.0, np.int64(1), np.float64(1)] From da7efade47c3e8b9d39f789e2eaa2b5f84de825a Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 24 Nov 2023 12:41:09 +0000 Subject: [PATCH 02/35] update for stata reader --- pandas/io/stata.py | 5 ++-- pandas/tests/frame/indexing/test_indexing.py | 27 ++++++++++---------- 2 files changed, 16 insertions(+), 16 deletions(-) diff 
--git a/pandas/io/stata.py b/pandas/io/stata.py index 70294e8a62cca..b72e515a91aa8 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -1790,7 +1790,7 @@ def read( if convert_dates: for i, fmt in enumerate(self._fmtlist): if any(fmt.startswith(date_fmt) for date_fmt in _date_formats): - data.iloc[:, i] = _stata_elapsed_date_to_datetime_vec( + data[data.columns[i]] = _stata_elapsed_date_to_datetime_vec( data.iloc[:, i], fmt ) @@ -1866,7 +1866,8 @@ def _do_convert_missing(self, data: DataFrame, convert_missing: bool) -> DataFra replacements[i] = replacement if replacements: for idx, value in replacements.items(): - data.iloc[:, idx] = value + data[data.columns[idx]] = value + # data.iloc[:, idx] = value return data def _insert_strls(self, data: DataFrame) -> DataFrame: diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 5b564e33867c9..ca6a156eb0a43 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -337,12 +337,14 @@ def test_setitem(self, float_frame, using_copy_on_write, warn_copy_on_write): def test_setitem2(self): # dtype changing GH4204 df = DataFrame([[0, 0]]) - df.iloc[0] = np.nan + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + df.iloc[0] = np.nan expected = DataFrame([[np.nan, np.nan]]) tm.assert_frame_equal(df, expected) df = DataFrame([[0, 0]]) - df.loc[0] = np.nan + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + df.loc[0] = np.nan tm.assert_frame_equal(df, expected) def test_setitem_boolean(self, float_frame): @@ -934,7 +936,8 @@ def test_setitem_frame_upcast(self): # needs upcasting df = DataFrame([[1, 2, "foo"], [3, 4, "bar"]], columns=["A", "B", "C"]) df2 = df.copy() - df2.loc[:, ["A", "B"]] = df.loc[:, ["A", "B"]] + 0.5 + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + df2.loc[:, ["A", "B"]] = df.loc[:, ["A", "B"]] + 0.5 expected = 
df.reindex(columns=["A", "B"]) expected += 0.5 expected["C"] = df["C"] @@ -1374,9 +1377,9 @@ def test_loc_expand_empty_frame_keep_midx_names(self): @pytest.mark.parametrize( "val, idxr, warn", [ - ("x", "a", None), # TODO: this should warn as well - ("x", ["a"], None), # TODO: this should warn as well - (1, "a", None), # TODO: this should warn as well + ("x", "a", FutureWarning), + ("x", ["a"], FutureWarning), + (1, "a", FutureWarning), (1, ["a"], FutureWarning), ], ) @@ -1944,18 +1947,14 @@ def _check_setitem_invalid(self, df, invalid, indexer, warn): orig_df = df.copy() - # loc + # iloc with tm.assert_produces_warning(warn, match=msg): - df.loc[indexer, "a"] = invalid + df.iloc[indexer, 0] = invalid df = orig_df.copy() - # iloc - if indexer is slice(None, None, None): - # This is only inplace for the `.loc` case, - # so doesn't need to warn for the `.iloc` case. - warn = None + # loc with tm.assert_produces_warning(warn, match=msg): - df.iloc[indexer, 0] = invalid + df.loc[indexer, "a"] = invalid df = orig_df.copy() _invalid_scalars = [ From 9e400c27825898e821c601740e2dd77f4fb68fa4 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 24 Nov 2023 12:42:12 +0000 Subject: [PATCH 03/35] clean --- pandas/io/stata.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index b72e515a91aa8..66d0957f0d089 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -1867,7 +1867,6 @@ def _do_convert_missing(self, data: DataFrame, convert_missing: bool) -> DataFra if replacements: for idx, value in replacements.items(): data[data.columns[idx]] = value - # data.iloc[:, idx] = value return data def _insert_strls(self, data: DataFrame) -> DataFrame: From 003b99308ac7c042622d467ce33e007933f4e908 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 24 Nov 2023 12:42:48 +0000 Subject: [PATCH 04/35] clean --- 
pandas/tests/frame/indexing/test_indexing.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index ca6a156eb0a43..ac59600d4c5ba 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1375,20 +1375,20 @@ def test_loc_expand_empty_frame_keep_midx_names(self): tm.assert_frame_equal(df, expected) @pytest.mark.parametrize( - "val, idxr, warn", + "val, idxr", [ - ("x", "a", FutureWarning), - ("x", ["a"], FutureWarning), - (1, "a", FutureWarning), - (1, ["a"], FutureWarning), + ("x", "a"), + ("x", ["a"]), + (1, "a"), + (1, ["a"]), ], ) - def test_loc_setitem_rhs_frame(self, idxr, val, warn): + def test_loc_setitem_rhs_frame(self, idxr, val): # GH#47578 df = DataFrame({"a": [1, 2]}) with tm.assert_produces_warning( - warn, match="Setting an item of incompatible dtype" + FutureWarning, match="Setting an item of incompatible dtype" ): df.loc[:, idxr] = DataFrame({"a": [val, 11]}, index=[1, 2]) expected = DataFrame({"a": [np.nan, val]}) From 3ff469f535e904d18fcd656d2295d6574ece1a28 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 24 Nov 2023 13:45:55 +0000 Subject: [PATCH 05/35] wip --- pandas/core/frame.py | 2 +- pandas/core/internals/managers.py | 9 ++++++++- pandas/tests/groupby/test_groupby.py | 6 ++++-- pandas/tests/indexing/test_indexing.py | 14 ++++++++++---- 4 files changed, 23 insertions(+), 8 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3fa823b5267d8..0a6720ad15794 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4423,7 +4423,7 @@ def _iset_item(self, loc: int, value: Series, inplace: bool = True) -> None: loc, value._values, inplace=inplace, refs=value._references ) else: - self._iset_item_mgr(loc, value._values.copy(), inplace=True) + self._iset_item_mgr(loc, value._values.copy(), 
inplace=inplace) # check if we are modifying a copy # try to set first as we want an invalid diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 1997c86582cc8..bf84a700d2141 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1326,7 +1326,14 @@ def _iset_single( iloc = self.blklocs[loc] blk.set_inplace(slice(iloc, iloc + 1), value, copy=copy) return - elif inplace: + elif inplace and blk.dtype != np.void: + # Exclude np.void, as that is a special case for expansion. + # We want to warn for + # df = pd.DataFrame({'a': [1, 2]}) + # df.loc[:, 'a'] = .3 + # but not for + # df = pd.DataFrame({'a': [1, 2]}) + # df.loc[:, 'b'] = .3 warnings.warn( f"Setting an item of incompatible dtype is deprecated " "and will raise in a future error of pandas. " diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index c61d9fab0435e..39b2964dd2d5b 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -199,7 +199,8 @@ def f_1(grp): with tm.assert_produces_warning(FutureWarning, match=msg): result = df.groupby("A").apply(f_1)[["B"]] e = expected.copy() - e.loc["Tiger"] = np.nan + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + e.loc["Tiger"] = np.nan tm.assert_frame_equal(result, e) def f_2(grp): @@ -211,7 +212,8 @@ def f_2(grp): with tm.assert_produces_warning(FutureWarning, match=msg): result = df.groupby("A").apply(f_2)[["B"]] e = expected.copy() - e.loc["Pony"] = np.nan + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + e.loc["Pony"] = np.nan tm.assert_frame_equal(result, e) # 5592 revisited, with datetimes diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index bdbbcabcaab0e..b9e643a3535b9 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -828,7 +828,8 @@ def 
test_coercion_with_loc(self, expected): start_data, expected_result, warn = expected start_dataframe = DataFrame({"foo": start_data}) - start_dataframe.loc[0, ["foo"]] = None + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + start_dataframe.loc[0, ["foo"]] = None expected_dataframe = DataFrame({"foo": expected_result}) tm.assert_frame_equal(start_dataframe, expected_dataframe) @@ -838,7 +839,8 @@ def test_coercion_with_setitem_and_dataframe(self, expected): start_data, expected_result, warn = expected start_dataframe = DataFrame({"foo": start_data}) - start_dataframe[start_dataframe["foo"] == start_dataframe["foo"][0]] = None + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + start_dataframe[start_dataframe["foo"] == start_dataframe["foo"][0]] = None expected_dataframe = DataFrame({"foo": expected_result}) tm.assert_frame_equal(start_dataframe, expected_dataframe) @@ -848,7 +850,10 @@ def test_none_coercion_loc_and_dataframe(self, expected): start_data, expected_result, warn = expected start_dataframe = DataFrame({"foo": start_data}) - start_dataframe.loc[start_dataframe["foo"] == start_dataframe["foo"][0]] = None + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + start_dataframe.loc[ + start_dataframe["foo"] == start_dataframe["foo"][0] + ] = None expected_dataframe = DataFrame({"foo": expected_result}) tm.assert_frame_equal(start_dataframe, expected_dataframe) @@ -862,7 +867,8 @@ def test_none_coercion_mixed_dtypes(self): "d": ["a", "b", "c"], } ) - start_dataframe.iloc[0] = None + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + start_dataframe.iloc[0] = None exp = DataFrame( { From cd9408e2d450d168e542a5914670264c5c06c93f Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 27 Nov 2023 08:21:57 +0000 Subject: [PATCH 06/35] fixup --- pandas/io/stata.py | 2 +- 
pandas/tests/frame/methods/test_replace.py | 4 +++- pandas/tests/indexing/test_iloc.py | 3 ++- pandas/tests/indexing/test_indexing.py | 6 +++--- pandas/tests/io/json/test_pandas.py | 4 +++- 5 files changed, 12 insertions(+), 7 deletions(-) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 66d0957f0d089..236edca91a549 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -1876,7 +1876,7 @@ def _insert_strls(self, data: DataFrame) -> DataFrame: if typ != "Q": continue # Wrap v_o in a string to allow uint64 values as keys on 32bit OS - data.iloc[:, i] = [self.GSO[str(k)] for k in data.iloc[:, i]] + data[data.columns[i]] = [self.GSO[str(k)] for k in data.iloc[:, i]] return data def _do_select_columns(self, data: DataFrame, columns: Sequence[str]) -> DataFrame: diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index f07c53060a06b..f0a279c37f6c2 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1288,7 +1288,9 @@ def test_categorical_replace_with_dict(self, replace_dict, final_data): with pytest.raises(AssertionError, match=msg): # ensure non-inplace call does not affect original tm.assert_frame_equal(df, expected) - return_value = df.replace(replace_dict, 3, inplace=True) + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + # todo should this be warning? 
+ return_value = df.replace(replace_dict, 3, inplace=True) assert return_value is None tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index cbcbf3396363a..2cf49630dfd4c 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -1460,6 +1460,7 @@ def test_iloc_setitem_pure_position_based(self): def test_iloc_nullable_int64_size_1_nan(self): # GH 31861 result = DataFrame({"a": ["test"], "b": [np.nan]}) - result.loc[:, "b"] = result.loc[:, "b"].astype("Int64") + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + result.loc[:, "b"] = result.loc[:, "b"].astype("Int64") expected = DataFrame({"a": ["test"], "b": array([NA], dtype="Int64")}) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index b9e643a3535b9..e06f5303dd724 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -828,7 +828,7 @@ def test_coercion_with_loc(self, expected): start_data, expected_result, warn = expected start_dataframe = DataFrame({"foo": start_data}) - with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + with tm.assert_produces_warning(warn, match="incompatible dtype"): start_dataframe.loc[0, ["foo"]] = None expected_dataframe = DataFrame({"foo": expected_result}) @@ -839,7 +839,7 @@ def test_coercion_with_setitem_and_dataframe(self, expected): start_data, expected_result, warn = expected start_dataframe = DataFrame({"foo": start_data}) - with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + with tm.assert_produces_warning(warn, match="incompatible dtype"): start_dataframe[start_dataframe["foo"] == start_dataframe["foo"][0]] = None expected_dataframe = DataFrame({"foo": expected_result}) @@ -850,7 +850,7 @@ def test_none_coercion_loc_and_dataframe(self, expected): start_data, 
expected_result, warn = expected start_dataframe = DataFrame({"foo": start_data}) - with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + with tm.assert_produces_warning(warn, match="incompatible dtype"): start_dataframe.loc[ start_dataframe["foo"] == start_dataframe["foo"][0] ] = None diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 7312facc44c26..05fff07baaea3 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -168,7 +168,9 @@ def test_frame_non_unique_columns(self, orient, data): # in milliseconds; these are internally stored in nanosecond, # so divide to get where we need # TODO: a to_epoch method would also solve; see GH 14772 - expected.iloc[:, 0] = expected.iloc[:, 0].view(np.int64) // 1000000 + expected[expected.columns[0]] = ( + expected.iloc[:, 0].view(np.int64) // 1000000 + ) elif orient == "split": expected = df expected.columns = ["x", "x.1"] From 58ce78e8509a8a22d86d97d99ef4b0b6b83e7005 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 27 Nov 2023 09:20:38 +0000 Subject: [PATCH 07/35] more fixups --- pandas/tests/frame/test_constructors.py | 2 +- pandas/tests/indexing/test_loc.py | 22 ++++++++++++++-------- pandas/tests/reshape/merge/test_merge.py | 2 +- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 3111075c5c1a7..47a537ec45d7d 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2830,7 +2830,7 @@ def test_dict_data_arrow_column_expansion(self, key_val, col_vals, col_type): ) result = DataFrame({key_val: [1, 2]}, columns=cols) expected = DataFrame([[1, np.nan], [2, np.nan]], columns=cols) - expected.iloc[:, 1] = expected.iloc[:, 1].astype(object) + expected[expected.columns[1]] = expected.iloc[:, 1].astype(object) 
tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 96fd3f4e6fca0..e50521c1fee87 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -581,7 +581,8 @@ def test_loc_setitem_consistency(self, frame_for_consistency, val): } ) df = frame_for_consistency.copy() - df.loc[:, "date"] = val + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + df.loc[:, "date"] = val tm.assert_frame_equal(df, expected) def test_loc_setitem_consistency_dt64_to_str(self, frame_for_consistency): @@ -595,7 +596,8 @@ def test_loc_setitem_consistency_dt64_to_str(self, frame_for_consistency): } ) df = frame_for_consistency.copy() - df.loc[:, "date"] = "foo" + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + df.loc[:, "date"] = "foo" tm.assert_frame_equal(df, expected) def test_loc_setitem_consistency_dt64_to_float(self, frame_for_consistency): @@ -608,14 +610,16 @@ def test_loc_setitem_consistency_dt64_to_float(self, frame_for_consistency): } ) df = frame_for_consistency.copy() - df.loc[:, "date"] = 1.0 + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + df.loc[:, "date"] = 1.0 tm.assert_frame_equal(df, expected) def test_loc_setitem_consistency_single_row(self): # GH 15494 # setting on frame with single row df = DataFrame({"date": Series([Timestamp("20180101")])}) - df.loc[:, "date"] = "string" + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + df.loc[:, "date"] = "string" expected = DataFrame({"date": Series(["string"])}) tm.assert_frame_equal(df, expected) @@ -675,9 +679,10 @@ def test_loc_setitem_consistency_slice_column_len(self): # timedelta64[m] -> float, so this cannot be done inplace, so # no warning - df.loc[:, ("Respondent", "Duration")] = df.loc[ - :, ("Respondent", "Duration") - ] / Timedelta(60_000_000_000) + with tm.assert_produces_warning(FutureWarning, 
match="incompatible dtype"): + df.loc[:, ("Respondent", "Duration")] = df.loc[ + :, ("Respondent", "Duration") + ] / Timedelta(60_000_000_000) expected = Series( [23.0, 12.0, 14.0, 36.0], index=df.index, name=("Respondent", "Duration") @@ -1198,7 +1203,8 @@ def test_loc_setitem_empty_append_expands_rows_mixed_dtype(self): df = DataFrame(columns=["x", "y"]) df["x"] = df["x"].astype(np.int64) - df.loc[:, "x"] = data + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + df.loc[:, "x"] = data tm.assert_frame_equal(df, expected) def test_loc_setitem_empty_append_single_value(self): diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 7538894bbf1c9..c0cdf5a7bae86 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2986,5 +2986,5 @@ def test_merge_empty_frames_column_order(left_empty, right_empty): elif left_empty: expected.loc[:, "B"] = np.nan elif right_empty: - expected.loc[:, ["C", "D"]] = np.nan + expected[["C", "D"]] = np.nan tm.assert_frame_equal(result, expected) From 65647735931e3f6ac2ae57742a1f8bf5fd2cc6b5 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 27 Nov 2023 10:37:31 +0000 Subject: [PATCH 08/35] fixup remaining tests (but are they right?) --- pandas/core/internals/managers.py | 1 - pandas/tests/copy_view/test_indexing.py | 5 ++++- pandas/tests/frame/indexing/test_indexing.py | 8 ++++++-- pandas/tests/frame/methods/test_update.py | 5 +---- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index bf84a700d2141..e4c3c954ced1f 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1317,7 +1317,6 @@ def _iset_single( are unaffected. 
""" # Caller is responsible for verifying value.shape - if inplace and blk.should_store(value): copy = False if using_copy_on_write() and not self._has_no_reference_block(blkno): diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index 2e623f885b648..7de51727004a6 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -1136,7 +1136,10 @@ def test_set_value_copy_only_necessary_column( ): indexer_func(df)[indexer] = val else: - with tm.assert_cow_warning(warn_copy_on_write): + with tm.assert_cow_warning( + warn_copy_on_write or (using_copy_on_write and val == "a"), + match="set on a copy|incompatible dtype", + ): indexer_func(df)[indexer] = val if using_copy_on_write: diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 1433c54004652..6896cd7b3129c 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1395,13 +1395,17 @@ def test_loc_setitem_rhs_frame(self, idxr, val): tm.assert_frame_equal(df, expected) @td.skip_array_manager_invalid_test - def test_iloc_setitem_enlarge_no_warning(self, warn_copy_on_write): + def test_iloc_setitem_enlarge_no_warning( + self, warn_copy_on_write, using_copy_on_write + ): # GH#47381 df = DataFrame(columns=["a", "b"]) expected = df.copy() view = df[:] # TODO(CoW-warn) false positive: shouldn't warn in case of enlargement? 
- with tm.assert_produces_warning(FutureWarning if warn_copy_on_write else None): + with tm.assert_produces_warning( + FutureWarning if warn_copy_on_write or using_copy_on_write else None + ): df.iloc[:, 0] = np.array([1, 2], dtype=np.float64) tm.assert_frame_equal(view, expected) diff --git a/pandas/tests/frame/methods/test_update.py b/pandas/tests/frame/methods/test_update.py index 0d32788b04b03..5431360992d5d 100644 --- a/pandas/tests/frame/methods/test_update.py +++ b/pandas/tests/frame/methods/test_update.py @@ -144,11 +144,8 @@ def test_update_with_different_dtype(self, using_copy_on_write): # GH#3217 df = DataFrame({"a": [1, 3], "b": [np.nan, 2]}) df["c"] = np.nan - if using_copy_on_write: + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): df.update({"c": Series(["foo"], index=[0])}) - else: - with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): - df["c"].update(Series(["foo"], index=[0])) expected = DataFrame({"a": [1, 3], "b": [np.nan, 2], "c": ["foo", np.nan]}) tm.assert_frame_equal(df, expected) From 4c345e415afab40d408bb03ac23070789c431fdb Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 27 Nov 2023 10:42:15 +0000 Subject: [PATCH 09/35] :art: --- pandas/core/internals/managers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index e4c3c954ced1f..bf84a700d2141 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1317,6 +1317,7 @@ def _iset_single( are unaffected. 
""" # Caller is responsible for verifying value.shape + if inplace and blk.should_store(value): copy = False if using_copy_on_write() and not self._has_no_reference_block(blkno): From ea51b083442599849e8f84c9303f4b3e8c45c514 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 27 Nov 2023 10:46:29 +0000 Subject: [PATCH 10/35] dont warn on expected --- pandas/tests/groupby/test_groupby.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 33022a63cdaad..dba64e439ac30 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -199,8 +199,7 @@ def f_1(grp): with tm.assert_produces_warning(FutureWarning, match=msg): result = df.groupby("A").apply(f_1)[["B"]] e = expected.copy() - with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): - e.loc["Tiger"] = np.nan + e["B"] = [3, 5, float("nan")] tm.assert_frame_equal(result, e) def f_2(grp): @@ -212,8 +211,7 @@ def f_2(grp): with tm.assert_produces_warning(FutureWarning, match=msg): result = df.groupby("A").apply(f_2)[["B"]] e = expected.copy() - with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): - e.loc["Pony"] = np.nan + e["B"] = [3, float("nan"), 0] tm.assert_frame_equal(result, e) # 5592 revisited, with datetimes From 14c9ff62ab45bf5623e076d4820783e5637e4163 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 27 Nov 2023 11:18:48 +0000 Subject: [PATCH 11/35] cow fixup --- pandas/tests/copy_view/test_indexing.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index 7de51727004a6..02e44286a0fc4 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -1135,10 +1135,14 @@ def 
test_set_value_copy_only_necessary_column( FutureWarning, match="Setting an item of incompatible dtype is deprecated" ): indexer_func(df)[indexer] = val + elif val == "a" and (warn_copy_on_write or using_copy_on_write): + with tm.assert_produces_warning( + FutureWarning, match="incompatible dtype|Setting a value on a view" + ): + indexer_func(df)[indexer] = val else: with tm.assert_cow_warning( - warn_copy_on_write or (using_copy_on_write and val == "a"), - match="set on a copy|incompatible dtype", + warn_copy_on_write, ): indexer_func(df)[indexer] = val From 846b529197a8874b83d758c7ad80ca97035c418d Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 27 Nov 2023 11:30:18 +0000 Subject: [PATCH 12/35] :art: --- pandas/tests/copy_view/test_indexing.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index 02e44286a0fc4..d703c0474fa45 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -1141,9 +1141,7 @@ def test_set_value_copy_only_necessary_column( ): indexer_func(df)[indexer] = val else: - with tm.assert_cow_warning( - warn_copy_on_write, - ): + with tm.assert_cow_warning(warn_copy_on_write): indexer_func(df)[indexer] = val if using_copy_on_write: From 4f5495c0ec766d25b463e902b05c1bf92100d71e Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 27 Nov 2023 11:30:46 +0000 Subject: [PATCH 13/35] :art: --- pandas/tests/frame/methods/test_replace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index f0a279c37f6c2..5884c348770a9 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1289,7 +1289,7 @@ def test_categorical_replace_with_dict(self, replace_dict, 
final_data): # ensure non-inplace call does not affect original tm.assert_frame_equal(df, expected) with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): - # todo should this be warning? + # todo should this really be warning? return_value = df.replace(replace_dict, 3, inplace=True) assert return_value is None tm.assert_frame_equal(df, expected) From 9c26bb348d06c8d63e448cea7698885d102f92de Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 27 Nov 2023 11:48:49 +0000 Subject: [PATCH 14/35] catch len-block>1 case too --- pandas/core/internals/managers.py | 17 ++++++++++++++++- pandas/tests/copy_view/test_indexing.py | 2 +- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index bf84a700d2141..87a4c12f62c82 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1184,6 +1184,22 @@ def value_getitem(placement): blk.set_inplace(blk_locs, value_getitem(val_locs)) continue else: + if inplace and blk.dtype != np.void: + # Exclude np.void, as that is a special case for expansion. + # We want to warn for + # df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]}) + # df.loc[:, 'a'] = .3 + # but not for + # df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]}) + # df.loc[:, 'b'] = .3 + warnings.warn( + f"Setting an item of incompatible dtype is deprecated " + "and will raise in a future error of pandas. " + f"Value '{value}' has dtype incompatible with {blk.dtype}, " + "please explicitly cast to a compatible dtype first.", + FutureWarning, + stacklevel=find_stack_level(), + ) unfit_mgr_locs.append(blk.mgr_locs.as_array[blk_locs]) unfit_val_locs.append(val_locs) @@ -1317,7 +1333,6 @@ def _iset_single( are unaffected. 
""" # Caller is responsible for verifying value.shape - if inplace and blk.should_store(value): copy = False if using_copy_on_write() and not self._has_no_reference_block(blkno): diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index d703c0474fa45..ff7b83639968e 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -1129,7 +1129,7 @@ def test_set_value_copy_only_necessary_column( df_orig = df.copy() view = df[:] - if val == "a" and indexer[0] != slice(None): + if val == "a": # TODO(CoW-warn) assert the FutureWarning for CoW is also raised with tm.assert_produces_warning( FutureWarning, match="Setting an item of incompatible dtype is deprecated" From 3f82a9153b1d7ba15b3a2032e7ed8faa419c3a6f Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 27 Nov 2023 11:55:29 +0000 Subject: [PATCH 15/35] fixup tests --- pandas/tests/frame/indexing/test_indexing.py | 6 ++---- pandas/tests/frame/indexing/test_setitem.py | 3 ++- pandas/tests/indexing/test_iloc.py | 3 ++- pandas/tests/indexing/test_loc.py | 3 ++- pandas/tests/reshape/merge/test_merge.py | 2 +- 5 files changed, 9 insertions(+), 8 deletions(-) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 6896cd7b3129c..7c5b55adac0af 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1402,10 +1402,8 @@ def test_iloc_setitem_enlarge_no_warning( df = DataFrame(columns=["a", "b"]) expected = df.copy() view = df[:] - # TODO(CoW-warn) false positive: shouldn't warn in case of enlargement? - with tm.assert_produces_warning( - FutureWarning if warn_copy_on_write or using_copy_on_write else None - ): + # TODO(pdep6-warn) false positive: shouldn't warn in case of enlargement? 
+ with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): df.iloc[:, 0] = np.array([1, 2], dtype=np.float64) tm.assert_frame_equal(view, expected) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index a3cb33275d80f..64a2120579cb3 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -860,7 +860,8 @@ def test_setitem_string_column_numpy_dtype_raising(self): def test_setitem_empty_df_duplicate_columns(self, using_copy_on_write): # GH#38521 df = DataFrame(columns=["a", "b", "b"], dtype="float64") - df.loc[:, "a"] = list(range(2)) + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + df.loc[:, "a"] = list(range(2)) expected = DataFrame( [[0, np.nan, np.nan], [1, np.nan, np.nan]], columns=["a", "b", "b"] ) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 2cf49630dfd4c..10a4364b3f98f 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -531,7 +531,8 @@ def test_iloc_setitem_frame_duplicate_columns_multiple_blocks( # if the assigned values cannot be held by existing integer arrays, # we cast - df.iloc[:, 0] = df.iloc[:, 0] + 0.5 + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + df.iloc[:, 0] = df.iloc[:, 0] + 0.5 if not using_array_manager: assert len(df._mgr.blocks) == 2 diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index e50521c1fee87..c273c5e67184e 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1190,7 +1190,8 @@ def test_loc_setitem_empty_append_expands_rows(self): # appends to fit length of data df = DataFrame(columns=["x", "y"]) - df.loc[:, "x"] = data + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + df.loc[:, "x"] = data tm.assert_frame_equal(df, expected) def 
test_loc_setitem_empty_append_expands_rows_mixed_dtype(self): diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index c0cdf5a7bae86..5fb4f2a232846 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2984,7 +2984,7 @@ def test_merge_empty_frames_column_order(left_empty, right_empty): if left_empty and right_empty: expected = expected.iloc[:0] elif left_empty: - expected.loc[:, "B"] = np.nan + expected["B"] = np.nan elif right_empty: expected[["C", "D"]] = np.nan tm.assert_frame_equal(result, expected) From 874b596e5f1a2cd62592da2649a31cab07172e03 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 27 Nov 2023 11:57:53 +0000 Subject: [PATCH 16/35] simplify --- pandas/core/internals/managers.py | 1 + pandas/tests/copy_view/test_indexing.py | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 87a4c12f62c82..f8587c4e7c5c6 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1333,6 +1333,7 @@ def _iset_single( are unaffected. 
""" # Caller is responsible for verifying value.shape + if inplace and blk.should_store(value): copy = False if using_copy_on_write() and not self._has_no_reference_block(blkno): diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index ff7b83639968e..69501f09f9455 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -1130,12 +1130,11 @@ def test_set_value_copy_only_necessary_column( view = df[:] if val == "a": - # TODO(CoW-warn) assert the FutureWarning for CoW is also raised with tm.assert_produces_warning( FutureWarning, match="Setting an item of incompatible dtype is deprecated" ): indexer_func(df)[indexer] = val - elif val == "a" and (warn_copy_on_write or using_copy_on_write): + elif val == "a" and warn_copy_on_write: with tm.assert_produces_warning( FutureWarning, match="incompatible dtype|Setting a value on a view" ): From 162586c39cf3d4ceefd06f1f80f8beb6446ba501 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 27 Nov 2023 12:03:00 +0000 Subject: [PATCH 17/35] simplify --- pandas/tests/copy_view/test_indexing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index 69501f09f9455..aa3f23ffed25e 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -1129,12 +1129,12 @@ def test_set_value_copy_only_necessary_column( df_orig = df.copy() view = df[:] - if val == "a": + if val == "a" and not warn_copy_on_write: with tm.assert_produces_warning( FutureWarning, match="Setting an item of incompatible dtype is deprecated" ): indexer_func(df)[indexer] = val - elif val == "a" and warn_copy_on_write: + if val == "a" and warn_copy_on_write: with tm.assert_produces_warning( FutureWarning, match="incompatible dtype|Setting a value on a view" ): From 
467742c7dbad1b654ccc283afd6eb5ece46be99c Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 27 Nov 2023 12:09:42 +0000 Subject: [PATCH 18/35] remove outdated comment --- pandas/tests/frame/methods/test_replace.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 5884c348770a9..078afc0c5cfc7 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1289,7 +1289,6 @@ def test_categorical_replace_with_dict(self, replace_dict, final_data): # ensure non-inplace call does not affect original tm.assert_frame_equal(df, expected) with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): - # todo should this really be warning? return_value = df.replace(replace_dict, 3, inplace=True) assert return_value is None tm.assert_frame_equal(df, expected) From 5d24fb0db13bebea96b470e55c0c522e04a11153 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 27 Nov 2023 12:38:50 +0000 Subject: [PATCH 19/35] fixup test_internals test --- pandas/tests/copy_view/test_internals.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/tests/copy_view/test_internals.py b/pandas/tests/copy_view/test_internals.py index a727331307d7e..7e1bdc4229a4a 100644 --- a/pandas/tests/copy_view/test_internals.py +++ b/pandas/tests/copy_view/test_internals.py @@ -108,7 +108,11 @@ def test_iset_splits_blocks_inplace(using_copy_on_write, locs, arr, dtype): arr = arr.astype(dtype) df_orig = df.copy() df2 = df.copy(deep=None) # Trigger a CoW (if enabled, otherwise makes copy) - df2._mgr.iset(locs, arr, inplace=True) + with tm.assert_produces_warning( + FutureWarning if dtype == np.int8 or locs == [5] else None, + match="incompatible dtype", + ): + df2._mgr.iset(locs, arr, inplace=True) tm.assert_frame_equal(df, df_orig) From 
a43b7370f0f20b320dd657cf16031b2d13221dcf Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Mon, 27 Nov 2023 21:12:12 +0100 Subject: [PATCH 20/35] Update indexing unpacking logic for single block case --- pandas/core/indexing.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 13756dd5a70e4..c65cf7322deb5 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2125,6 +2125,12 @@ def _setitem_single_block(self, indexer, value, name: str) -> None: """ from pandas import Series + if (isinstance(value, ABCSeries) and name != "iloc") or isinstance(value, dict): + # TODO(EA): ExtensionBlock.setitem this causes issues with + # setting for extensionarrays that store dicts. Need to decide + # if it's worth supporting that. + value = self._align_series(indexer, Series(value)) + info_axis = self.obj._info_axis_number item_labels = self.obj._get_axis(info_axis) if isinstance(indexer, tuple): @@ -2145,13 +2151,7 @@ def _setitem_single_block(self, indexer, value, name: str) -> None: indexer = maybe_convert_ix(*indexer) # e.g. test_setitem_frame_align - if (isinstance(value, ABCSeries) and name != "iloc") or isinstance(value, dict): - # TODO(EA): ExtensionBlock.setitem this causes issues with - # setting for extensionarrays that store dicts. Need to decide - # if it's worth supporting that. - value = self._align_series(indexer, Series(value)) - - elif isinstance(value, ABCDataFrame) and name != "iloc": + if isinstance(value, ABCDataFrame) and name != "iloc": value = self._align_frame(indexer, value)._values # check for chained assignment From 7a25a86d6a6a008dd507c9c8cca1ddd19d59dbca Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 28 Nov 2023 12:36:04 +0000 Subject: [PATCH 21/35] fixing stuff up? 
--- pandas/core/frame.py | 8 ++--- pandas/core/indexing.py | 33 +++++++++++++++----- pandas/core/internals/blocks.py | 7 ++++- pandas/core/internals/managers.py | 32 ------------------- pandas/tests/copy_view/test_internals.py | 6 +--- pandas/tests/frame/indexing/test_indexing.py | 9 +++--- 6 files changed, 40 insertions(+), 55 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0a6720ad15794..5d05983529fba 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4157,7 +4157,7 @@ def _get_value(self, index, col, takeable: bool = False) -> Scalar: loc = engine.get_loc(index) return series._values[loc] - def isetitem(self, loc, value, *, inplace: bool = False) -> None: + def isetitem(self, loc, value) -> None: """ Set the given value in the column with position `loc`. @@ -4193,11 +4193,11 @@ def isetitem(self, loc, value, *, inplace: bool = False) -> None: for i, idx in enumerate(loc): arraylike, refs = self._sanitize_column(value.iloc[:, i]) - self._iset_item_mgr(idx, arraylike, inplace=inplace, refs=refs) + self._iset_item_mgr(idx, arraylike, inplace=False, refs=refs) return arraylike, refs = self._sanitize_column(value) - self._iset_item_mgr(loc, arraylike, inplace=inplace, refs=refs) + self._iset_item_mgr(loc, arraylike, inplace=False, refs=refs) def __setitem__(self, key, value) -> None: if not PYPY and using_copy_on_write(): @@ -4423,7 +4423,7 @@ def _iset_item(self, loc: int, value: Series, inplace: bool = True) -> None: loc, value._values, inplace=inplace, refs=value._references ) else: - self._iset_item_mgr(loc, value._values.copy(), inplace=inplace) + self._iset_item_mgr(loc, value._values.copy(), inplace=True) # check if we are modifying a copy # try to set first as we want an invalid diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 3399144fc5ef5..220b36c9e773f 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2111,7 +2111,24 @@ def _setitem_single_column(self, loc: int, value, 
plane_indexer) -> None: # If we're setting an entire column and we can't do it inplace, # then we can use value's dtype (or inferred dtype) # instead of object - self.obj.isetitem(loc, value, inplace=True) + dtype = self.obj.dtypes.iloc[loc] + if dtype != np.void: + # Exclude np.void, as that is a special case for expansion. + # We want to warn for + # df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]}) + # df.loc[:, 'a'] = .3 + # but not for + # df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]}) + # df.loc[:, 'b'] = .3 + warnings.warn( + f"Setting an item of incompatible dtype is deprecated " + "and will raise in a future error of pandas. " + f"Value '{value}' has dtype incompatible with {dtype}, " + "please explicitly cast to a compatible dtype first.", + FutureWarning, + stacklevel=find_stack_level(), + ) + self.obj.isetitem(loc, value) else: # set value into the column (first attempting to operate inplace, then # falling back to casting if necessary) @@ -2125,12 +2142,6 @@ def _setitem_single_block(self, indexer, value, name: str) -> None: """ from pandas import Series - if (isinstance(value, ABCSeries) and name != "iloc") or isinstance(value, dict): - # TODO(EA): ExtensionBlock.setitem this causes issues with - # setting for extensionarrays that store dicts. Need to decide - # if it's worth supporting that. - value = self._align_series(indexer, Series(value)) - info_axis = self.obj._info_axis_number item_labels = self.obj._get_axis(info_axis) if isinstance(indexer, tuple): @@ -2151,7 +2162,13 @@ def _setitem_single_block(self, indexer, value, name: str) -> None: indexer = maybe_convert_ix(*indexer) # e.g. test_setitem_frame_align - if isinstance(value, ABCDataFrame) and name != "iloc": + if (isinstance(value, ABCSeries) and name != "iloc") or isinstance(value, dict): + # TODO(EA): ExtensionBlock.setitem this causes issues with + # setting for extensionarrays that store dicts. Need to decide + # if it's worth supporting that. 
+ value = self._align_series(indexer, Series(value)) + + elif isinstance(value, ABCDataFrame) and name != "iloc": value = self._align_frame(indexer, value)._values # check for chained assignment diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 535d18f99f0ef..75771529e7323 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -475,7 +475,12 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block: and isna(other) and other is not NaT ): - warn_on_upcast = False + try: + is_np_nat = np.isnat(other) + except TypeError: + is_np_nat = False + if not is_np_nat: + warn_on_upcast = False elif ( isinstance(other, np.ndarray) and other.ndim == 1 diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index f8587c4e7c5c6..843441e4865c7 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1184,22 +1184,6 @@ def value_getitem(placement): blk.set_inplace(blk_locs, value_getitem(val_locs)) continue else: - if inplace and blk.dtype != np.void: - # Exclude np.void, as that is a special case for expansion. - # We want to warn for - # df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]}) - # df.loc[:, 'a'] = .3 - # but not for - # df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]}) - # df.loc[:, 'b'] = .3 - warnings.warn( - f"Setting an item of incompatible dtype is deprecated " - "and will raise in a future error of pandas. " - f"Value '{value}' has dtype incompatible with {blk.dtype}, " - "please explicitly cast to a compatible dtype first.", - FutureWarning, - stacklevel=find_stack_level(), - ) unfit_mgr_locs.append(blk.mgr_locs.as_array[blk_locs]) unfit_val_locs.append(val_locs) @@ -1342,22 +1326,6 @@ def _iset_single( iloc = self.blklocs[loc] blk.set_inplace(slice(iloc, iloc + 1), value, copy=copy) return - elif inplace and blk.dtype != np.void: - # Exclude np.void, as that is a special case for expansion. 
- # We want to warn for - # df = pd.DataFrame({'a': [1, 2]}) - # df.loc[:, 'a'] = .3 - # but not for - # df = pd.DataFrame({'a': [1, 2]}) - # df.loc[:, 'b'] = .3 - warnings.warn( - f"Setting an item of incompatible dtype is deprecated " - "and will raise in a future error of pandas. " - f"Value '{value}' has dtype incompatible with {blk.dtype}, " - "please explicitly cast to a compatible dtype first.", - FutureWarning, - stacklevel=find_stack_level(), - ) nb = new_block_2d(value, placement=blk._mgr_locs, refs=refs) old_blocks = self.blocks diff --git a/pandas/tests/copy_view/test_internals.py b/pandas/tests/copy_view/test_internals.py index 7e1bdc4229a4a..a727331307d7e 100644 --- a/pandas/tests/copy_view/test_internals.py +++ b/pandas/tests/copy_view/test_internals.py @@ -108,11 +108,7 @@ def test_iset_splits_blocks_inplace(using_copy_on_write, locs, arr, dtype): arr = arr.astype(dtype) df_orig = df.copy() df2 = df.copy(deep=None) # Trigger a CoW (if enabled, otherwise makes copy) - with tm.assert_produces_warning( - FutureWarning if dtype == np.int8 or locs == [5] else None, - match="incompatible dtype", - ): - df2._mgr.iset(locs, arr, inplace=True) + df2._mgr.iset(locs, arr, inplace=True) tm.assert_frame_equal(df, df_orig) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 7c5b55adac0af..f405de2550921 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -337,14 +337,12 @@ def test_setitem(self, float_frame, using_copy_on_write, warn_copy_on_write): def test_setitem2(self): # dtype changing GH4204 df = DataFrame([[0, 0]]) - with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): - df.iloc[0] = np.nan + df.iloc[0] = np.nan expected = DataFrame([[np.nan, np.nan]]) tm.assert_frame_equal(df, expected) df = DataFrame([[0, 0]]) - with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): - df.loc[0] = np.nan + 
df.loc[0] = np.nan tm.assert_frame_equal(df, expected) def test_setitem_boolean(self, float_frame): @@ -1498,7 +1496,8 @@ def test_loc_rhs_empty_warning(self): df = DataFrame(columns=["a", "b"]) expected = df.copy() rhs = DataFrame(columns=["a"]) - with tm.assert_produces_warning(None): + # with tm.assert_produces_warning(None): + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): df.loc[:, "a"] = rhs tm.assert_frame_equal(df, expected) From 3240bb9a793af18ac4942bf399492af99f0d29fd Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 28 Nov 2023 12:59:44 +0000 Subject: [PATCH 22/35] wip try fixing up? --- pandas/core/indexing.py | 14 +++++++------- pandas/tests/frame/methods/test_replace.py | 3 +-- pandas/tests/frame/methods/test_update.py | 5 ++++- pandas/tests/indexing/test_indexing.py | 14 ++++---------- pandas/tests/indexing/test_loc.py | 5 ++++- pandas/tests/series/indexing/test_indexing.py | 2 +- 6 files changed, 21 insertions(+), 22 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 220b36c9e773f..2e70dda520fd3 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2142,6 +2142,12 @@ def _setitem_single_block(self, indexer, value, name: str) -> None: """ from pandas import Series + if (isinstance(value, ABCSeries) and name != "iloc") or isinstance(value, dict): + # TODO(EA): ExtensionBlock.setitem this causes issues with + # setting for extensionarrays that store dicts. Need to decide + # if it's worth supporting that. + value = self._align_series(indexer, Series(value)) + info_axis = self.obj._info_axis_number item_labels = self.obj._get_axis(info_axis) if isinstance(indexer, tuple): @@ -2162,13 +2168,7 @@ def _setitem_single_block(self, indexer, value, name: str) -> None: indexer = maybe_convert_ix(*indexer) # e.g. 
test_setitem_frame_align - if (isinstance(value, ABCSeries) and name != "iloc") or isinstance(value, dict): - # TODO(EA): ExtensionBlock.setitem this causes issues with - # setting for extensionarrays that store dicts. Need to decide - # if it's worth supporting that. - value = self._align_series(indexer, Series(value)) - - elif isinstance(value, ABCDataFrame) and name != "iloc": + if isinstance(value, ABCDataFrame) and name != "iloc": value = self._align_frame(indexer, value)._values # check for chained assignment diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 078afc0c5cfc7..f07c53060a06b 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1288,8 +1288,7 @@ def test_categorical_replace_with_dict(self, replace_dict, final_data): with pytest.raises(AssertionError, match=msg): # ensure non-inplace call does not affect original tm.assert_frame_equal(df, expected) - with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): - return_value = df.replace(replace_dict, 3, inplace=True) + return_value = df.replace(replace_dict, 3, inplace=True) assert return_value is None tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/frame/methods/test_update.py b/pandas/tests/frame/methods/test_update.py index 5431360992d5d..a77378587fe23 100644 --- a/pandas/tests/frame/methods/test_update.py +++ b/pandas/tests/frame/methods/test_update.py @@ -135,7 +135,10 @@ def test_update_from_non_df(self): def test_update_datetime_tz(self): # GH 25807 result = DataFrame([pd.Timestamp("2019", tz="UTC")]) - with tm.assert_produces_warning(None): + # with tm.assert_produces_warning(None): + with tm.assert_produces_warning( + FutureWarning, match="incompatible dtype" + ): # todo wrong result.update(result) expected = DataFrame([pd.Timestamp("2019", tz="UTC")]) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/test_indexing.py 
b/pandas/tests/indexing/test_indexing.py index db4e2221517cc..d6ec7ac3e4185 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -846,8 +846,7 @@ def test_coercion_with_loc(self, expected): start_data, expected_result, warn = expected start_dataframe = DataFrame({"foo": start_data}) - with tm.assert_produces_warning(warn, match="incompatible dtype"): - start_dataframe.loc[0, ["foo"]] = None + start_dataframe.loc[0, ["foo"]] = None expected_dataframe = DataFrame({"foo": expected_result}) tm.assert_frame_equal(start_dataframe, expected_dataframe) @@ -857,8 +856,7 @@ def test_coercion_with_setitem_and_dataframe(self, expected): start_data, expected_result, warn = expected start_dataframe = DataFrame({"foo": start_data}) - with tm.assert_produces_warning(warn, match="incompatible dtype"): - start_dataframe[start_dataframe["foo"] == start_dataframe["foo"][0]] = None + start_dataframe[start_dataframe["foo"] == start_dataframe["foo"][0]] = None expected_dataframe = DataFrame({"foo": expected_result}) tm.assert_frame_equal(start_dataframe, expected_dataframe) @@ -868,10 +866,7 @@ def test_none_coercion_loc_and_dataframe(self, expected): start_data, expected_result, warn = expected start_dataframe = DataFrame({"foo": start_data}) - with tm.assert_produces_warning(warn, match="incompatible dtype"): - start_dataframe.loc[ - start_dataframe["foo"] == start_dataframe["foo"][0] - ] = None + start_dataframe.loc[start_dataframe["foo"] == start_dataframe["foo"][0]] = None expected_dataframe = DataFrame({"foo": expected_result}) tm.assert_frame_equal(start_dataframe, expected_dataframe) @@ -885,8 +880,7 @@ def test_none_coercion_mixed_dtypes(self): "d": ["a", "b", "c"], } ) - with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): - start_dataframe.iloc[0] = None + start_dataframe.iloc[0] = None exp = DataFrame( { diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 
193034febbb36..32af86785fba6 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1489,7 +1489,10 @@ def test_loc_setitem_datetimeindex_tz(self, idxer, tz_naive_fixture): # if result started off with object dtype, then the .loc.__setitem__ # below would retain object dtype result = DataFrame(index=idx, columns=["var"], dtype=np.float64) - result.loc[:, idxer] = expected + with tm.assert_produces_warning( + FutureWarning if idxer == "var" else None, match="incompatible dtype" + ): # todo + result.loc[:, idxer] = expected tm.assert_frame_equal(result, expected) def test_loc_setitem_time_key(self, using_array_manager): diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 0ebed75987d96..b1d5a803d1678 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -503,7 +503,7 @@ def test_setitem_validation_scalar_bool(self, invalid, indexer): @pytest.mark.parametrize("indexer", _indexers) def test_setitem_validation_scalar_int(self, invalid, any_int_numpy_dtype, indexer): ser = Series([1, 2, 3], dtype=any_int_numpy_dtype) - if isna(invalid) and invalid is not NaT: + if isna(invalid) and invalid is not NaT and not np.isnat(invalid): warn = None else: warn = FutureWarning From 755ded6b7e96c16a0dcf31ec786238d30ba6c971 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 28 Nov 2023 13:27:21 +0000 Subject: [PATCH 23/35] try fixup --- pandas/core/indexing.py | 2 +- pandas/tests/frame/indexing/test_indexing.py | 7 ++----- pandas/tests/indexing/test_loc.py | 5 ++--- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 2e70dda520fd3..8362caa5bd9b6 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2112,7 +2112,7 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None: # then we 
can use value's dtype (or inferred dtype) # instead of object dtype = self.obj.dtypes.iloc[loc] - if dtype != np.void: + if dtype not in (np.void, object): # Exclude np.void, as that is a special case for expansion. # We want to warn for # df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]}) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index f405de2550921..3b8212bec4549 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1400,9 +1400,7 @@ def test_iloc_setitem_enlarge_no_warning( df = DataFrame(columns=["a", "b"]) expected = df.copy() view = df[:] - # TODO(pdep6-warn) false positive: shouldn't warn in case of enlargement? - with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): - df.iloc[:, 0] = np.array([1, 2], dtype=np.float64) + df.iloc[:, 0] = np.array([1, 2], dtype=np.float64) tm.assert_frame_equal(view, expected) def test_loc_internals_not_updated_correctly(self): @@ -1496,8 +1494,7 @@ def test_loc_rhs_empty_warning(self): df = DataFrame(columns=["a", "b"]) expected = df.copy() rhs = DataFrame(columns=["a"]) - # with tm.assert_produces_warning(None): - with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + with tm.assert_produces_warning(None): df.loc[:, "a"] = rhs tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 32af86785fba6..225d27ad40f93 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1194,8 +1194,7 @@ def test_loc_setitem_empty_append_expands_rows(self): # appends to fit length of data df = DataFrame(columns=["x", "y"]) - with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): - df.loc[:, "x"] = data + df.loc[:, "x"] = data tm.assert_frame_equal(df, expected) def test_loc_setitem_empty_append_expands_rows_mixed_dtype(self): @@ -1491,7 +1490,7 @@ def 
test_loc_setitem_datetimeindex_tz(self, idxer, tz_naive_fixture): result = DataFrame(index=idx, columns=["var"], dtype=np.float64) with tm.assert_produces_warning( FutureWarning if idxer == "var" else None, match="incompatible dtype" - ): # todo + ): # false positive? result.loc[:, idxer] = expected tm.assert_frame_equal(result, expected) From 28a731b63e05d55bd0f7bc2ae8d520426ea0fe41 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 28 Nov 2023 15:05:05 +0000 Subject: [PATCH 24/35] copy-on-write test --- pandas/tests/frame/indexing/test_indexing.py | 4 +++- pandas/tests/indexing/test_loc.py | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 3b8212bec4549..c971072084d72 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1400,7 +1400,9 @@ def test_iloc_setitem_enlarge_no_warning( df = DataFrame(columns=["a", "b"]) expected = df.copy() view = df[:] - df.iloc[:, 0] = np.array([1, 2], dtype=np.float64) + # TODO(CoW-warn) false positive: shouldn't warn in case of enlargement? + with tm.assert_produces_warning(FutureWarning if warn_copy_on_write else None): + df.iloc[:, 0] = np.array([1, 2], dtype=np.float64) tm.assert_frame_equal(view, expected) def test_loc_internals_not_updated_correctly(self): diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 225d27ad40f93..f6a93202afa9a 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1490,7 +1490,8 @@ def test_loc_setitem_datetimeindex_tz(self, idxer, tz_naive_fixture): result = DataFrame(index=idx, columns=["var"], dtype=np.float64) with tm.assert_produces_warning( FutureWarning if idxer == "var" else None, match="incompatible dtype" - ): # false positive? 
+ ): + # See https://github.com/pandas-dev/pandas/issues/56223 result.loc[:, idxer] = expected tm.assert_frame_equal(result, expected) From 27bf409f7c3d4c0b734463e10836023ff4a14928 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 28 Nov 2023 15:19:11 +0000 Subject: [PATCH 25/35] exclude abcdataframe setting --- pandas/core/indexing.py | 18 +++++++++++------- pandas/tests/indexing/test_loc.py | 6 +----- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 8362caa5bd9b6..7822f94b6abab 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2112,14 +2112,18 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None: # then we can use value's dtype (or inferred dtype) # instead of object dtype = self.obj.dtypes.iloc[loc] - if dtype not in (np.void, object): + if dtype not in (np.void, object) and not isinstance( + value, ABCDataFrame + ): # Exclude np.void, as that is a special case for expansion. - # We want to warn for - # df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]}) - # df.loc[:, 'a'] = .3 - # but not for - # df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]}) - # df.loc[:, 'b'] = .3 + # We want to warn for + # df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]}) + # df.loc[:, 'a'] = .3 + # but not for + # df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]}) + # df.loc[:, 'b'] = .3 + # Exclude `object`, as then no upcasting happens. + # Exclude ABCDataFrame due to https://github.com/pandas-dev/pandas/issues/56223 warnings.warn( f"Setting an item of incompatible dtype is deprecated " "and will raise in a future error of pandas. 
" diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index f6a93202afa9a..61220aaa94f92 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1488,11 +1488,7 @@ def test_loc_setitem_datetimeindex_tz(self, idxer, tz_naive_fixture): # if result started off with object dtype, then the .loc.__setitem__ # below would retain object dtype result = DataFrame(index=idx, columns=["var"], dtype=np.float64) - with tm.assert_produces_warning( - FutureWarning if idxer == "var" else None, match="incompatible dtype" - ): - # See https://github.com/pandas-dev/pandas/issues/56223 - result.loc[:, idxer] = expected + result.loc[:, idxer] = expected tm.assert_frame_equal(result, expected) def test_loc_setitem_time_key(self, using_array_manager): From b1a477701af5112a6326a5b6949a2aaf0f373d04 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 28 Nov 2023 15:23:06 +0000 Subject: [PATCH 26/35] exclude "expanding empty df" case --- pandas/core/indexing.py | 5 +---- pandas/tests/indexing/test_loc.py | 9 ++++++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 7822f94b6abab..abc8efa8fd83b 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2112,9 +2112,7 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None: # then we can use value's dtype (or inferred dtype) # instead of object dtype = self.obj.dtypes.iloc[loc] - if dtype not in (np.void, object) and not isinstance( - value, ABCDataFrame - ): + if dtype != np.void and not (self.obj.empty): # Exclude np.void, as that is a special case for expansion. 
# We want to warn for # df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]}) @@ -2123,7 +2121,6 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None: # df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]}) # df.loc[:, 'b'] = .3 # Exclude `object`, as then no upcasting happens. - # Exclude ABCDataFrame due to https://github.com/pandas-dev/pandas/issues/56223 warnings.warn( f"Setting an item of incompatible dtype is deprecated " "and will raise in a future error of pandas. " diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 61220aaa94f92..d9e193dc4a08d 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1207,8 +1207,7 @@ def test_loc_setitem_empty_append_expands_rows_mixed_dtype(self): df = DataFrame(columns=["x", "y"]) df["x"] = df["x"].astype(np.int64) - with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): - df.loc[:, "x"] = data + df.loc[:, "x"] = data tm.assert_frame_equal(df, expected) def test_loc_setitem_empty_append_single_value(self): @@ -1488,7 +1487,11 @@ def test_loc_setitem_datetimeindex_tz(self, idxer, tz_naive_fixture): # if result started off with object dtype, then the .loc.__setitem__ # below would retain object dtype result = DataFrame(index=idx, columns=["var"], dtype=np.float64) - result.loc[:, idxer] = expected + with tm.assert_produces_warning( + FutureWarning if idxer == "var" else None, match="incompatible dtype" + ): + # See https://github.com/pandas-dev/pandas/issues/56223 + result.loc[:, idxer] = expected tm.assert_frame_equal(result, expected) def test_loc_setitem_time_key(self, using_array_manager): From 5b4350998b47d745094dce15ce7bd101376ad869 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 28 Nov 2023 15:26:17 +0000 Subject: [PATCH 27/35] reduce diff --- pandas/core/indexing.py | 2 +- pandas/tests/frame/indexing/test_indexing.py | 4 +--- 2 files changed, 2 insertions(+), 4 
deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index abc8efa8fd83b..ae1dbc42d77a1 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2112,7 +2112,7 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None: # then we can use value's dtype (or inferred dtype) # instead of object dtype = self.obj.dtypes.iloc[loc] - if dtype != np.void and not (self.obj.empty): + if dtype != np.void and not self.obj.empty: # Exclude np.void, as that is a special case for expansion. # We want to warn for # df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]}) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index c971072084d72..9f80a9d365bd7 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1393,9 +1393,7 @@ def test_loc_setitem_rhs_frame(self, idxr, val): tm.assert_frame_equal(df, expected) @td.skip_array_manager_invalid_test - def test_iloc_setitem_enlarge_no_warning( - self, warn_copy_on_write, using_copy_on_write - ): + def test_iloc_setitem_enlarge_no_warning(self, warn_copy_on_write): # GH#47381 df = DataFrame(columns=["a", "b"]) expected = df.copy() From 428c146d01d6ced4791ff6f91fca53fe2f2c34e4 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 28 Nov 2023 15:29:37 +0000 Subject: [PATCH 28/35] update --- pandas/tests/frame/indexing/test_setitem.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 64a2120579cb3..a3cb33275d80f 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -860,8 +860,7 @@ def test_setitem_string_column_numpy_dtype_raising(self): def test_setitem_empty_df_duplicate_columns(self, using_copy_on_write): # GH#38521 df = DataFrame(columns=["a", "b", "b"], 
dtype="float64") - with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): - df.loc[:, "a"] = list(range(2)) + df.loc[:, "a"] = list(range(2)) expected = DataFrame( [[0, np.nan, np.nan], [1, np.nan, np.nan]], columns=["a", "b", "b"] ) From 12cd4845fb9beac26450ec71bcaca94a314b04e6 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 28 Nov 2023 15:43:14 +0000 Subject: [PATCH 29/35] ooooh this works? --- pandas/core/frame.py | 5 ++--- pandas/core/indexing.py | 8 +++++--- pandas/tests/frame/methods/test_update.py | 5 +---- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5d05983529fba..638233bf8b383 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8862,8 +8862,6 @@ def update( stacklevel=2, ) - from pandas.core.computation import expressions - # TODO: Support other joins if join != "left": # pragma: no cover raise NotImplementedError("Only left join is supported") @@ -8897,7 +8895,8 @@ def update( if mask.all(): continue - self.loc[:, col] = expressions.where(mask, this, that) + # self.loc[:, col] = expressions.where(mask, this, that) + self.loc[:, col] = self[col].where(mask, other[col]) # ---------------------------------------------------------------------- # Data reshaping diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index ae1dbc42d77a1..8a2134ebe193c 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2112,15 +2112,17 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None: # then we can use value's dtype (or inferred dtype) # instead of object dtype = self.obj.dtypes.iloc[loc] - if dtype != np.void and not self.obj.empty: - # Exclude np.void, as that is a special case for expansion. + if dtype not in (np.void, object) and not self.obj.empty: + # - Exclude np.void, as that is a special case for expansion. 
# We want to warn for # df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]}) # df.loc[:, 'a'] = .3 # but not for # df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]}) # df.loc[:, 'b'] = .3 - # Exclude `object`, as then no upcasting happens. + # - Exclude `object`, as then no upcasting happens. + # - Exclude empty initial object with enlargement, + # as then there's nothing to be inconsistent with. warnings.warn( f"Setting an item of incompatible dtype is deprecated " "and will raise in a future error of pandas. " diff --git a/pandas/tests/frame/methods/test_update.py b/pandas/tests/frame/methods/test_update.py index a77378587fe23..ba9ed45cde329 100644 --- a/pandas/tests/frame/methods/test_update.py +++ b/pandas/tests/frame/methods/test_update.py @@ -136,10 +136,7 @@ def test_update_datetime_tz(self): # GH 25807 result = DataFrame([pd.Timestamp("2019", tz="UTC")]) # with tm.assert_produces_warning(None): - with tm.assert_produces_warning( - FutureWarning, match="incompatible dtype" - ): # todo wrong - result.update(result) + result.update(result) expected = DataFrame([pd.Timestamp("2019", tz="UTC")]) tm.assert_frame_equal(result, expected) From 45dd43bc0289d1b698b779e1652b39283c53a9a1 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 4 Dec 2023 11:39:38 +0000 Subject: [PATCH 30/35] reduce diff --- pandas/tests/frame/methods/test_update.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/methods/test_update.py b/pandas/tests/frame/methods/test_update.py index 08e41f7c99026..6a4a8c13b96c8 100644 --- a/pandas/tests/frame/methods/test_update.py +++ b/pandas/tests/frame/methods/test_update.py @@ -135,8 +135,8 @@ def test_update_from_non_df(self): def test_update_datetime_tz(self): # GH 25807 result = DataFrame([pd.Timestamp("2019", tz="UTC")]) - # with tm.assert_produces_warning(None): - result.update(result) + with tm.assert_produces_warning(None): + result.update(result) expected = 
DataFrame([pd.Timestamp("2019", tz="UTC")]) tm.assert_frame_equal(result, expected) From 54f34599fbfd1b3ac3c854917b0b1be2683e3422 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 4 Dec 2023 11:45:54 +0000 Subject: [PATCH 31/35] simplify --- pandas/core/internals/blocks.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index da741fd384616..c045eac5c3000 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -498,13 +498,11 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block: and is_integer_dtype(self.values.dtype) and isna(other) and other is not NaT + and not ( + isinstance(other, (np.datetime64, np.timedelta64)) and np.isnat(other) + ) ): - try: - is_np_nat = np.isnat(other) - except TypeError: - is_np_nat = False - if not is_np_nat: - warn_on_upcast = False + warn_on_upcast = False elif ( isinstance(other, np.ndarray) and other.ndim == 1 From 96ee0aee136269c5eeca803f37b86daf4adaed49 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 4 Dec 2023 13:32:28 +0000 Subject: [PATCH 32/35] use isetitem --- doc/source/whatsnew/v2.2.0.rst | 2 +- pandas/io/stata.py | 8 ++++---- pandas/tests/frame/test_constructors.py | 2 +- pandas/tests/io/json/test_pandas.py | 4 +--- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index f5b269e5f6ea1..0a977a0e18784 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -529,7 +529,7 @@ Conversion ^^^^^^^^^^ - Bug in :func:`astype` when called with ``str`` on unpickled array - the array might change in-place (:issue:`54654`) - Bug in :meth:`Series.convert_dtypes` not converting all NA column to ``null[pyarrow]`` (:issue:`55346`) -- Bug in ``DataFrame.loc`` was not throwing 
"incompatible dtype warning" (see PDEP6) when assigning a ``Series`` with a different dtype using a full column setter (e.g. ``df.loc[:, 'a'] = incompatible_value``) (:issue:`55791`) +- Bug in ``DataFrame.loc`` was not throwing "incompatible dtype warning" (see PDEP6) when assigning a ``Series`` with a different dtype using a full column setter (e.g. ``df.loc[:, 'a'] = incompatible_value``) (:issue:`39584`) - Strings diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 193f44cde4274..06fb4e7dc2515 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -1790,8 +1790,8 @@ def read( if convert_dates: for i, fmt in enumerate(self._fmtlist): if any(fmt.startswith(date_fmt) for date_fmt in _date_formats): - data[data.columns[i]] = _stata_elapsed_date_to_datetime_vec( - data.iloc[:, i], fmt + data.isetitem( + i, _stata_elapsed_date_to_datetime_vec(data.iloc[:, i], fmt) ) if convert_categoricals and self._format_version > 108: @@ -1866,7 +1866,7 @@ def _do_convert_missing(self, data: DataFrame, convert_missing: bool) -> DataFra replacements[i] = replacement if replacements: for idx, value in replacements.items(): - data[data.columns[idx]] = value + data.isetitem(idx, value) return data def _insert_strls(self, data: DataFrame) -> DataFrame: @@ -1876,7 +1876,7 @@ def _insert_strls(self, data: DataFrame) -> DataFrame: if typ != "Q": continue # Wrap v_o in a string to allow uint64 values as keys on 32bit OS - data[data.columns[i]] = [self.GSO[str(k)] for k in data.iloc[:, i]] + data.isetitem(i, [self.GSO[str(k)] for k in data.iloc[:, i]]) return data def _do_select_columns(self, data: DataFrame, columns: Sequence[str]) -> DataFrame: diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 343dd285b31aa..441f60b0f0301 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2835,7 +2835,7 @@ def test_dict_data_arrow_column_expansion(self, key_val, col_vals, col_type): ) result = 
DataFrame({key_val: [1, 2]}, columns=cols) expected = DataFrame([[1, np.nan], [2, np.nan]], columns=cols) - expected[expected.columns[1]] = expected.iloc[:, 1].astype(object) + expected.isetitem(1, expected.iloc[:, 1].astype(object)) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 33221463637d3..7b44cd569d349 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -179,9 +179,7 @@ def test_frame_non_unique_columns(self, orient, data): # in milliseconds; these are internally stored in nanosecond, # so divide to get where we need # TODO: a to_epoch method would also solve; see GH 14772 - expected[expected.columns[0]] = ( - expected.iloc[:, 0].astype(np.int64) // 1000000 - ) + expected.isetitem(0, expected.iloc[:, 0].astype(np.int64) // 1000000) elif orient == "split": expected = df expected.columns = ["x", "x.1"] From e3b04b79d0346a3df612fb41f09dff8161d9e183 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 4 Dec 2023 13:50:35 +0000 Subject: [PATCH 33/35] fix comment --- pandas/core/indexing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 1653ea09e4700..d4fe217f58fc7 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2130,10 +2130,10 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None: if dtype not in (np.void, object) and not self.obj.empty: # - Exclude np.void, as that is a special case for expansion. # We want to warn for - # df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]}) + # df = pd.DataFrame({'a': [1, 2]}) # df.loc[:, 'a'] = .3 # but not for - # df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]}) + # df = pd.DataFrame({'a': [1, 2]}) # df.loc[:, 'b'] = .3 # - Exclude `object`, as then no upcasting happens. 
# - Exclude empty initial object with enlargement, From 7b0e3c8c93ec7bfd6c563b7905720eb816c67db1 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Wed, 20 Dec 2023 20:24:38 +0000 Subject: [PATCH 34/35] revert unnecessary change, add link in whatsnew note --- doc/source/whatsnew/v2.2.0.rst | 2 +- pandas/tests/groupby/test_groupby.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index a9ac01646e98e..c3c9e3141a43c 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -590,7 +590,7 @@ Conversion - Bug in :meth:`DataFrame.astype` when called with ``str`` on unpickled array - the array might change in-place (:issue:`54654`) - Bug in :meth:`DataFrame.astype` where ``errors="ignore"`` had no effect for extension types (:issue:`54654`) - Bug in :meth:`Series.convert_dtypes` not converting all NA column to ``null[pyarrow]`` (:issue:`55346`) -- Bug in ``DataFrame.loc`` was not throwing "incompatible dtype warning" (see PDEP6) when assigning a ``Series`` with a different dtype using a full column setter (e.g. ``df.loc[:, 'a'] = incompatible_value``) (:issue:`39584`) +- Bug in ``DataFrame.loc`` was not throwing "incompatible dtype warning" (see `PDEP6 <https://pandas.pydata.org/pdeps/0006-ban-upcasting.html>`_) when assigning a ``Series`` with a different dtype using a full column setter (e.g.
``df.loc[:, 'a'] = incompatible_value``) (:issue:`39584`) - Strings diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 519555a6958c7..abf19c3c6934a 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -199,7 +199,7 @@ def f_1(grp): with tm.assert_produces_warning(FutureWarning, match=msg): result = df.groupby("A").apply(f_1)[["B"]] e = expected.copy() - e["B"] = [3, 5, float("nan")] + e.loc["Tiger"] = np.nan tm.assert_frame_equal(result, e) def f_2(grp): From 183a609bf7528ed21b33a020cfa14a2b550293f2 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Wed, 20 Dec 2023 20:35:07 +0000 Subject: [PATCH 35/35] one more --- pandas/tests/groupby/test_groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index abf19c3c6934a..fce7caa90cce4 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -211,7 +211,7 @@ def f_2(grp): with tm.assert_produces_warning(FutureWarning, match=msg): result = df.groupby("A").apply(f_2)[["B"]] e = expected.copy() - e["B"] = [3, float("nan"), 0] + e.loc["Pony"] = np.nan tm.assert_frame_equal(result, e) # 5592 revisited, with datetimes