diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index ce0c50a810ab1..0c99e5e7bdc54 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -389,6 +389,9 @@ def eval( # to use a non-numeric indexer try: with warnings.catch_warnings(record=True): + warnings.filterwarnings( + "always", "Setting a value on a view", FutureWarning + ) # TODO: Filter the warnings we actually care about here. if inplace and isinstance(target, NDFrame): target.loc[:, assigner] = ret diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c3696be0579b0..956e84867709f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4857,6 +4857,7 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None: inplace = validate_bool_kwarg(inplace, "inplace") kwargs["level"] = kwargs.pop("level", 0) + 1 + # TODO(CoW) those index/column resolvers create unnecessary refs to `self` index_resolvers = self._get_index_resolvers() column_resolvers = self._get_cleaned_column_resolvers() resolvers = column_resolvers, index_resolvers diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index a3c2ede55dabf..ab9209d509995 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1737,13 +1737,14 @@ def round(self, decimals: int, using_cow: bool = False) -> Self: # has no attribute "round" values = self.values.round(decimals) # type: ignore[union-attr] if values is self.values: - refs = self.refs if not using_cow: # Normally would need to do this before, but # numpy only returns same array when round operation # is no-op # https://github.com/numpy/numpy/blob/486878b37fc7439a3b2b87747f50db9b62fea8eb/numpy/core/src/multiarray/calculation.c#L625-L636 values = values.copy() + else: + refs = self.refs return self.make_block_same_class(values, refs=refs) # --------------------------------------------------------------------- diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 13c039cef3f91..2e20e97ca53b2 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1332,7 +1332,13 @@ def column_setitem( This is a method on the BlockManager level, to avoid creating an intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`) """ - if using_copy_on_write() and not self._has_no_reference(loc): + if warn_copy_on_write() and not self._has_no_reference(loc): + warnings.warn( + COW_WARNING_GENERAL_MSG, + FutureWarning, + stacklevel=find_stack_level(), + ) + elif using_copy_on_write() and not self._has_no_reference(loc): blkno = self.blknos[loc] # Split blocks to only copy the column we want to modify blk_loc = self.blklocs[loc] diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 2311ce928ee27..40c3069dfeebf 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -1164,7 +1164,9 @@ def test_assignment_single_assign_new(self): df.eval("c = a + b", inplace=True) tm.assert_frame_equal(df, expected) - def test_assignment_single_assign_local_overlap(self): + # TODO(CoW-warn) this should not warn (DataFrame.eval creates refs to self) + @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning") + def test_assignment_single_assign_local_overlap(self, warn_copy_on_write): df = DataFrame( np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab") ) @@ -1218,6 +1220,8 @@ def test_column_in(self): tm.assert_series_equal(result, expected, check_names=False) @pytest.mark.xfail(reason="Unknown: Omitted test_ in name prior.") + # TODO(CoW-warn) this should not warn (DataFrame.eval creates refs to self) + @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning") def test_assignment_not_inplace(self): # see gh-9297 df = DataFrame( @@ -1897,13 +1901,14 @@ def test_eval_no_support_column_name(request, column): tm.assert_frame_equal(result, expected) -def test_set_inplace(using_copy_on_write): +def test_set_inplace(using_copy_on_write, warn_copy_on_write): # https://github.com/pandas-dev/pandas/issues/47449 # Ensure we don't only update the DataFrame inplace, but also the actual # column values, such that references to this column also get updated df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) result_view = df[:] ser = df["A"] + # with tm.assert_cow_warning(warn_copy_on_write): df.eval("A = B + C", inplace=True) expected = DataFrame({"A": [11, 13, 15], "B": [4, 5, 6], "C": [7, 8, 9]}) tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py index 221bf8759875f..89384a4ef6ba6 100644 --- a/pandas/tests/copy_view/test_constructors.py +++ b/pandas/tests/copy_view/test_constructors.py @@ -242,8 +242,8 @@ def test_dataframe_from_dict_of_series( assert np.shares_memory(get_array(result, "a"), get_array(s1)) # mutating the new dataframe doesn't mutate original - # TODO(CoW-warn) this should also warn - result.iloc[0, 0] = 10 + with tm.assert_cow_warning(warn_copy_on_write): + result.iloc[0, 0] = 10 if using_copy_on_write: assert not np.shares_memory(get_array(result, "a"), get_array(s1)) tm.assert_series_equal(s1, s1_orig) diff --git a/pandas/tests/copy_view/test_core_functionalities.py b/pandas/tests/copy_view/test_core_functionalities.py index bfdf2b92fd326..97d77ef33d849 100644 --- a/pandas/tests/copy_view/test_core_functionalities.py +++ b/pandas/tests/copy_view/test_core_functionalities.py @@ -28,27 +28,32 @@ def test_setitem_dont_track_unnecessary_references(using_copy_on_write): assert np.shares_memory(arr, get_array(df, "a")) -def test_setitem_with_view_copies(using_copy_on_write): +def test_setitem_with_view_copies(using_copy_on_write, warn_copy_on_write): df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1}) view = df[:] expected = df.copy() df["b"] = 100 arr = get_array(df, "a") - df.iloc[0, 0] = 100 # Check that we correctly track reference + with tm.assert_cow_warning(warn_copy_on_write): + df.iloc[0, 0] = 100 # Check that we correctly track reference if using_copy_on_write: assert not np.shares_memory(arr, get_array(df, "a")) tm.assert_frame_equal(view, expected) -def test_setitem_with_view_invalidated_does_not_copy(using_copy_on_write, request): +def test_setitem_with_view_invalidated_does_not_copy( + using_copy_on_write, warn_copy_on_write, request +): df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1}) view = df[:] df["b"] = 100 arr = get_array(df, "a") view = None # noqa: F841 - df.iloc[0, 0] = 100 + # TODO(CoW-warn) false positive? + with tm.assert_cow_warning(warn_copy_on_write): + df.iloc[0, 0] = 100 if using_copy_on_write: # Setitem split the block. Since the old block shared data with view # all the new blocks are referencing view and each other. When view diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index c4d5e9dbce72a..5a0ebff64a803 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -101,7 +101,7 @@ def test_subset_column_selection_modify_parent(backend, using_copy_on_write): tm.assert_frame_equal(subset, expected) -def test_subset_row_slice(backend, using_copy_on_write): +def test_subset_row_slice(backend, using_copy_on_write, warn_copy_on_write): # Case: taking a subset of the rows of a DataFrame using a slice # + afterwards modifying the subset _, DataFrame, _ = backend @@ -121,7 +121,8 @@ def test_subset_row_slice(backend, using_copy_on_write): # INFO this no longer raise warning since pandas 1.4 # with pd.option_context("chained_assignment", "warn"): # with tm.assert_produces_warning(SettingWithCopyWarning): - subset.iloc[0, 0] = 0 + with tm.assert_cow_warning(warn_copy_on_write): + subset.iloc[0, 0] = 0 subset._mgr._verify_integrity() @@ -334,8 +335,11 @@ def test_subset_set_with_row_indexer( pytest.skip("setitem with labels selects on columns") # TODO(CoW-warn) should warn - if using_copy_on_write or warn_copy_on_write: + if using_copy_on_write: indexer_si(subset)[indexer] = 0 + elif warn_copy_on_write: + with tm.assert_cow_warning(): + indexer_si(subset)[indexer] = 0 else: # INFO iloc no longer raises warning since pandas 1.4 warn = SettingWithCopyWarning if indexer_si is tm.setitem else None @@ -419,7 +423,7 @@ def test_subset_set_column(backend, using_copy_on_write, warn_copy_on_write): "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] ) def test_subset_set_column_with_loc( - backend, using_copy_on_write, using_array_manager, dtype + backend, using_copy_on_write, warn_copy_on_write, using_array_manager, dtype ): # Case: setting a single column with loc on a viewing subset # -> subset.loc[:, col] = value @@ -432,6 +436,9 @@ def test_subset_set_column_with_loc( if using_copy_on_write: subset.loc[:, "a"] = np.array([10, 11], dtype="int64") + elif warn_copy_on_write: + with tm.assert_cow_warning(): + subset.loc[:, "a"] = np.array([10, 11], dtype="int64") else: with pd.option_context("chained_assignment", "warn"): with tm.assert_produces_warning( @@ -455,7 +462,9 @@ def test_subset_set_column_with_loc( tm.assert_frame_equal(df, df_orig) -def test_subset_set_column_with_loc2(backend, using_copy_on_write, using_array_manager): +def test_subset_set_column_with_loc2( + backend, using_copy_on_write, warn_copy_on_write, using_array_manager +): # Case: setting a single column with loc on a viewing subset # -> subset.loc[:, col] = value # separate test for case of DataFrame of a single column -> takes a separate @@ -467,6 +476,9 @@ def test_subset_set_column_with_loc2(backend, using_copy_on_write, using_array_m if using_copy_on_write: subset.loc[:, "a"] = 0 + elif warn_copy_on_write: + with tm.assert_cow_warning(): + subset.loc[:, "a"] = 0 else: with pd.option_context("chained_assignment", "warn"): with tm.assert_produces_warning( @@ -528,7 +540,9 @@ def test_subset_set_columns(backend, using_copy_on_write, warn_copy_on_write, dt [slice("a", "b"), np.array([True, True, False]), ["a", "b"]], ids=["slice", "mask", "array"], ) -def test_subset_set_with_column_indexer(backend, indexer, using_copy_on_write): +def test_subset_set_with_column_indexer( + backend, indexer, using_copy_on_write, warn_copy_on_write +): # Case: setting multiple columns with a column indexer on a viewing subset # -> subset.loc[:, [col1, col2]] = value _, DataFrame, _ = backend @@ -538,6 +552,9 @@ def test_subset_set_with_column_indexer(backend, indexer, using_copy_on_write): if using_copy_on_write: subset.loc[:, indexer] = 0 + elif warn_copy_on_write: + with tm.assert_cow_warning(): + subset.loc[:, indexer] = 0 else: with pd.option_context("chained_assignment", "warn"): # As of 2.0, this setitem attempts (successfully) to set values @@ -659,10 +676,7 @@ def test_subset_chained_getitem_column( # modify parent -> don't modify subset subset = df[:]["a"][0:2] df._clear_item_cache() - # TODO(CoW-warn) should also warn for mixed block and nullable dtypes - with tm.assert_cow_warning( - warn_copy_on_write and dtype == "int64" and dtype_backend == "numpy" - ): + with tm.assert_cow_warning(warn_copy_on_write): df.iloc[0, 0] = 0 expected = Series([1, 2], name="a") if using_copy_on_write: @@ -765,8 +779,7 @@ def test_null_slice(backend, method, using_copy_on_write, warn_copy_on_write): assert df2 is not df # and those trigger CoW when mutated - # TODO(CoW-warn) should also warn for nullable dtypes - with tm.assert_cow_warning(warn_copy_on_write and dtype_backend == "numpy"): + with tm.assert_cow_warning(warn_copy_on_write): df2.iloc[0, 0] = 0 if using_copy_on_write: tm.assert_frame_equal(df, df_orig) @@ -884,10 +897,10 @@ def test_series_subset_set_with_indexer( # del operator -def test_del_frame(backend, using_copy_on_write): +def test_del_frame(backend, using_copy_on_write, warn_copy_on_write): # Case: deleting a column with `del` on a viewing child dataframe should # not modify parent + update the references - _, DataFrame, _ = backend + dtype_backend, DataFrame, _ = backend df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df_orig = df.copy() df2 = df[:] @@ -901,11 +914,14 @@ def test_del_frame(backend, using_copy_on_write): tm.assert_frame_equal(df2, df_orig[["a", "c"]]) df2._mgr._verify_integrity() - df.loc[0, "b"] = 200 + # TODO(CoW-warn) false positive, this should not warn? + with tm.assert_cow_warning(warn_copy_on_write and dtype_backend == "numpy"): + df.loc[0, "b"] = 200 assert np.shares_memory(get_array(df, "a"), get_array(df2, "a")) df_orig = df.copy() - df2.loc[0, "a"] = 100 + with tm.assert_cow_warning(warn_copy_on_write): + df2.loc[0, "a"] = 100 if using_copy_on_write: # modifying child after deleting a column still doesn't update parent tm.assert_frame_equal(df, df_orig) @@ -1109,7 +1125,6 @@ def test_set_value_copy_only_necessary_column( ): # When setting inplace, only copy column that is modified instead of the whole # block (by splitting the block) - single_block = isinstance(col[0], int) df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": col}) df_orig = df.copy() view = df[:] @@ -1120,11 +1135,7 @@ def test_set_value_copy_only_necessary_column( ): indexer_func(df)[indexer] = val else: - # TODO(CoW-warn) should also warn in the other cases - with tm.assert_cow_warning( - warn_copy_on_write - and not (indexer[0] == slice(None) or (not single_block and val == 100)) - ): + with tm.assert_cow_warning(warn_copy_on_write): indexer_func(df)[indexer] = val if using_copy_on_write: diff --git a/pandas/tests/copy_view/test_interp_fillna.py b/pandas/tests/copy_view/test_interp_fillna.py index 5507e81d04e2a..b2f3312abf8b5 100644 --- a/pandas/tests/copy_view/test_interp_fillna.py +++ b/pandas/tests/copy_view/test_interp_fillna.py @@ -324,11 +324,12 @@ def test_fillna_ea_noop_shares_memory( def test_fillna_inplace_ea_noop_shares_memory( - using_copy_on_write, any_numeric_ea_and_arrow_dtype + using_copy_on_write, warn_copy_on_write, any_numeric_ea_and_arrow_dtype ): df = DataFrame({"a": [1, NA, 3], "b": 1}, dtype=any_numeric_ea_and_arrow_dtype) df_orig = df.copy() view = df[:] + # TODO(CoW-warn) df.fillna(100, inplace=True) if isinstance(df["a"].dtype, ArrowDtype) or using_copy_on_write: @@ -342,7 +343,9 @@ def test_fillna_inplace_ea_noop_shares_memory( assert not df._mgr._has_no_reference(1) assert not view._mgr._has_no_reference(1) - df.iloc[0, 1] = 100 + # TODO(CoW-warn) should this warn for ArrowDtype? + with tm.assert_cow_warning(warn_copy_on_write): + df.iloc[0, 1] = 100 if isinstance(df["a"].dtype, ArrowDtype) or using_copy_on_write: tm.assert_frame_equal(df_orig, view) else: diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 73bb9b4a71741..faa1eec09d30b 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -39,7 +39,7 @@ def test_copy(using_copy_on_write): assert df.iloc[0, 0] == 1 -def test_copy_shallow(using_copy_on_write): +def test_copy_shallow(using_copy_on_write, warn_copy_on_write): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df_copy = df.copy(deep=False) @@ -69,7 +69,8 @@ def test_copy_shallow(using_copy_on_write): assert np.shares_memory(get_array(df_copy, "c"), get_array(df, "c")) else: # mutating shallow copy does mutate original - df_copy.iloc[0, 0] = 0 + with tm.assert_cow_warning(warn_copy_on_write): + df_copy.iloc[0, 0] = 0 assert df.iloc[0, 0] == 0 # and still shares memory assert np.shares_memory(get_array(df_copy, "a"), get_array(df, "a")) @@ -546,7 +547,7 @@ def test_shift_columns(using_copy_on_write, warn_copy_on_write): tm.assert_frame_equal(df2, expected) -def test_pop(using_copy_on_write): +def test_pop(using_copy_on_write, warn_copy_on_write): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df_orig = df.copy() view_original = df[:] @@ -558,7 +559,8 @@ def test_pop(using_copy_on_write): if using_copy_on_write: result.iloc[0] = 0 assert not np.shares_memory(result.values, get_array(view_original, "a")) - df.iloc[0, 0] = 0 + with tm.assert_cow_warning(warn_copy_on_write): + df.iloc[0, 0] = 0 if using_copy_on_write: assert not np.shares_memory(get_array(df, "b"), get_array(view_original, "b")) tm.assert_frame_equal(view_original, df_orig) @@ -744,7 +746,7 @@ def test_swapaxes_read_only_array(): ], ids=["shallow-copy", "reset_index", "rename", "select_dtypes"], ) -def test_chained_methods(request, method, idx, using_copy_on_write): +def test_chained_methods(request, method, idx, using_copy_on_write, warn_copy_on_write): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df_orig = df.copy() @@ -753,13 +755,15 @@ def test_chained_methods(request, method, idx, using_copy_on_write): # modify df2 -> don't modify df df2 = method(df) - df2.iloc[0, idx] = 0 + with tm.assert_cow_warning(warn_copy_on_write and df2_is_view): + df2.iloc[0, idx] = 0 if not df2_is_view: tm.assert_frame_equal(df, df_orig) # modify df -> don't modify df2 df2 = method(df) - df.iloc[0, 0] = 0 + with tm.assert_cow_warning(warn_copy_on_write and df2_is_view): + df.iloc[0, 0] = 0 if not df2_is_view: tm.assert_frame_equal(df2.iloc[:, idx:], df_orig) @@ -908,7 +912,7 @@ def test_dropna_series(using_copy_on_write, val): lambda df: df.tail(3), ], ) -def test_head_tail(method, using_copy_on_write): +def test_head_tail(method, using_copy_on_write, warn_copy_on_write): df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) df_orig = df.copy() df2 = method(df) @@ -921,14 +925,16 @@ def test_head_tail(method, using_copy_on_write): assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) # modify df2 to trigger CoW for that block - df2.iloc[0, 0] = 0 + with tm.assert_cow_warning(warn_copy_on_write): + df2.iloc[0, 0] = 0 if using_copy_on_write: assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) else: # without CoW enabled, head and tail return views. Mutating df2 also mutates df. assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) - df2.iloc[0, 0] = 1 + with tm.assert_cow_warning(warn_copy_on_write): + df2.iloc[0, 0] = 1 tm.assert_frame_equal(df, df_orig) @@ -1160,7 +1166,7 @@ def test_sort_values_inplace(using_copy_on_write, obj, kwargs, warn_copy_on_writ @pytest.mark.parametrize("decimals", [-1, 0, 1]) -def test_round(using_copy_on_write, decimals): +def test_round(using_copy_on_write, warn_copy_on_write, decimals): df = DataFrame({"a": [1, 2], "b": "c"}) df_orig = df.copy() df2 = df.round(decimals=decimals) @@ -1175,6 +1181,7 @@ def test_round(using_copy_on_write, decimals): assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) else: assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) df2.iloc[0, 1] = "d" df2.iloc[0, 0] = 4 @@ -1756,13 +1763,15 @@ def test_xs_multiindex( tm.assert_frame_equal(df, df_orig) -def test_update_frame(using_copy_on_write): +def test_update_frame(using_copy_on_write, warn_copy_on_write): df1 = DataFrame({"a": [1.0, 2.0, 3.0], "b": [4.0, 5.0, 6.0]}) df2 = DataFrame({"b": [100.0]}, index=[1]) df1_orig = df1.copy() view = df1[:] - df1.update(df2) + # TODO(CoW) better warning message? + with tm.assert_cow_warning(warn_copy_on_write): + df1.update(df2) expected = DataFrame({"a": [1.0, 2.0, 3.0], "b": [4.0, 100.0, 6.0]}) tm.assert_frame_equal(df1, expected) @@ -1947,7 +1956,7 @@ def test_eval(using_copy_on_write): tm.assert_frame_equal(df, df_orig) -def test_eval_inplace(using_copy_on_write): +def test_eval_inplace(using_copy_on_write, warn_copy_on_write): df = DataFrame({"a": [1, 2, 3], "b": 1}) df_orig = df.copy() df_view = df[:] @@ -1955,6 +1964,7 @@ def test_eval_inplace(using_copy_on_write): df.eval("c = a+b", inplace=True) assert np.shares_memory(get_array(df, "a"), get_array(df_view, "a")) - df.iloc[0, 0] = 100 + with tm.assert_cow_warning(warn_copy_on_write): + df.iloc[0, 0] = 100 if using_copy_on_write: tm.assert_frame_equal(df_view, df_orig) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 83e2c795b8b5e..b8ede450b3c1a 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -402,7 +402,7 @@ def test_setitem_frame_2d_values(self, data): orig = df.copy() - df.iloc[:] = df + df.iloc[:] = df.copy() tm.assert_frame_equal(df, orig) df.iloc[:-1] = df.iloc[:-1].copy() diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 93f391b16817c..d76bfd2d7ae7c 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -561,7 +561,7 @@ def test_getitem_setitem_integer_slice_keyerrors(self): @td.skip_array_manager_invalid_test # already covered in test_iloc_col_slice_view def test_fancy_getitem_slice_mixed( - self, float_frame, float_string_frame, using_copy_on_write + self, float_frame, float_string_frame, using_copy_on_write, warn_copy_on_write ): sliced = float_string_frame.iloc[:, -3:] assert sliced["D"].dtype == np.float64 @@ -573,7 +573,8 @@ def test_fancy_getitem_slice_mixed( assert np.shares_memory(sliced["C"]._values, float_frame["C"]._values) - sliced.loc[:, "C"] = 4.0 + with tm.assert_cow_warning(warn_copy_on_write): + sliced.loc[:, "C"] = 4.0 if not using_copy_on_write: assert (float_frame["C"] == 4).all() @@ -1049,7 +1050,7 @@ def test_iloc_row(self): expected = df.reindex(df.index[[1, 2, 4, 6]]) tm.assert_frame_equal(result, expected) - def test_iloc_row_slice_view(self, using_copy_on_write, request): + def test_iloc_row_slice_view(self, using_copy_on_write, warn_copy_on_write): df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), index=range(0, 20, 2) ) @@ -1062,9 +1063,9 @@ def test_iloc_row_slice_view(self, using_copy_on_write, request): assert np.shares_memory(df[2], subset[2]) exp_col = original[2].copy() - subset.loc[:, 2] = 0.0 - if not using_copy_on_write: + with tm.assert_cow_warning(warn_copy_on_write): subset.loc[:, 2] = 0.0 + if not using_copy_on_write: exp_col._values[4:8] = 0.0 # With the enforcement of GH#45333 in 2.0, this remains a view @@ -1094,7 +1095,9 @@ def test_iloc_col(self): expected = df.reindex(columns=df.columns[[1, 2, 4, 6]]) tm.assert_frame_equal(result, expected) - def test_iloc_col_slice_view(self, using_array_manager, using_copy_on_write): + def test_iloc_col_slice_view( + self, using_array_manager, using_copy_on_write, warn_copy_on_write + ): df = DataFrame( np.random.default_rng(2).standard_normal((4, 10)), columns=range(0, 20, 2) ) @@ -1105,7 +1108,8 @@ def test_iloc_col_slice_view(self, using_array_manager, using_copy_on_write): # verify slice is view assert np.shares_memory(df[8]._values, subset[8]._values) - subset.loc[:, 8] = 0.0 + with tm.assert_cow_warning(warn_copy_on_write): + subset.loc[:, 8] = 0.0 assert (df[8] == 0).all() @@ -1388,12 +1392,13 @@ def test_loc_setitem_rhs_frame(self, idxr, val, warn): tm.assert_frame_equal(df, expected) @td.skip_array_manager_invalid_test - def test_iloc_setitem_enlarge_no_warning(self): + def test_iloc_setitem_enlarge_no_warning(self, warn_copy_on_write): # GH#47381 df = DataFrame(columns=["a", "b"]) expected = df.copy() view = df[:] - with tm.assert_produces_warning(None): + # TODO(CoW-warn) false positive: shouldn't warn in case of enlargement? + with tm.assert_produces_warning(FutureWarning if warn_copy_on_write else None): df.iloc[:, 0] = np.array([1, 2], dtype=np.float64) tm.assert_frame_equal(view, expected) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 49dd7a3c4df9b..4e37d8ff3c024 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -816,7 +816,7 @@ def test_setitem_object_array_of_tzaware_datetimes(self, idx, expected): class TestDataFrameSetItemWithExpansion: - def test_setitem_listlike_views(self, using_copy_on_write): + def test_setitem_listlike_views(self, using_copy_on_write, warn_copy_on_write): # GH#38148 df = DataFrame({"a": [1, 2, 3], "b": [4, 4, 6]}) @@ -827,7 +827,8 @@ def test_setitem_listlike_views(self, using_copy_on_write): df[["c", "d"]] = np.array([[0.1, 0.2], [0.3, 0.4], [0.4, 0.5]]) # edit in place the first column to check view semantics - df.iloc[0, 0] = 100 + with tm.assert_cow_warning(warn_copy_on_write): + df.iloc[0, 0] = 100 if using_copy_on_write: expected = Series([1, 2, 3], name="a") diff --git a/pandas/tests/frame/methods/test_diff.py b/pandas/tests/frame/methods/test_diff.py index 1b7e669232592..bef18dbaf8a8a 100644 --- a/pandas/tests/frame/methods/test_diff.py +++ b/pandas/tests/frame/methods/test_diff.py @@ -89,7 +89,7 @@ def test_diff_datetime_with_nat_zero_periods(self, tz): # diff on NaT values should give NaT, not timedelta64(0) dti = date_range("2016-01-01", periods=4, tz=tz) ser = Series(dti) - df = ser.to_frame() + df = ser.to_frame().copy() df[1] = ser.copy() diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 52b4b64ee279f..a32f290d3aa12 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -20,14 +20,18 @@ class TestFillNA: - def test_fillna_dict_inplace_nonunique_columns(self, using_copy_on_write): + def test_fillna_dict_inplace_nonunique_columns( + self, using_copy_on_write, warn_copy_on_write + ): df = DataFrame( {"A": [np.nan] * 3, "B": [NaT, Timestamp(1), NaT], "C": [np.nan, "foo", 2]} ) df.columns = ["A", "A", "A"] orig = df[:] - df.fillna({"A": 2}, inplace=True) + # TODO(CoW-warn) better warning message + with tm.assert_cow_warning(warn_copy_on_write): + df.fillna({"A": 2}, inplace=True) # The first and third columns can be set inplace, while the second cannot. expected = DataFrame( @@ -733,12 +737,16 @@ def test_fillna_inplace_with_columns_limit_and_value(self): @td.skip_array_manager_invalid_test @pytest.mark.parametrize("val", [-1, {"x": -1, "y": -1}]) - def test_inplace_dict_update_view(self, val, using_copy_on_write): + def test_inplace_dict_update_view( + self, val, using_copy_on_write, warn_copy_on_write + ): # GH#47188 df = DataFrame({"x": [np.nan, 2], "y": [np.nan, 2]}) df_orig = df.copy() result_view = df[:] - df.fillna(val, inplace=True) + # TODO(CoW-warn) better warning message + should warn in all cases + with tm.assert_cow_warning(warn_copy_on_write and not isinstance(val, int)): + df.fillna(val, inplace=True) expected = DataFrame({"x": [-1, 2.0], "y": [-1.0, 2]}) tm.assert_frame_equal(df, expected) if using_copy_on_write: diff --git a/pandas/tests/frame/methods/test_rename.py b/pandas/tests/frame/methods/test_rename.py index b965a5d973fb6..c3bc96b44c807 100644 --- a/pandas/tests/frame/methods/test_rename.py +++ b/pandas/tests/frame/methods/test_rename.py @@ -164,12 +164,13 @@ def test_rename_multiindex(self): renamed = df.rename(index={"foo1": "foo3", "bar2": "bar3"}, level=0) tm.assert_index_equal(renamed.index, new_index) - def test_rename_nocopy(self, float_frame, using_copy_on_write): + def test_rename_nocopy(self, float_frame, using_copy_on_write, warn_copy_on_write): renamed = float_frame.rename(columns={"C": "foo"}, copy=False) assert np.shares_memory(renamed["foo"]._values, float_frame["C"]._values) - renamed.loc[:, "foo"] = 1.0 + with tm.assert_cow_warning(warn_copy_on_write): + renamed.loc[:, "foo"] = 1.0 if using_copy_on_write: assert not (float_frame["C"] == 1.0).all() else: diff --git a/pandas/tests/frame/methods/test_to_dict_of_blocks.py b/pandas/tests/frame/methods/test_to_dict_of_blocks.py index 471b9eaf936ad..28973fe0d7900 100644 --- a/pandas/tests/frame/methods/test_to_dict_of_blocks.py +++ b/pandas/tests/frame/methods/test_to_dict_of_blocks.py @@ -30,6 +30,7 @@ def test_copy_blocks(self, float_frame): # make sure we did not change the original DataFrame assert _last_df is not None and not _last_df[column].equals(df[column]) + @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning") def test_no_copy_blocks(self, float_frame, using_copy_on_write): # GH#9607 df = DataFrame(float_frame, copy=True) diff --git a/pandas/tests/frame/methods/test_update.py b/pandas/tests/frame/methods/test_update.py index 5738a25f26fcb..0d32788b04b03 100644 --- a/pandas/tests/frame/methods/test_update.py +++ b/pandas/tests/frame/methods/test_update.py @@ -154,13 +154,15 @@ def test_update_with_different_dtype(self, using_copy_on_write): tm.assert_frame_equal(df, expected) @td.skip_array_manager_invalid_test - def test_update_modify_view(self, using_copy_on_write): + def test_update_modify_view(self, using_copy_on_write, warn_copy_on_write): # GH#47188 df = DataFrame({"A": ["1", np.nan], "B": ["100", np.nan]}) df2 = DataFrame({"A": ["a", "x"], "B": ["100", "200"]}) df2_orig = df2.copy() result_view = df2[:] - df2.update(df) + # TODO(CoW-warn) better warning message + with tm.assert_cow_warning(warn_copy_on_write): + df2.update(df) expected = DataFrame({"A": ["1", "x"], "B": ["100", "200"]}) tm.assert_frame_equal(df2, expected) if using_copy_on_write: diff --git a/pandas/tests/groupby/methods/test_nth.py b/pandas/tests/groupby/methods/test_nth.py index e3005c9b971ec..4a5571d0daa42 100644 --- a/pandas/tests/groupby/methods/test_nth.py +++ b/pandas/tests/groupby/methods/test_nth.py @@ -44,7 +44,9 @@ def test_first_last_nth(df): grouped["B"].last() grouped["B"].nth(0) + df = df.copy() df.loc[df["A"] == "foo", "B"] = np.nan + grouped = df.groupby("A") assert isna(grouped["B"].first()["foo"]) assert isna(grouped["B"].last()["foo"]) assert isna(grouped["B"].nth(0).iloc[0]) diff --git a/pandas/tests/groupby/methods/test_quantile.py b/pandas/tests/groupby/methods/test_quantile.py index d949e5e36fa81..361a8c27fbf9d 100644 --- a/pandas/tests/groupby/methods/test_quantile.py +++ b/pandas/tests/groupby/methods/test_quantile.py @@ -447,8 +447,7 @@ def test_timestamp_groupby_quantile(unit): def test_groupby_quantile_dt64tz_period(): # GH#51373 dti = pd.date_range("2016-01-01", periods=1000) - ser = pd.Series(dti) - df = ser.to_frame() + df = pd.Series(dti).to_frame().copy() df[1] = dti.tz_localize("US/Pacific") df[2] = dti.to_period("D") df[3] = dti - dti[0] diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index 3237c8f52797a..e31b675efb69a 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -201,7 +201,9 @@ def test_multiindex_assignment(self): df.loc[4, "d"] = arr tm.assert_series_equal(df.loc[4, "d"], Series(arr, index=[8, 10], name="d")) - def test_multiindex_assignment_single_dtype(self, using_copy_on_write): + def test_multiindex_assignment_single_dtype( + self, using_copy_on_write, warn_copy_on_write + ): # GH3777 part 2b # single dtype arr = np.array([0.0, 1.0]) @@ -215,6 +217,8 @@ def test_multiindex_assignment_single_dtype(self, using_copy_on_write): view = df["c"].iloc[:2].values # arr can be losslessly cast to int, so this setitem is inplace + # INFO(CoW-warn) this does not warn because we directly took .values + # above, so no reference to a pandas object is alive for `view` df.loc[4, "c"] = arr exp = Series(arr, index=[8, 10], name="c", dtype="int64") result = df.loc[4, "c"] @@ -234,7 +238,8 @@ def test_multiindex_assignment_single_dtype(self, using_copy_on_write): tm.assert_series_equal(result, exp) # scalar ok - df.loc[4, "c"] = 10 + with tm.assert_cow_warning(warn_copy_on_write): + df.loc[4, "c"] = 10 exp = Series(10, index=[8, 10], name="c", dtype="float64") tm.assert_series_equal(df.loc[4, "c"], exp) @@ -248,7 +253,8 @@ def test_multiindex_assignment_single_dtype(self, using_copy_on_write): # But with a length-1 listlike column indexer this behaves like # `df.loc[4, "c"] = 0 - df.loc[4, ["c"]] = [0] + with tm.assert_cow_warning(warn_copy_on_write): + df.loc[4, ["c"]] = [0] assert (df.loc[4, "c"] == 0).all() def test_groupby_example(self): diff --git a/pandas/tests/indexing/test_iat.py b/pandas/tests/indexing/test_iat.py index b4c4b81ac9cfb..5b8c4f2d4b9b9 100644 --- a/pandas/tests/indexing/test_iat.py +++ b/pandas/tests/indexing/test_iat.py @@ -41,11 +41,10 @@ def test_iat_setitem_item_cache_cleared( # previously this iat setting would split the block and fail to clear # the item_cache. - with tm.assert_cow_warning(warn_copy_on_write and indexer_ial is tm.iloc): + with tm.assert_cow_warning(warn_copy_on_write): indexer_ial(df)[7, 0] = 9999 - # TODO(CoW-warn) should also warn for iat? - with tm.assert_cow_warning(warn_copy_on_write and indexer_ial is tm.iloc): + with tm.assert_cow_warning(warn_copy_on_write): indexer_ial(df)[7, 1] = 1234 assert df.iat[7, 1] == 1234 diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 5c0c1b42ca963..b3f93cbfd4113 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -850,9 +850,8 @@ def test_identity_slice_returns_new_object( # Setting using .loc[:, "a"] sets inplace so alters both sliced and orig # depending on CoW - # TODO(CoW-warn) this should warn - # with tm.assert_cow_warning(warn_copy_on_write): - original_df.loc[:, "a"] = [4, 4, 4] + with tm.assert_cow_warning(warn_copy_on_write): + original_df.loc[:, "a"] = [4, 4, 4] if using_copy_on_write: assert (sliced_df["a"] == [1, 2, 3]).all() else: @@ -1142,7 +1141,7 @@ def test_iloc_getitem_with_duplicates2(self): expected = df.take([0], axis=1) tm.assert_frame_equal(result, expected) - def test_iloc_interval(self): + def test_iloc_interval(self, warn_copy_on_write): # GH#17130 df = DataFrame({Interval(1, 2): [1, 2]}) @@ -1155,7 +1154,9 @@ def test_iloc_interval(self): tm.assert_series_equal(result, expected) result = df.copy() - result.iloc[:, 0] += 1 + # TODO(CoW-warn) false positive + with tm.assert_cow_warning(warn_copy_on_write): + result.iloc[:, 0] += 1 expected = DataFrame({Interval(1, 2): [2, 3]}) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 35c50d6c705d1..96fd3f4e6fca0 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -828,7 +828,8 @@ def test_loc_setitem_frame_mixed_labels(self): df.loc[0, [1, 2]] = [5, 6] tm.assert_frame_equal(df, expected) - def test_loc_setitem_frame_multiples(self): + @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning") + def test_loc_setitem_frame_multiples(self, warn_copy_on_write): # multiple setting df = DataFrame( {"A": ["foo", "bar", "baz"], "B": Series(range(3), dtype=np.int64)} @@ -1097,9 +1098,8 @@ def test_identity_slice_returns_new_object( # Setting using .loc[:, "a"] sets inplace so alters both sliced and orig # depending on CoW - # TODO(CoW-warn) should warn - # with tm.assert_cow_warning(warn_copy_on_write): - original_df.loc[:, "a"] = [4, 4, 4] + with tm.assert_cow_warning(warn_copy_on_write): + original_df.loc[:, "a"] = [4, 4, 4] if using_copy_on_write: assert (sliced_df["a"] == [1, 2, 3]).all() else: diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 018400fcfe0cf..26035d1af7f90 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -288,6 +288,8 @@ def test_read_expands_user_home_dir( ): reader(path) + # TODO(CoW-warn) avoid warnings in the stata reader code + @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning") @pytest.mark.parametrize( "reader, module, path", [ diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 0a72fd7bbec7d..cd4075076062c 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -721,21 +721,15 @@ def test_basic_subset_columns(self, pa, df_full): def test_to_bytes_without_path_or_buf_provided(self, pa, df_full): # GH 37105 - msg = "Mismatched null-like values nan and None found" - warn = None - if using_copy_on_write(): - warn = FutureWarning - buf_bytes = df_full.to_parquet(engine=pa) assert isinstance(buf_bytes, bytes) buf_stream = BytesIO(buf_bytes) res = read_parquet(buf_stream) - expected = df_full.copy(deep=False) + expected = df_full.copy() expected.loc[1, "string_with_nan"] = None - with tm.assert_produces_warning(warn, match=msg): - tm.assert_frame_equal(df_full, res) + tm.assert_frame_equal(res, expected) def test_duplicate_columns(self, pa): # not currently able to handle duplicate columns diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index c8ff42509505a..82e6c2964b8c5 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -32,6 +32,11 @@ read_stata, ) +# TODO(CoW-warn) avoid warnings in the stata reader code +pytestmark = pytest.mark.filterwarnings( + "ignore:Setting a value on a view:FutureWarning" +) + @pytest.fixture def mixed_frame(): @@ -178,11 +183,13 @@ def test_read_dta2(self, datapath): path2 = datapath("io", "data", "stata", "stata2_115.dta") path3 = datapath("io", "data", "stata", "stata2_117.dta") - with tm.assert_produces_warning(UserWarning): + # TODO(CoW-warn) avoid warnings in the stata reader code + # once fixed -> remove `raise_on_extra_warnings=False` again + with tm.assert_produces_warning(UserWarning, raise_on_extra_warnings=False): parsed_114 = self.read_dta(path1) - with tm.assert_produces_warning(UserWarning): + with tm.assert_produces_warning(UserWarning, raise_on_extra_warnings=False): parsed_115 = self.read_dta(path2) - with tm.assert_produces_warning(UserWarning): + with tm.assert_produces_warning(UserWarning, raise_on_extra_warnings=False): parsed_117 = self.read_dta(path3) # FIXME: don't leave commented-out # 113 is buggy due to limits of date format support in Stata