diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 41512ff9be82f..b91c627e86bd0 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1455,17 +1455,9 @@ def iget(self, col): return self.values def set_inplace(self, locs, values) -> None: - # NB: This is a misnomer, is supposed to be inplace but is not, - # see GH#33457 # When an ndarray, we should have locs.tolist() == [0] # When a BlockPlacement we should have list(locs) == [0] - self.values = values - try: - # TODO(GH33457) this can be removed - self._cache.clear() - except AttributeError: - # _cache not yet initialized - pass + self.values[:] = values def _maybe_squeeze_arg(self, arg): """ diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 3e0b62da64f42..2cce60110fbd5 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -660,7 +660,6 @@ def reindex_indexer( use_na_proxy : bool, default False Whether to use a np.void ndarray for newly introduced columns. - pandas-indexer with -1's only. """ if indexer is None: if new_axis is self.axes[axis] and not copy: diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index bee8ccb125315..36df1ddcdea74 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -545,6 +545,9 @@ def test_fancy_getitem_slice_mixed(self, float_frame, float_string_frame): # setting it triggers setting with copy sliced = float_frame.iloc[:, -3:] + # check that the setitem below is not a no-op + assert not (float_frame["C"] == 4).all() + assert np.shares_memory(sliced["C"]._values, float_frame["C"]._values) msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame" @@ -553,6 +556,12 @@ def test_fancy_getitem_slice_mixed(self, float_frame, float_string_frame): assert (float_frame["C"] == 4).all() + # GH#35417 setting with setitem creates a new array, so we get the warning + # but do not modify the original + with pytest.raises(com.SettingWithCopyError, match=msg): + sliced["C"] = 5.0 + assert (float_frame["C"] == 4).all() + def test_getitem_setitem_non_ix_labels(self): df = tm.makeTimeDataFrame() diff --git a/pandas/tests/frame/methods/test_rename.py b/pandas/tests/frame/methods/test_rename.py index 33fb191027c27..6c09f543f877d 100644 --- a/pandas/tests/frame/methods/test_rename.py +++ b/pandas/tests/frame/methods/test_rename.py @@ -4,8 +4,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - from pandas import ( DataFrame, Index, @@ -170,9 +168,9 @@ def test_rename_multiindex(self): renamed = df.rename(index={"foo1": "foo3", "bar2": "bar3"}, level=0) tm.assert_index_equal(renamed.index, new_index) - @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) setitem copy/view def test_rename_nocopy(self, float_frame): renamed = float_frame.rename(columns={"C": "foo"}, copy=False) + renamed["foo"][:] = 1.0 assert np.shares_memory(renamed["foo"]._values, float_frame["C"]._values) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 17a990a6c7a38..f80beaf19effe 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -841,6 +841,22 @@ def test_identity_slice_returns_new_object(self, using_array_manager, request): # should be a shallow copy assert np.shares_memory(original_df["a"], sliced_df["a"]) + original_df.loc[:, "a"] = [4, 4, 4] + if not using_array_manager: + assert (sliced_df["a"] == 4).all() + else: + # FIXME: what is the expected/desired behavior here? test it! + pass + + # GH#35417 but setting with setitem creates a new array, so + # sliced_df is not changed + original_df["a"] = [5, 5, 5] + if using_array_manager: + # TODO(ArrayManager) verify it is expected that the original didn't change + # setitem is replacing full column, so doesn't update "viewing" dataframe + assert not (sliced_df["a"] == 4).all() + else: + assert (sliced_df["a"] == 4).all() # Setting using .loc[:, "a"] sets inplace so alters both sliced and orig original_df.loc[:, "a"] = [4, 4, 4] diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index b215aee4ea1c6..ffbcf1a5a7ad8 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1033,13 +1033,8 @@ def test_loc_empty_list_indexer_is_ok(self): df.loc[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True ) - def test_identity_slice_returns_new_object(self, using_array_manager, request): + def test_identity_slice_returns_new_object(self, request): # GH13873 - if using_array_manager: - mark = pytest.mark.xfail( - reason="setting with .loc[:, 'a'] does not alter inplace" - ) - request.node.add_marker(mark) original_df = DataFrame({"a": [1, 2, 3]}) sliced_df = original_df.loc[:] @@ -1047,6 +1042,8 @@ def test_identity_slice_returns_new_object(self, using_array_manager, request): assert original_df[:] is not original_df # should be a shallow copy + original_df["a"][:] = [4, 4, 4] + assert np.shares_memory(original_df["a"]._values, sliced_df["a"]._values) # Setting using .loc[:, "a"] sets inplace so alters both sliced and orig diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index a5ba684a37edf..e012dbabdd793 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -744,35 +744,51 @@ def test_get_numeric_data(self): item_shape=(3,), ) mgr.iset(5, np.array([1, 2, 3], dtype=np.object_)) - numeric = mgr.get_numeric_data() tm.assert_index_equal(numeric.items, Index(["int", "float", "complex", "bool"])) + mgr_idx = mgr.items.get_loc("float") + num_idx = numeric.items.get_loc("float") + tm.assert_almost_equal( - mgr.iget(mgr.items.get_loc("float")).internal_values(), - numeric.iget(numeric.items.get_loc("float")).internal_values(), + mgr.iget(mgr_idx).internal_values(), + numeric.iget(num_idx).internal_values(), ) # Check sharing + numeric.iget(num_idx).internal_values()[:] = [100.0, 200.0, 300.0] numeric.iset( numeric.items.get_loc("float"), np.array([100.0, 200.0, 300.0]), inplace=True, ) + tm.assert_almost_equal( - mgr.iget(mgr.items.get_loc("float")).internal_values(), + mgr.iget(mgr_idx).internal_values(), np.array([100.0, 200.0, 300.0]), ) + def test_get_numeric_data_copy(self): + mgr = create_mgr( + "int: int; float: float; complex: complex;" + "str: object; bool: bool; obj: object; dt: datetime", + item_shape=(3,), + ) + mgr.iset(5, np.array([1, 2, 3], dtype=np.object_)) + numeric = mgr.get_numeric_data() + mgr_idx = mgr.items.get_loc("float") + num_idx = numeric.items.get_loc("float") + numeric2 = mgr.get_numeric_data(copy=True) tm.assert_index_equal(numeric.items, Index(["int", "float", "complex", "bool"])) + numeric2.iget(num_idx).internal_values()[:] = [1000.0, 2000.0, 3000.0] numeric2.iset( numeric2.items.get_loc("float"), np.array([1000.0, 2000.0, 3000.0]), inplace=True, ) tm.assert_almost_equal( - mgr.iget(mgr.items.get_loc("float")).internal_values(), - np.array([100.0, 200.0, 300.0]), + mgr.iget(mgr_idx).internal_values(), + np.array([1.0, 1.0, 1.0]), ) def test_get_bool_data(self): @@ -790,18 +806,28 @@ def test_get_bool_data(self): bools.iget(bools.items.get_loc("bool")).internal_values(), ) - bools.iset(0, np.array([True, False, True]), inplace=True) + # GH#33457 setting a new array on bools does _not_ alter the original + # array in-place, so mgr is unchanged + bools.iset(0, np.array([True, False, True])) tm.assert_numpy_array_equal( mgr.iget(mgr.items.get_loc("bool")).internal_values(), - np.array([True, False, True]), + np.array([True, True, True]), ) - # Check sharing + def test_get_bool_data_copy(self): + # GH#35417 + mgr = create_mgr( + "int: int; float: float; complex: complex;" + "str: object; bool: bool; obj: object; dt: datetime", + item_shape=(3,), + ) + mgr.iset(6, np.array([True, False, True], dtype=np.object_)) + bools2 = mgr.get_bool_data(copy=True) - bools2.iset(0, np.array([False, True, False])) + bools2.blocks[0].values[:] = [[False, True, False]] tm.assert_numpy_array_equal( mgr.iget(mgr.items.get_loc("bool")).internal_values(), - np.array([True, False, True]), + np.array([True, True, True]), ) def test_unicode_repr_doesnt_raise(self): @@ -1346,23 +1372,6 @@ def check_frame_setitem(self, elem, index: Index, inplace: bool): assert df.dtypes[0] == object -class TestShouldStore: - def test_should_store_categorical(self): - cat = Categorical(["A", "B", "C"]) - df = DataFrame(cat) - blk = df._mgr.blocks[0] - - # matching dtype - assert blk.should_store(cat) - assert blk.should_store(cat[:-1]) - - # different dtype - assert not blk.should_store(cat.as_ordered()) - - # ndarray instead of Categorical - assert not blk.should_store(np.asarray(cat)) - - def test_validate_ndim(block_maker): values = np.array([1.0, 2.0]) placement = slice(2) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index bf0a10fa702a5..3f970dfb19818 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1310,7 +1310,7 @@ def test_index_with_nan(self): # all-nan in mi df2 = df.copy() - df2.loc[:, "id2"] = np.nan + df2["id2"] = np.nan y = df2.set_index("id2") result = y.to_string() expected = (