diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 57558491e0e57..6779d47792dbc 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -405,6 +405,10 @@ Other API changes - Files are now closed when encountering an exception in :func:`read_json` (:issue:`49921`) - Changed behavior of :func:`read_csv`, :func:`read_json` & :func:`read_fwf`, where the index will now always be a :class:`RangeIndex`, when no index is specified. Previously the index would be a :class:`Index` with dtype ``object`` if the new DataFrame/Series has length 0 (:issue:`49572`) - :meth:`DataFrame.values`, :meth:`DataFrame.to_numpy`, :meth:`DataFrame.xs`, :meth:`DataFrame.reindex`, :meth:`DataFrame.fillna`, and :meth:`DataFrame.replace` no longer silently consolidate the underlying arrays; do ``df = df.copy()`` to ensure consolidation (:issue:`49356`) +- Creating a new DataFrame using a full slice on both axes with :attr:`~DataFrame.loc` + or :attr:`~DataFrame.iloc` (thus, ``df.loc[:, :]`` or ``df.iloc[:, :]``) now returns a + new DataFrame (shallow copy) instead of the original DataFrame, consistent with other + methods to get a full slice (for example ``df.loc[:]`` or ``df[:]``) (:issue:`49469`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 929a1c4e30a5f..070ec7c7a2e4a 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -934,6 +934,11 @@ def _getitem_tuple_same_dim(self, tup: tuple): # be handled by the _getitem_lowerdim call above. assert retval.ndim == self.ndim + if retval is self.obj: + # if all axes were a null slice (`df.loc[:, :]`), ensure we still + # return a new object (https://github.com/pandas-dev/pandas/pull/49469) + retval = retval.copy(deep=False) + return retval @final diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index b8028fd28f8f8..5ebbbcfb5a301 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -612,6 +612,62 @@ def test_subset_chained_single_block_row(using_copy_on_write, using_array_manage assert subset.iloc[0] == 0 +@pytest.mark.parametrize( + "method", + [ + lambda df: df[:], + lambda df: df.loc[:, :], + lambda df: df.loc[:], + lambda df: df.iloc[:, :], + lambda df: df.iloc[:], + ], + ids=["getitem", "loc", "loc-rows", "iloc", "iloc-rows"], +) +def test_null_slice(request, method, using_copy_on_write): + # Case: also all variants of indexing with a null slice (:) should return + # new objects to ensure we correctly use CoW for the results + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) + df_orig = df.copy() + + df2 = method(df) + + # we always return new objects (shallow copy), regardless of CoW or not + assert df2 is not df + + # and those trigger CoW when mutated + df2.iloc[0, 0] = 0 + if using_copy_on_write: + tm.assert_frame_equal(df, df_orig) + else: + assert df.iloc[0, 0] == 0 + + +@pytest.mark.parametrize( + "method", + [ + lambda s: s[:], + lambda s: s.loc[:], + lambda s: s.iloc[:], + ], + ids=["getitem", "loc", "iloc"], +) +def test_null_slice_series(request, method, using_copy_on_write): + s = Series([1, 2, 3], index=["a", "b", "c"]) + s_orig = s.copy() + + s2 = method(s) + + # we always return new objects, regardless of CoW or not + assert s2 is not s + + # and those trigger CoW when mutated + s2.iloc[0] = 0 + if using_copy_on_write: + tm.assert_series_equal(s, s_orig) + else: + assert s.iloc[0] == 0 + + # TODO add more tests modifying the parent diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 11cbcfe231928..6bd0806a42a5a 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1097,6 +1097,7 @@ def test_identity_slice_returns_new_object( sliced_df = original_df.loc[:] assert sliced_df is not original_df assert original_df[:] is not original_df + assert original_df.loc[:, :] is not original_df # should be a shallow copy assert np.shares_memory(original_df["a"]._values, sliced_df["a"]._values) @@ -1110,7 +1111,6 @@ def test_identity_slice_returns_new_object( assert (sliced_df["a"] == 4).all() # These should not return copies - assert original_df is original_df.loc[:, :] df = DataFrame(np.random.randn(10, 4)) assert df[0] is df.loc[:, 0]