Skip to content

Commit c35eca3

Browse files
BUG: also return new object in case of null slice for both rows and columns (.(i)loc[:, :]) (#49469)
1 parent a047fb6 commit c35eca3

File tree

4 files changed

+66
-1
lines changed

doc/source/whatsnew/v2.0.0.rst

+4
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,10 @@ Other API changes
405405
- Files are now closed when encountering an exception in :func:`read_json` (:issue:`49921`)
406406
- Changed behavior of :func:`read_csv`, :func:`read_json` & :func:`read_fwf`, where the index will now always be a :class:`RangeIndex`, when no index is specified. Previously the index would be a :class:`Index` with dtype ``object`` if the new DataFrame/Series has length 0 (:issue:`49572`)
407407
- :meth:`DataFrame.values`, :meth:`DataFrame.to_numpy`, :meth:`DataFrame.xs`, :meth:`DataFrame.reindex`, :meth:`DataFrame.fillna`, and :meth:`DataFrame.replace` no longer silently consolidate the underlying arrays; do ``df = df.copy()`` to ensure consolidation (:issue:`49356`)
408+
- Creating a new DataFrame using a full slice on both axes with :attr:`~DataFrame.loc`
409+
or :attr:`~DataFrame.iloc` (i.e., ``df.loc[:, :]`` or ``df.iloc[:, :]``) now returns a
410+
new DataFrame (shallow copy) instead of the original DataFrame, consistent with other
411+
methods to get a full slice (for example ``df.loc[:]`` or ``df[:]``) (:issue:`49469`)
408412
-
409413

410414
.. ---------------------------------------------------------------------------

pandas/core/indexing.py

+5
Original file line numberDiff line numberDiff line change
@@ -934,6 +934,11 @@ def _getitem_tuple_same_dim(self, tup: tuple):
934934
# be handled by the _getitem_lowerdim call above.
935935
assert retval.ndim == self.ndim
936936

937+
if retval is self.obj:
938+
# if all axes were a null slice (`df.loc[:, :]`), ensure we still
939+
# return a new object (https://github.com/pandas-dev/pandas/pull/49469)
940+
retval = retval.copy(deep=False)
941+
937942
return retval
938943

939944
@final

pandas/tests/copy_view/test_indexing.py

+56
Original file line numberDiff line numberDiff line change
@@ -612,6 +612,62 @@ def test_subset_chained_single_block_row(using_copy_on_write, using_array_manage
612612
assert subset.iloc[0] == 0
613613

614614

615+
@pytest.mark.parametrize(
616+
"method",
617+
[
618+
lambda df: df[:],
619+
lambda df: df.loc[:, :],
620+
lambda df: df.loc[:],
621+
lambda df: df.iloc[:, :],
622+
lambda df: df.iloc[:],
623+
],
624+
ids=["getitem", "loc", "loc-rows", "iloc", "iloc-rows"],
625+
)
626+
def test_null_slice(request, method, using_copy_on_write):
627+
# Case: also all variants of indexing with a null slice (:) should return
628+
# new objects to ensure we correctly use CoW for the results
629+
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
630+
df_orig = df.copy()
631+
632+
df2 = method(df)
633+
634+
# we always return new objects (shallow copy), regardless of CoW or not
635+
assert df2 is not df
636+
637+
# and those trigger CoW when mutated
638+
df2.iloc[0, 0] = 0
639+
if using_copy_on_write:
640+
tm.assert_frame_equal(df, df_orig)
641+
else:
642+
assert df.iloc[0, 0] == 0
643+
644+
645+
@pytest.mark.parametrize(
646+
"method",
647+
[
648+
lambda s: s[:],
649+
lambda s: s.loc[:],
650+
lambda s: s.iloc[:],
651+
],
652+
ids=["getitem", "loc", "iloc"],
653+
)
654+
def test_null_slice_series(request, method, using_copy_on_write):
655+
s = Series([1, 2, 3], index=["a", "b", "c"])
656+
s_orig = s.copy()
657+
658+
s2 = method(s)
659+
660+
# we always return new objects, regardless of CoW or not
661+
assert s2 is not s
662+
663+
# and those trigger CoW when mutated
664+
s2.iloc[0] = 0
665+
if using_copy_on_write:
666+
tm.assert_series_equal(s, s_orig)
667+
else:
668+
assert s.iloc[0] == 0
669+
670+
615671
# TODO add more tests modifying the parent
616672

617673

pandas/tests/indexing/test_loc.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1097,6 +1097,7 @@ def test_identity_slice_returns_new_object(
10971097
sliced_df = original_df.loc[:]
10981098
assert sliced_df is not original_df
10991099
assert original_df[:] is not original_df
1100+
assert original_df.loc[:, :] is not original_df
11001101

11011102
# should be a shallow copy
11021103
assert np.shares_memory(original_df["a"]._values, sliced_df["a"]._values)
@@ -1110,7 +1111,6 @@ def test_identity_slice_returns_new_object(
11101111
assert (sliced_df["a"] == 4).all()
11111112

11121113
# These should not return copies
1113-
assert original_df is original_df.loc[:, :]
11141114
df = DataFrame(np.random.randn(10, 4))
11151115
assert df[0] is df.loc[:, 0]
11161116

0 commit comments

Comments
 (0)