Skip to content

Commit 248c966

Browse files
authored
CoW: Avoid tracking references in delete if not necessary (#54111)
1 parent cdb9cff commit 248c966

File tree

3 files changed

+24
-8
lines changed

3 files changed

+24
-8
lines changed

pandas/core/internals/blocks.py

+3-2
Original file line number | Diff line number | Diff line change
@@ -318,15 +318,16 @@ def take_block_columns(self, indices: npt.NDArray[np.intp]) -> Self:
318318

319319
@final
320320
def getitem_block_columns(
321-
self, slicer: slice, new_mgr_locs: BlockPlacement
321+
self, slicer: slice, new_mgr_locs: BlockPlacement, ref_inplace_op: bool = False
322322
) -> Self:
323323
"""
324324
Perform __getitem__-like, return result as block.
325325
326326
Only supports slices that preserve dimensionality.
327327
"""
328328
new_values = self._slice(slicer)
329-
return type(self)(new_values, new_mgr_locs, self.ndim, refs=self.refs)
329+
refs = self.refs if not ref_inplace_op or self.refs.has_reference() else None
330+
return type(self)(new_values, new_mgr_locs, self.ndim, refs=refs)
330331

331332
@final
332333
def _can_hold_element(self, element: Any) -> bool:

pandas/core/internals/managers.py

+7-2
Original file line number | Diff line number | Diff line change
@@ -673,6 +673,7 @@ def _slice_take_blocks_ax0(
673673
only_slice: bool = False,
674674
*,
675675
use_na_proxy: bool = False,
676+
ref_inplace_op: bool = False,
676677
) -> list[Block]:
677678
"""
678679
Slice/take blocks along axis=0.
@@ -688,6 +689,8 @@ def _slice_take_blocks_ax0(
688689
This is used when called from ops.blockwise.operate_blockwise.
689690
use_na_proxy : bool, default False
690691
Whether to use a np.void ndarray for newly introduced columns.
692+
ref_inplace_op: bool, default False
693+
Don't track refs if True because we operate inplace
691694
692695
Returns
693696
-------
@@ -718,7 +721,9 @@ def _slice_take_blocks_ax0(
718721
# views instead of copies
719722
blocks = [
720723
blk.getitem_block_columns(
721-
slice(ml, ml + 1), new_mgr_locs=BlockPlacement(i)
724+
slice(ml, ml + 1),
725+
new_mgr_locs=BlockPlacement(i),
726+
ref_inplace_op=ref_inplace_op,
722727
)
723728
for i, ml in enumerate(slobj)
724729
]
@@ -1380,7 +1385,7 @@ def idelete(self, indexer) -> BlockManager:
13801385
is_deleted[indexer] = True
13811386
taker = (~is_deleted).nonzero()[0]
13821387

1383-
nbs = self._slice_take_blocks_ax0(taker, only_slice=True)
1388+
nbs = self._slice_take_blocks_ax0(taker, only_slice=True, ref_inplace_op=True)
13841389
new_columns = self.items[~is_deleted]
13851390
axes = [new_columns, self.axes[1]]
13861391
return type(self)(tuple(nbs), axes, verify_integrity=False)

pandas/tests/copy_view/test_core_functionalities.py

+14-4
Original file line number | Diff line number | Diff line change
@@ -80,11 +80,21 @@ def test_delete(using_copy_on_write):
8080
)
8181
del df["b"]
8282
if using_copy_on_write:
83-
# TODO: This should not have references, delete makes a shallow copy
84-
# but keeps the blocks alive
85-
assert df._mgr.blocks[0].refs.has_reference()
86-
assert df._mgr.blocks[1].refs.has_reference()
83+
assert not df._mgr.blocks[0].refs.has_reference()
84+
assert not df._mgr.blocks[1].refs.has_reference()
8785

8886
df = df[["a"]]
8987
if using_copy_on_write:
9088
assert not df._mgr.blocks[0].refs.has_reference()
89+
90+
91+
def test_delete_reference(using_copy_on_write):
92+
df = DataFrame(
93+
np.random.default_rng(2).standard_normal((4, 3)), columns=["a", "b", "c"]
94+
)
95+
x = df[:]
96+
del df["b"]
97+
if using_copy_on_write:
98+
assert df._mgr.blocks[0].refs.has_reference()
99+
assert df._mgr.blocks[1].refs.has_reference()
100+
assert x._mgr.blocks[0].refs.has_reference()

0 commit comments

Comments
 (0)