From 4bd5f80cf09e6141b96900d70b87ec1e1b2f85e6 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 28 Jan 2022 15:11:04 -0800 Subject: [PATCH 1/2] REF: make Block.delete return a new Block --- pandas/core/internals/blocks.py | 29 ++++++------------ pandas/core/internals/managers.py | 12 ++++++-- pandas/tests/internals/test_internals.py | 39 +++++++++++++++--------- 3 files changed, 42 insertions(+), 38 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 77a0d7804d27b..0984e827c0e92 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -378,9 +378,9 @@ def set_inplace(self, locs, values) -> None: """ self.values[locs] = values - def delete(self, loc) -> None: + def delete(self, loc) -> Block: """ - Delete given loc(-s) from block in-place. + Return a new Block with the given loc(s) deleted. """ # Argument 1 to "delete" has incompatible type "Union[ndarray[Any, Any], # ExtensionArray]"; expected "Union[_SupportsArray[dtype[Any]], @@ -388,13 +388,9 @@ def delete(self, loc) -> None: # [_SupportsArray[dtype[Any]]]], Sequence[Sequence[Sequence[ # _SupportsArray[dtype[Any]]]]], Sequence[Sequence[Sequence[Sequence[ # _SupportsArray[dtype[Any]]]]]]]" [arg-type] - self.values = np.delete(self.values, loc, 0) # type: ignore[arg-type] - self.mgr_locs = self._mgr_locs.delete(loc) - try: - self._cache.clear() - except AttributeError: - # _cache not yet initialized - pass + values = np.delete(self.values, loc, 0) # type: ignore[arg-type] + mgr_locs = self._mgr_locs.delete(loc) + return type(self)(values, placement=mgr_locs, ndim=self.ndim) @final def apply(self, func, **kwargs) -> list[Block]: @@ -1501,18 +1497,11 @@ def fillna( return [self.make_block_same_class(values=new_values)] - def delete(self, loc) -> None: - """ - Delete given loc(-s) from block in-place. - """ + def delete(self, loc) -> Block: # This will be unnecessary if/when __array_function__ is implemented - self.values = self.values.delete(loc) - self.mgr_locs = self._mgr_locs.delete(loc) - try: - self._cache.clear() - except AttributeError: - # _cache not yet initialized - pass + values = self.values.delete(loc) + mgr_locs = self._mgr_locs.delete(loc) + return type(self)(values, placement=mgr_locs, ndim=self.ndim) @cache_readonly def array_values(self) -> ExtensionArray: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 8599a1281a976..3655d98a3ef0a 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1133,8 +1133,12 @@ def value_getitem(placement): if len(val_locs) == len(blk.mgr_locs): removed_blknos.append(blkno_l) else: - blk.delete(blk_locs) - self._blklocs[blk.mgr_locs.indexer] = np.arange(len(blk)) + nb = blk.delete(blk_locs) + new_blocks = ( + self.blocks[:blkno_l] + (nb,) + self.blocks[blkno_l + 1 :] + ) + self.blocks = new_blocks + self._blklocs[nb.mgr_locs.indexer] = np.arange(len(nb)) if len(removed_blknos): # Remove blocks & update blknos accordingly @@ -1869,8 +1873,10 @@ def idelete(self, indexer) -> SingleBlockManager: Ensures that self.blocks doesn't become empty. """ - self._block.delete(indexer) + nb = self._block.delete(indexer) + self.blocks = (nb,) self.axes[0] = self.axes[0].delete(indexer) + self._cache.clear() return self def fast_xs(self, loc): diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 83df57f922c9c..3be82a071b7f1 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -285,27 +285,36 @@ def test_copy(self): def test_delete(self): newb = self.fblock.copy() - newb.delete(0) - assert isinstance(newb.mgr_locs, BlockPlacement) + locs = newb.mgr_locs + nb = newb.delete(0) + assert newb.mgr_locs is locs + + assert nb is not newb + tm.assert_numpy_array_equal( - newb.mgr_locs.as_array, np.array([2, 4], dtype=np.intp) + nb.mgr_locs.as_array, np.array([2, 4], dtype=np.intp) ) - assert (newb.values[0] == 1).all() + assert not (newb.values[0] == 1).all() + assert (nb.values[0] == 1).all() newb = self.fblock.copy() - newb.delete(1) - assert isinstance(newb.mgr_locs, BlockPlacement) + locs = newb.mgr_locs + nb = newb.delete(1) + assert newb.mgr_locs is locs + tm.assert_numpy_array_equal( - newb.mgr_locs.as_array, np.array([0, 4], dtype=np.intp) + nb.mgr_locs.as_array, np.array([0, 4], dtype=np.intp) ) - assert (newb.values[1] == 2).all() + assert not (newb.values[1] == 2).all() + assert (nb.values[1] == 2).all() newb = self.fblock.copy() - newb.delete(2) + locs = newb.mgr_locs + nb = newb.delete(2) tm.assert_numpy_array_equal( - newb.mgr_locs.as_array, np.array([0, 2], dtype=np.intp) + nb.mgr_locs.as_array, np.array([0, 2], dtype=np.intp) ) - assert (newb.values[1] == 1).all() + assert (nb.values[1] == 1).all() newb = self.fblock.copy() @@ -319,15 +328,15 @@ def test_delete_datetimelike(self): blk = df._mgr.blocks[0] assert isinstance(blk.values, TimedeltaArray) - blk.delete(1) - assert isinstance(blk.values, TimedeltaArray) + nb = blk.delete(1) + assert isinstance(nb.values, TimedeltaArray) df = DataFrame(arr.view("M8[ns]")) blk = df._mgr.blocks[0] assert isinstance(blk.values, DatetimeArray) - blk.delete([1, 3]) - assert isinstance(blk.values, DatetimeArray) + nb = blk.delete([1, 3]) + assert isinstance(nb.values, DatetimeArray) def test_split(self): # GH#37799 From 66cc2f33103d913d2a44b43189b794624be615be Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 28 Jan 2022 20:57:54 -0800 Subject: [PATCH 2/2] mypy fixup --- pandas/core/internals/managers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 3655d98a3ef0a..d2fca9f741b4c 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1134,10 +1134,10 @@ def value_getitem(placement): removed_blknos.append(blkno_l) else: nb = blk.delete(blk_locs) - new_blocks = ( + blocks_tup = ( self.blocks[:blkno_l] + (nb,) + self.blocks[blkno_l + 1 :] ) - self.blocks = new_blocks + self.blocks = blocks_tup self._blklocs[nb.mgr_locs.indexer] = np.arange(len(nb)) if len(removed_blknos):