Skip to content

REF: simplify BlockManager.idelete #40548

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Mar 30, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/user_guide/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5240,6 +5240,7 @@ Write to a feather file.
Read from a feather file.

.. ipython:: python
:okwarning:

result = pd.read_feather("example.feather")
result
Expand Down Expand Up @@ -5323,6 +5324,7 @@ Write to a parquet file.
Read from a parquet file.

.. ipython:: python
:okwarning:

result = pd.read_parquet("example_fp.parquet", engine="fastparquet")
result = pd.read_parquet("example_pa.parquet", engine="pyarrow")
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4063,7 +4063,7 @@ def __delitem__(self, key) -> None:
# there was no match, this call should raise the appropriate
# exception:
loc = self.axes[-1].get_loc(key)
self._mgr.idelete(loc)
self._mgr = self._mgr.idelete(loc)

# delete from the caches
try:
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -848,6 +848,7 @@ def idelete(self, indexer):

self.arrays = [self.arrays[i] for i in np.nonzero(to_keep)[0]]
self._axes = [self._axes[0], self._axes[1][to_keep]]
return self

def iset(self, loc: Union[int, slice, np.ndarray], value):
"""
Expand Down Expand Up @@ -1259,7 +1260,7 @@ def apply(self, func, **kwargs):
def setitem(self, indexer, value):
return self.apply_with_block("setitem", indexer=indexer, value=value)

def idelete(self, indexer):
def idelete(self, indexer) -> SingleArrayManager:
"""
Delete selected locations in-place (new array, same ArrayManager)
"""
Expand All @@ -1268,6 +1269,7 @@ def idelete(self, indexer):

self.arrays = [self.arrays[0][to_keep]]
self._axes = [self._axes[0][to_keep]]
return self

def _get_data_subset(self, predicate: Callable) -> ArrayManager:
# used in get_numeric_data / get_bool_data
Expand Down
40 changes: 9 additions & 31 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1066,41 +1066,18 @@ def iget_values(self, i: int) -> ArrayLike:
values = block.iget(self.blklocs[i])
return values

def idelete(self, indexer):
def idelete(self, indexer) -> BlockManager:
"""
Delete selected locations in-place (new block and array, same BlockManager)
Delete selected locations, returning a new BlockManager.
"""
is_deleted = np.zeros(self.shape[0], dtype=np.bool_)
is_deleted[indexer] = True
ref_loc_offset = -is_deleted.cumsum()
taker = (~is_deleted).nonzero()[0]

is_blk_deleted = [False] * len(self.blocks)

if isinstance(indexer, int):
affected_start = indexer
else:
affected_start = is_deleted.nonzero()[0][0]

for blkno, _ in _fast_count_smallints(self.blknos[affected_start:]):
blk = self.blocks[blkno]
bml = blk.mgr_locs
blk_del = is_deleted[bml.indexer].nonzero()[0]

if len(blk_del) == len(bml):
is_blk_deleted[blkno] = True
continue
elif len(blk_del) != 0:
blk.delete(blk_del)
bml = blk.mgr_locs

blk.mgr_locs = bml.add(ref_loc_offset[bml.indexer])

# FIXME: use Index.delete as soon as it uses fastpath=True
self.axes[0] = self.items[~is_deleted]
self.blocks = tuple(
b for blkno, b in enumerate(self.blocks) if not is_blk_deleted[blkno]
)
self._rebuild_blknos_and_blklocs()
nbs = self._slice_take_blocks_ax0(taker, only_slice=True)
new_columns = self.items[~is_deleted]
axes = [new_columns, self.axes[1]]
return type(self)._simple_new(tuple(nbs), axes)

def iset(self, loc: Union[int, slice, np.ndarray], value):
"""
Expand Down Expand Up @@ -1715,14 +1692,15 @@ def _consolidate_check(self):
def _consolidate_inplace(self):
pass

def idelete(self, indexer):
def idelete(self, indexer) -> SingleBlockManager:
"""
Delete single location from SingleBlockManager.

Ensures that self.blocks doesn't become empty.
"""
self._block.delete(indexer)
self.axes[0] = self.axes[0].delete(indexer)
return self

def fast_xs(self, loc):
"""
Expand Down