From 647a3939c89f236033c06afe120c8e879bf1f853 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 20 Mar 2021 10:26:16 -0700 Subject: [PATCH 1/5] DOC: suppress warnings from CategoricalBlock deprecation --- doc/source/user_guide/io.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index cf153ddd2cbbd..3b7a6037a9715 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -5240,6 +5240,7 @@ Write to a feather file. Read from a feather file. .. ipython:: python + :okwarning: result = pd.read_feather("example.feather") result @@ -5323,6 +5324,7 @@ Write to a parquet file. Read from a parquet file. .. ipython:: python + :okwarning: result = pd.read_parquet("example_fp.parquet", engine="fastparquet") result = pd.read_parquet("example_pa.parquet", engine="pyarrow") From a62929eeb35308e65e9e804ca02f60c6dbf9bb35 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 20 Mar 2021 17:39:01 -0700 Subject: [PATCH 2/5] REF: simplify BlockManager.idelete --- pandas/core/generic.py | 2 +- pandas/core/internals/managers.py | 40 +++++++------------------------ 2 files changed, 10 insertions(+), 32 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c20b2840a40ab..f1cce9b94e3dc 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4063,7 +4063,7 @@ def __delitem__(self, key) -> None: # there was no match, this call should raise the appropriate # exception: loc = self.axes[-1].get_loc(key) - self._mgr.idelete(loc) + self._mgr = self._mgr.idelete(loc) # delete from the caches try: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index da78fc5dfba76..64a57d6ab497f 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1066,41 +1066,18 @@ def iget_values(self, i: int) -> ArrayLike: values = block.iget(self.blklocs[i]) return values - def idelete(self, indexer): + def idelete(self, indexer) -> BlockManager: """ - Delete selected locations in-place (new block and array, same BlockManager) + Delete selected locations, returning a new BlockManager. """ is_deleted = np.zeros(self.shape[0], dtype=np.bool_) is_deleted[indexer] = True - ref_loc_offset = -is_deleted.cumsum() + taker = (~is_deleted).nonzero()[0] - is_blk_deleted = [False] * len(self.blocks) - - if isinstance(indexer, int): - affected_start = indexer - else: - affected_start = is_deleted.nonzero()[0][0] - - for blkno, _ in _fast_count_smallints(self.blknos[affected_start:]): - blk = self.blocks[blkno] - bml = blk.mgr_locs - blk_del = is_deleted[bml.indexer].nonzero()[0] - - if len(blk_del) == len(bml): - is_blk_deleted[blkno] = True - continue - elif len(blk_del) != 0: - blk.delete(blk_del) - bml = blk.mgr_locs - - blk.mgr_locs = bml.add(ref_loc_offset[bml.indexer]) - - # FIXME: use Index.delete as soon as it uses fastpath=True - self.axes[0] = self.items[~is_deleted] - self.blocks = tuple( - b for blkno, b in enumerate(self.blocks) if not is_blk_deleted[blkno] - ) - self._rebuild_blknos_and_blklocs() + nbs = self._slice_take_blocks_ax0(taker, only_slice=True) + new_index = self.items.delete(indexer) + axes = [new_index, self.axes[1]] + return type(self)._simple_new(nbs, axes) def iset(self, loc: Union[int, slice, np.ndarray], value): """ @@ -1715,7 +1692,7 @@ def _consolidate_check(self): def _consolidate_inplace(self): pass - def idelete(self, indexer): + def idelete(self, indexer) -> SingleBlockManager: """ Delete single location from SingleBlockManager. @@ -1723,6 +1700,7 @@ def idelete(self, indexer): """ self._block.delete(indexer) self.axes[0] = self.axes[0].delete(indexer) + return self def fast_xs(self, loc): """ From 11962929f55876634b3e8b905ae08473ad5ffbfe Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 20 Mar 2021 17:40:21 -0700 Subject: [PATCH 3/5] ArrayManager compat --- pandas/core/internals/array_manager.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 34b3d83c066c2..37dae775a9e54 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -848,6 +848,7 @@ def idelete(self, indexer): self.arrays = [self.arrays[i] for i in np.nonzero(to_keep)[0]] self._axes = [self._axes[0], self._axes[1][to_keep]] + return self def iset(self, loc: Union[int, slice, np.ndarray], value): """ @@ -1259,7 +1260,7 @@ def apply(self, func, **kwargs): def setitem(self, indexer, value): return self.apply_with_block("setitem", indexer=indexer, value=value) - def idelete(self, indexer): + def idelete(self, indexer) -> SingleArrayManager: """ Delete selected locations in-place (new array, same ArrayManager) """ @@ -1268,6 +1269,7 @@ def idelete(self, indexer): self.arrays = [self.arrays[0][to_keep]] self._axes = [self._axes[0][to_keep]] + return self def _get_data_subset(self, predicate: Callable) -> ArrayManager: # used in get_numeric_data / get_bool_data From 5e04a66e0d330801024a34741306cf906bef2558 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 21 Mar 2021 09:23:28 -0700 Subject: [PATCH 4/5] mypy fixup --- pandas/core/internals/managers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 64a57d6ab497f..120298ceacfea 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1077,7 +1077,7 @@ def idelete(self, indexer) -> BlockManager: nbs = self._slice_take_blocks_ax0(taker, only_slice=True) new_index = self.items.delete(indexer) axes = [new_index, self.axes[1]] - return type(self)._simple_new(nbs, axes) + return type(self)._simple_new(tuple(nbs), axes) def iset(self, loc: Union[int, slice, np.ndarray], value): """ From 7f081ec8aacb3ecc761ba7bda6ea1e4c3567de97 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 21 Mar 2021 11:33:39 -0700 Subject: [PATCH 5/5] troubleshoot --- pandas/core/internals/managers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 120298ceacfea..b84bbb20b09ad 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1075,8 +1075,8 @@ def idelete(self, indexer) -> BlockManager: taker = (~is_deleted).nonzero()[0] nbs = self._slice_take_blocks_ax0(taker, only_slice=True) - new_index = self.items.delete(indexer) - axes = [new_index, self.axes[1]] + new_columns = self.items[~is_deleted] + axes = [new_columns, self.axes[1]] return type(self)._simple_new(tuple(nbs), axes) def iset(self, loc: Union[int, slice, np.ndarray], value):