diff --git a/pandas/_libs/internals.pyi b/pandas/_libs/internals.pyi index 9996fbfc346df..ce112413f8a64 100644 --- a/pandas/_libs/internals.pyi +++ b/pandas/_libs/internals.pyi @@ -10,7 +10,7 @@ import numpy as np from pandas._typing import ( ArrayLike, - T, + Self, npt, ) @@ -76,12 +76,12 @@ class SharedBlock: class NumpyBlock(SharedBlock): values: np.ndarray @final - def getitem_block_index(self: T, slicer: slice) -> T: ... + def slice_block_rows(self, slicer: slice) -> Self: ... class NDArrayBackedBlock(SharedBlock): values: NDArrayBackedExtensionArray @final - def getitem_block_index(self: T, slicer: slice) -> T: ... + def slice_block_rows(self, slicer: slice) -> Self: ... class Block(SharedBlock): ... @@ -95,7 +95,7 @@ class BlockManager: def __init__( self, blocks: tuple[B, ...], axes: list[Index], verify_integrity=... ) -> None: ... - def get_slice(self: T, slobj: slice, axis: int = ...) -> T: ... + def get_slice(self, slobj: slice, axis: int = ...) -> Self: ... def _rebuild_blknos_and_blklocs(self) -> None: ... class BlockValuesRefs: diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index cfc43521cf606..adf4e8c926fa3 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -715,7 +715,7 @@ cdef class NumpyBlock(SharedBlock): # set placement, ndim and refs self.values = values - cpdef NumpyBlock getitem_block_index(self, slice slicer): + cpdef NumpyBlock slice_block_rows(self, slice slicer): """ Perform __getitem__-like specialized to slicing along index. @@ -743,7 +743,7 @@ cdef class NDArrayBackedBlock(SharedBlock): # set placement, ndim and refs self.values = values - cpdef NDArrayBackedBlock getitem_block_index(self, slice slicer): + cpdef NDArrayBackedBlock slice_block_rows(self, slice slicer): """ Perform __getitem__-like specialized to slicing along index. @@ -899,7 +899,7 @@ cdef class BlockManager: # ------------------------------------------------------------------- # Indexing - cdef BlockManager _get_index_slice(self, slice slobj): + cdef BlockManager _slice_mgr_rows(self, slice slobj): cdef: SharedBlock blk, nb BlockManager mgr @@ -907,7 +907,7 @@ cdef class BlockManager: nbs = [] for blk in self.blocks: - nb = blk.getitem_block_index(slobj) + nb = blk.slice_block_rows(slobj) nbs.append(nb) new_axes = [self.axes[0], self.axes[1]._getitem_slice(slobj)] @@ -926,7 +926,7 @@ cdef class BlockManager: if axis == 0: new_blocks = self._slice_take_blocks_ax0(slobj) elif axis == 1: - return self._get_index_slice(slobj) + return self._slice_mgr_rows(slobj) else: raise IndexError("Requested axis not found in manager") diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 15f3630b2d735..f04ac76998adc 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -1275,7 +1275,7 @@ def get_slice(self, slobj: slice, axis: AxisInt = 0) -> SingleArrayManager: new_index = self.index._getitem_slice(slobj) return type(self)([new_array], [new_index], verify_integrity=False) - def getitem_mgr(self, indexer) -> SingleArrayManager: + def get_rows_with_mask(self, indexer: npt.NDArray[np.bool_]) -> SingleArrayManager: new_array = self.array[indexer] new_index = self.index[indexer] return type(self)([new_array], [new_index]) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 911cd4ae255ce..a08a44a11866a 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -36,6 +36,7 @@ FillnaOptions, IgnoreRaise, QuantileInterpolation, + Self, Shape, npt, ) @@ -259,36 +260,41 @@ def __len__(self) -> int: return len(self.values) @final - def getitem_block(self, slicer: slice | npt.NDArray[np.intp]) -> Block: + def slice_block_columns(self, slc: slice) -> Self: + """ + Perform __getitem__-like, return result as block. + """ + new_mgr_locs = self._mgr_locs[slc] + + new_values = self._slice(slc) + refs = self.refs + return type(self)(new_values, new_mgr_locs, self.ndim, refs=refs) + + @final + def take_block_columns(self, indices: npt.NDArray[np.intp]) -> Self: """ Perform __getitem__-like, return result as block. Only supports slices that preserve dimensionality. """ - # Note: the only place where we are called with ndarray[intp] - # is from internals.concat, and we can verify that never happens - # with 1-column blocks, i.e. never for ExtensionBlock. + # Note: only called from is from internals.concat, and we can verify + # that never happens with 1-column blocks, i.e. never for ExtensionBlock. - new_mgr_locs = self._mgr_locs[slicer] + new_mgr_locs = self._mgr_locs[indices] - new_values = self._slice(slicer) - refs = self.refs if isinstance(slicer, slice) else None - return type(self)(new_values, new_mgr_locs, self.ndim, refs=refs) + new_values = self._slice(indices) + return type(self)(new_values, new_mgr_locs, self.ndim, refs=None) @final def getitem_block_columns( self, slicer: slice, new_mgr_locs: BlockPlacement - ) -> Block: + ) -> Self: """ Perform __getitem__-like, return result as block. Only supports slices that preserve dimensionality. """ new_values = self._slice(slicer) - - if new_values.ndim != self.values.ndim: - raise ValueError("Only same dim slicing is allowed") - return type(self)(new_values, new_mgr_locs, self.ndim, refs=self.refs) @final @@ -1997,7 +2003,7 @@ def _slice( ------- ExtensionArray """ - # Notes: ndarray[bool] is only reachable when via getitem_mgr, which + # Notes: ndarray[bool] is only reachable when via get_rows_with_mask, which # is only for Series, i.e. self.ndim == 1. # return same dims as we currently have @@ -2023,7 +2029,7 @@ def _slice( return self.values[slicer] @final - def getitem_block_index(self, slicer: slice) -> ExtensionBlock: + def slice_block_rows(self, slicer: slice) -> Self: """ Perform __getitem__-like specialized to slicing along index. """ diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index cc46977cee6dc..1d22ed3fe8897 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -328,7 +328,10 @@ def _get_block_for_concat_plan( slc = lib.maybe_indices_to_slice(ax0_blk_indexer, max_len) # TODO: in all extant test cases 2023-04-08 we have a slice here. # Will this always be the case? - nb = blk.getitem_block(slc) + if isinstance(slc, slice): + nb = blk.slice_block_columns(slc) + else: + nb = blk.take_block_columns(slc) # assert nb.shape == (len(bp), mgr.shape[1]) return nb diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 31827a6bcf6d0..8e60bf02fed75 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -54,7 +54,6 @@ import pandas.core.algorithms as algos from pandas.core.arrays import DatetimeArray from pandas.core.arrays._mixins import NDArrayBackedExtensionArray -import pandas.core.common as com from pandas.core.construction import ( ensure_wrapped_if_datetimelike, extract_array, @@ -1872,7 +1871,7 @@ def concat_horizontal(cls, mgrs: list[Self], axes: list[Index]) -> Self: # We need to do getitem_block here otherwise we would be altering # blk.mgr_locs in place, which would render it invalid. This is only # relevant in the copy=False case. - nb = blk.getitem_block(slice(None)) + nb = blk.slice_block_columns(slice(None)) nb._mgr_locs = nb._mgr_locs.add(offset) blocks.append(nb) @@ -2018,21 +2017,12 @@ def _blklocs(self): """compat with BlockManager""" return None - def getitem_mgr(self, indexer: slice | np.ndarray) -> SingleBlockManager: + def get_rows_with_mask(self, indexer: npt.NDArray[np.bool_]) -> Self: # similar to get_slice, but not restricted to slice indexer blk = self._block - if ( - using_copy_on_write() - and isinstance(indexer, np.ndarray) - and len(indexer) > 0 - and com.is_bool_indexer(indexer) - and indexer.all() - ): + if using_copy_on_write() and len(indexer) > 0 and indexer.all(): return type(self)(blk.copy(deep=False), self.index) - array = blk._slice(indexer) - if array.ndim > 1: - # This will be caught by Series._get_values - raise ValueError("dimension-expanding indexing not allowed") + array = blk.values[indexer] bp = BlockPlacement(slice(0, len(array))) # TODO(CoW) in theory only need to track reference if new_array is a view @@ -2048,7 +2038,7 @@ def get_slice(self, slobj: slice, axis: AxisInt = 0) -> SingleBlockManager: raise IndexError("Requested axis not found in manager") blk = self._block - array = blk._slice(slobj) + array = blk.values[slobj] bp = BlockPlacement(slice(0, len(array))) # TODO this method is only used in groupby SeriesSplitter at the moment, # so passing refs is not yet covered by the tests diff --git a/pandas/core/series.py b/pandas/core/series.py index 540a770b1c897..6106a1680fc7d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -998,7 +998,7 @@ def __getitem__(self, key): if com.is_bool_indexer(key): key = check_bool_indexer(self.index, key) key = np.asarray(key, dtype=bool) - return self._get_values(key) + return self._get_rows_with_mask(key) return self._get_with(key) @@ -1054,8 +1054,8 @@ def _get_values_tuple(self, key: tuple): new_ser._mgr.add_references(self._mgr) # type: ignore[arg-type] return new_ser.__finalize__(self) - def _get_values(self, indexer: slice | npt.NDArray[np.bool_]) -> Series: - new_mgr = self._mgr.getitem_mgr(indexer) + def _get_rows_with_mask(self, indexer: npt.NDArray[np.bool_]) -> Series: + new_mgr = self._mgr.get_rows_with_mask(indexer) return self._constructor(new_mgr, fastpath=True).__finalize__(self) def _get_value(self, label, takeable: bool = False): diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index cf51d9d693155..890dc0a1a43a3 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -285,7 +285,7 @@ def test_getitem_slice(self, data): assert isinstance(result, type(data)) def test_getitem_ellipsis_and_slice(self, data): - # GH#40353 this is called from getitem_block_index + # GH#40353 this is called from slice_block_rows result = data[..., :] self.assert_extension_array_equal(result, data) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 79eb4110cfba9..47e7092743b00 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -942,12 +942,17 @@ def assert_slice_ok(mgr, axis, slobj): if isinstance(slobj, slice): sliced = mgr.get_slice(slobj, axis=axis) - elif mgr.ndim == 1 and axis == 0: - sliced = mgr.getitem_mgr(slobj) + elif ( + mgr.ndim == 1 + and axis == 0 + and isinstance(slobj, np.ndarray) + and slobj.dtype == bool + ): + sliced = mgr.get_rows_with_mask(slobj) else: # BlockManager doesn't support non-slice, SingleBlockManager # doesn't support axis > 0 - return + raise TypeError(slobj) mat_slobj = (slice(None),) * axis + (slobj,) tm.assert_numpy_array_equal( @@ -978,14 +983,6 @@ def assert_slice_ok(mgr, axis, slobj): mgr, ax, np.array([True, True, False], dtype=np.bool_) ) - # fancy indexer - assert_slice_ok(mgr, ax, []) - assert_slice_ok(mgr, ax, list(range(mgr.shape[ax]))) - - if mgr.shape[ax] >= 3: - assert_slice_ok(mgr, ax, [0, 1, 2]) - assert_slice_ok(mgr, ax, [-1, -2, -3]) - @pytest.mark.parametrize("mgr", MANAGERS) def test_take(self, mgr): def assert_take_ok(mgr, axis, indexer):