diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e3f342a024f6c..e3f9f6dbb0025 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4551,6 +4551,13 @@ def __getitem__(self, key): else: return result + def _getitem_slice(self: _IndexT, slobj: slice) -> _IndexT: + """ + Fastpath for __getitem__ when we know we have a slice. + """ + res = self._data[slobj] + return type(self)._simple_new(res, name=self._name) + @final def _can_hold_identifiers_and_holds_name(self, name) -> bool: """ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 1edc716a24872..fc3e404998b43 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2101,6 +2101,24 @@ def __getitem__(self, key): verify_integrity=False, ) + def _getitem_slice(self: MultiIndex, slobj: slice) -> MultiIndex: + """ + Fastpath for __getitem__ when we know we have a slice. + """ + sortorder = None + if slobj.step is None or slobj.step > 0: + sortorder = self.sortorder + + new_codes = [level_codes[slobj] for level_codes in self.codes] + + return type(self)( + levels=self.levels, + codes=new_codes, + names=self._names, + sortorder=sortorder, + verify_integrity=False, + ) + @Appender(_index_shared_docs["take"] % _index_doc_kwargs) def take( self: MultiIndex, indices, axis=0, allow_fill=True, fill_value=None, **kwargs diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 33525f19912d5..56093c2a399c2 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -817,6 +817,13 @@ def __getitem__(self, key): # fall back to Int64Index return super().__getitem__(key) + def _getitem_slice(self: RangeIndex, slobj: slice) -> RangeIndex: + """ + Fastpath for __getitem__ when we know we have a slice. 
+ """ + res = self._range[slobj] + return type(self)._simple_new(res, name=self._name) + @unpack_zerodim_and_defer("__floordiv__") def __floordiv__(self, other): diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 906c95c825cab..63b78e6d030f2 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -788,6 +788,8 @@ def get_slice(self, slobj: slice, axis: int = 0) -> ArrayManager: return type(self)(arrays, new_axes, verify_integrity=False) + getitem_mgr = get_slice + def fast_xs(self, loc: int) -> ArrayLike: """ Return the array corresponding to `frame.iloc[loc]`. diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index a11ca0aa82b29..f0d7d7e441527 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -368,7 +368,7 @@ def getitem_block(self, slicer, new_mgr_locs=None) -> Block: """ Perform __getitem__-like, return result as block. - As of now, only supports slices that preserve dimensionality. + Only supports slices that preserve dimensionality. 
""" if new_mgr_locs is None: axis0_slicer = slicer[0] if isinstance(slicer, tuple) else slicer diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 8b08a5fd70537..2daa1ce8dc9a4 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -802,6 +802,7 @@ def _combine( return type(self).from_blocks(new_blocks, axes) def get_slice(self, slobj: slice, axis: int = 0) -> BlockManager: + assert isinstance(slobj, slice), type(slobj) if axis == 0: new_blocks = self._slice_take_blocks_ax0(slobj) @@ -812,7 +813,7 @@ def get_slice(self, slobj: slice, axis: int = 0) -> BlockManager: raise IndexError("Requested axis not found in manager") new_axes = list(self.axes) - new_axes[axis] = new_axes[axis][slobj] + new_axes[axis] = new_axes[axis]._getitem_slice(slobj) return type(self)._simple_new(tuple(new_blocks), new_axes) @@ -1201,7 +1202,9 @@ def value_getitem(placement): # Newly created block's dtype may already be present. self._known_consolidated = False - def insert(self, loc: int, item: Hashable, value, allow_duplicates: bool = False): + def insert( + self, loc: int, item: Hashable, value: ArrayLike, allow_duplicates: bool = False + ): """ Insert item at selected position. 
@@ -1209,7 +1212,7 @@ def insert(self, loc: int, item: Hashable, value, allow_duplicates: bool = False ---------- loc : int item : hashable - value : array_like + value : np.ndarray or ExtensionArray allow_duplicates: bool If False, trying to insert non-unique item will raise @@ -1226,11 +1229,9 @@ def insert(self, loc: int, item: Hashable, value, allow_duplicates: bool = False if value.ndim == 2: value = value.T - elif value.ndim == self.ndim - 1: - # TODO(EA2D): special case not needed with 2D EAs - value = ensure_block_shape(value, ndim=2) + else: + value = ensure_block_shape(value, ndim=self.ndim) - # TODO: type value as ArrayLike block = new_block(values=value, ndim=self.ndim, placement=slice(loc, loc + 1)) for blkno, count in _fast_count_smallints(self.blknos[loc:]): @@ -1367,6 +1368,7 @@ def _slice_take_blocks_ax0( # TODO(EA2D): special casing unnecessary with 2D EAs if sllen == 0: return [] + # TODO: tests all have isinstance(slobj, slice), other possibilities? return [blk.getitem_block(slobj, new_mgr_locs=slice(0, sllen))] elif not allow_fill or self.ndim == 1: if allow_fill and fill_value is None: @@ -1376,9 +1378,11 @@ def _slice_take_blocks_ax0( # GH#33597 slice instead of take, so we get # views instead of copies blocks = [ - blk.getitem_block([ml], new_mgr_locs=i) + blk.getitem_block(slice(ml, ml + 1), new_mgr_locs=i) for i, ml in enumerate(slobj) ] + # We have + # all(np.shares_memory(nb.values, blk.values) for nb in blocks) return blocks else: return [ @@ -1440,7 +1444,8 @@ def _slice_take_blocks_ax0( # GH#33597 slice instead of take, so we get # views instead of copies for i, ml in zip(taker, mgr_locs): - nb = blk.getitem_block([i], new_mgr_locs=ml) + nb = blk.getitem_block(slice(i, i + 1), new_mgr_locs=ml) + # We have np.shares_memory(nb.values, blk.values) blocks.append(nb) else: nb = blk.take_nd(taker, axis=0, new_mgr_locs=mgr_locs) @@ -1604,14 +1609,23 @@ def _blklocs(self): """ compat with BlockManager """ return None + def getitem_mgr(self, 
indexer) -> SingleBlockManager: + # similar to get_slice, but not restricted to slice indexer + blk = self._block + array = blk._slice(indexer) + block = blk.make_block_same_class(array, placement=slice(0, len(array))) + return type(self)(block, self.index[indexer]) + def get_slice(self, slobj: slice, axis: int = 0) -> SingleBlockManager: + assert isinstance(slobj, slice), type(slobj) if axis >= self.ndim: raise IndexError("Requested axis not found in manager") blk = self._block array = blk._slice(slobj) block = blk.make_block_same_class(array, placement=slice(0, len(array))) - return type(self)(block, self.index[slobj]) + new_index = self.index._getitem_slice(slobj) + return type(self)(block, new_index) @property def index(self) -> Index: @@ -1975,6 +1989,7 @@ def _preprocess_slice_or_indexer(slice_or_indexer, length: int, allow_fill: bool ): return "mask", slice_or_indexer, slice_or_indexer.sum() else: + # TODO: np.intp? indexer = np.asanyarray(slice_or_indexer, dtype=np.int64) if not allow_fill: indexer = maybe_convert_indices(indexer, length) diff --git a/pandas/core/series.py b/pandas/core/series.py index a25178402cf6b..468c3baca92c3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -983,7 +983,8 @@ def _get_values_tuple(self, key): def _get_values(self, indexer): try: - return self._constructor(self._mgr.get_slice(indexer)).__finalize__(self) + new_mgr = self._mgr.getitem_mgr(indexer) + return self._constructor(new_mgr).__finalize__(self) except ValueError: # mpl compat if we look up e.g. 
ser[:, np.newaxis]; # see tests.series.timeseries.test_mpl_compat_hack diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 683006d9b3b9c..a7f318498a8ac 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -823,7 +823,16 @@ def assert_slice_ok(mgr, axis, slobj): slobj = np.concatenate( [slobj, np.zeros(len(ax) - len(slobj), dtype=bool)] ) - sliced = mgr.get_slice(slobj, axis=axis) + + if isinstance(slobj, slice): + sliced = mgr.get_slice(slobj, axis=axis) + elif mgr.ndim == 1 and axis == 0: + sliced = mgr.getitem_mgr(slobj) + else: + # BlockManager doesn't support non-slice, SingleBlockManager + # doesn't support axis > 0 + return + mat_slobj = (slice(None),) * axis + (slobj,) tm.assert_numpy_array_equal( mat[mat_slobj], sliced.as_array(), check_dtype=False