Skip to content

CLN: BlockManager.get_slice require only slice arg #40262

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Mar 10, 2021
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4560,6 +4560,13 @@ def __getitem__(self, key):
else:
return result

def _getitem_slice(self: _IndexT, slobj: slice) -> _IndexT:
"""
Fastpath for __getitem__ when we know we have a slice.
"""
res = self._data[slobj]
return type(self)._simple_new(res, name=self._name)

@final
def _can_hold_identifiers_and_holds_name(self, name) -> bool:
"""
Expand Down
18 changes: 18 additions & 0 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2097,6 +2097,24 @@ def __getitem__(self, key):
verify_integrity=False,
)

def _getitem_slice(self: MultiIndex, slobj: slice) -> MultiIndex:
"""
Fastpath for __getitem__ when we know we have a slice.
"""
sortorder = None
if slobj.step is None or slobj.step > 0:
sortorder = self.sortorder

new_codes = [level_codes[slobj] for level_codes in self.codes]

return type(self)(
levels=self.levels,
codes=new_codes,
names=self._names,
sortorder=sortorder,
verify_integrity=False,
)

@Appender(_index_shared_docs["take"] % _index_doc_kwargs)
def take(
self: MultiIndex, indices, axis=0, allow_fill=True, fill_value=None, **kwargs
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -816,6 +816,13 @@ def __getitem__(self, key):
# fall back to Int64Index
return super().__getitem__(key)

def _getitem_slice(self: RangeIndex, slobj: slice) -> RangeIndex:
"""
Fastpath for __getitem__ when we know we have a slice.
"""
res = self._range[slobj]
return type(self)._simple_new(res, name=self._name)

@unpack_zerodim_and_defer("__floordiv__")
def __floordiv__(self, other):

Expand Down
2 changes: 2 additions & 0 deletions pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -779,6 +779,8 @@ def get_slice(self, slobj: slice, axis: int = 0) -> ArrayManager:

return type(self)(arrays, new_axes, verify_integrity=False)

getitem_mgr = get_slice

def fast_xs(self, loc: int) -> ArrayLike:
"""
Return the array corresponding to `frame.iloc[loc]`.
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ def getitem_block(self, slicer, new_mgr_locs=None) -> Block:
"""
Perform __getitem__-like, return result as block.

As of now, only supports slices that preserve dimensionality.
Only supports slices that preserve dimensionality.
"""
if new_mgr_locs is None:
axis0_slicer = slicer[0] if isinstance(slicer, tuple) else slicer
Expand Down
35 changes: 25 additions & 10 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -802,6 +802,7 @@ def _combine(
return type(self).from_blocks(new_blocks, axes)

def get_slice(self, slobj: slice, axis: int = 0) -> BlockManager:
assert isinstance(slobj, slice), type(slobj)

if axis == 0:
new_blocks = self._slice_take_blocks_ax0(slobj)
Expand All @@ -812,7 +813,7 @@ def get_slice(self, slobj: slice, axis: int = 0) -> BlockManager:
raise IndexError("Requested axis not found in manager")

new_axes = list(self.axes)
new_axes[axis] = new_axes[axis][slobj]
new_axes[axis] = new_axes[axis]._getitem_slice(slobj)

return type(self)._simple_new(tuple(new_blocks), new_axes)

Expand Down Expand Up @@ -1201,15 +1202,17 @@ def value_getitem(placement):
# Newly created block's dtype may already be present.
self._known_consolidated = False

def insert(self, loc: int, item: Hashable, value, allow_duplicates: bool = False):
def insert(
self, loc: int, item: Hashable, value: ArrayLike, allow_duplicates: bool = False
):
"""
Insert item at selected position.

Parameters
----------
loc : int
item : hashable
value : array_like
value : np.ndarray or ExtensionArray
allow_duplicates: bool
If False, trying to insert non-unique item will raise

Expand All @@ -1226,11 +1229,9 @@ def insert(self, loc: int, item: Hashable, value, allow_duplicates: bool = False

if value.ndim == 2:
value = value.T
elif value.ndim == self.ndim - 1 and not is_extension_array_dtype(value.dtype):
# TODO(EA2D): special case not needed with 2D EAs
value = ensure_block_shape(value, ndim=2)
else:
value = ensure_block_shape(value, ndim=self.ndim)

# TODO: type value as ArrayLike
block = new_block(values=value, ndim=self.ndim, placement=slice(loc, loc + 1))

for blkno, count in _fast_count_smallints(self.blknos[loc:]):
Expand Down Expand Up @@ -1367,6 +1368,7 @@ def _slice_take_blocks_ax0(
# TODO(EA2D): special casing unnecessary with 2D EAs
if sllen == 0:
return []
# TODO: tests all have isinstance(slobj, slice), other possibilities?
return [blk.getitem_block(slobj, new_mgr_locs=slice(0, sllen))]
elif not allow_fill or self.ndim == 1:
if allow_fill and fill_value is None:
Expand All @@ -1376,9 +1378,11 @@ def _slice_take_blocks_ax0(
# GH#33597 slice instead of take, so we get
# views instead of copies
blocks = [
blk.getitem_block([ml], new_mgr_locs=i)
blk.getitem_block(slice(ml, ml + 1), new_mgr_locs=i)
for i, ml in enumerate(slobj)
]
# We have
# all(np.shares_memory(nb.values, blk.values) for nb in blocks)
return blocks
else:
return [
Expand Down Expand Up @@ -1440,7 +1444,8 @@ def _slice_take_blocks_ax0(
# GH#33597 slice instead of take, so we get
# views instead of copies
for i, ml in zip(taker, mgr_locs):
nb = blk.getitem_block([i], new_mgr_locs=ml)
nb = blk.getitem_block(slice(i, i + 1), new_mgr_locs=ml)
# We have np.shares_memory(nb.values, blk.values)
blocks.append(nb)
else:
nb = blk.take_nd(taker, axis=0, new_mgr_locs=mgr_locs)
Expand Down Expand Up @@ -1604,14 +1609,23 @@ def _blklocs(self):
""" compat with BlockManager """
return None

def getitem_mgr(self, indexer) -> SingleBlockManager:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shouldn't this exist for BlockManager and just be no-op?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

its only ever used by Series

# similar to get_slice, but not restricted to slice indexer
blk = self._block
array = blk._slice(indexer)
block = blk.make_block_same_class(array, placement=slice(0, len(array)))
return type(self)(block, self.index[indexer])

def get_slice(self, slobj: slice, axis: int = 0) -> SingleBlockManager:
assert isinstance(slobj, slice), type(slobj)
if axis >= self.ndim:
raise IndexError("Requested axis not found in manager")

blk = self._block
array = blk._slice(slobj)
block = blk.make_block_same_class(array, placement=slice(0, len(array)))
return type(self)(block, self.index[slobj])
new_index = self.index._getitem_slice(slobj)
return type(self)(block, new_index)

@property
def index(self) -> Index:
Expand Down Expand Up @@ -1975,6 +1989,7 @@ def _preprocess_slice_or_indexer(slice_or_indexer, length: int, allow_fill: bool
):
return "mask", slice_or_indexer, slice_or_indexer.sum()
else:
# TODO: np.intp?
indexer = np.asanyarray(slice_or_indexer, dtype=np.int64)
if not allow_fill:
indexer = maybe_convert_indices(indexer, length)
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -983,7 +983,8 @@ def _get_values_tuple(self, key):

def _get_values(self, indexer):
try:
return self._constructor(self._mgr.get_slice(indexer)).__finalize__(self)
new_mgr = self._mgr.getitem_mgr(indexer)
return self._constructor(new_mgr).__finalize__(self)
except ValueError:
# mpl compat if we look up e.g. ser[:, np.newaxis];
# see tests.series.timeseries.test_mpl_compat_hack
Expand Down
11 changes: 10 additions & 1 deletion pandas/tests/internals/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -823,7 +823,16 @@ def assert_slice_ok(mgr, axis, slobj):
slobj = np.concatenate(
[slobj, np.zeros(len(ax) - len(slobj), dtype=bool)]
)
sliced = mgr.get_slice(slobj, axis=axis)

if isinstance(slobj, slice):
sliced = mgr.get_slice(slobj, axis=axis)
elif mgr.ndim == 1 and axis == 0:
sliced = mgr.getitem_mgr(slobj)
else:
# BlockManager doesnt support non-slice, SingleBlockManager
# doesnt support axis > 0
return

mat_slobj = (slice(None),) * axis + (slobj,)
tm.assert_numpy_array_equal(
mat[mat_slobj], sliced.as_array(), check_dtype=False
Expand Down