Skip to content

REF: better names, stricter typing for mgr/block indexing methods #53259

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions pandas/_libs/internals.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import numpy as np

from pandas._typing import (
ArrayLike,
T,
Self,
npt,
)

Expand Down Expand Up @@ -76,12 +76,12 @@ class SharedBlock:
class NumpyBlock(SharedBlock):
values: np.ndarray
@final
def getitem_block_index(self: T, slicer: slice) -> T: ...
def slice_block_rows(self, slicer: slice) -> Self: ...

class NDArrayBackedBlock(SharedBlock):
values: NDArrayBackedExtensionArray
@final
def getitem_block_index(self: T, slicer: slice) -> T: ...
def slice_block_rows(self, slicer: slice) -> Self: ...

class Block(SharedBlock): ...

Expand All @@ -95,7 +95,7 @@ class BlockManager:
def __init__(
self, blocks: tuple[B, ...], axes: list[Index], verify_integrity=...
) -> None: ...
def get_slice(self: T, slobj: slice, axis: int = ...) -> T: ...
def get_slice(self, slobj: slice, axis: int = ...) -> Self: ...
def _rebuild_blknos_and_blklocs(self) -> None: ...

class BlockValuesRefs:
Expand Down
10 changes: 5 additions & 5 deletions pandas/_libs/internals.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -715,7 +715,7 @@ cdef class NumpyBlock(SharedBlock):
# set placement, ndim and refs
self.values = values

cpdef NumpyBlock getitem_block_index(self, slice slicer):
cpdef NumpyBlock slice_block_rows(self, slice slicer):
"""
Perform __getitem__-like specialized to slicing along index.

Expand Down Expand Up @@ -743,7 +743,7 @@ cdef class NDArrayBackedBlock(SharedBlock):
# set placement, ndim and refs
self.values = values

cpdef NDArrayBackedBlock getitem_block_index(self, slice slicer):
cpdef NDArrayBackedBlock slice_block_rows(self, slice slicer):
"""
Perform __getitem__-like specialized to slicing along index.

Expand Down Expand Up @@ -899,15 +899,15 @@ cdef class BlockManager:
# -------------------------------------------------------------------
# Indexing

cdef BlockManager _get_index_slice(self, slice slobj):
cdef BlockManager _slice_mgr_rows(self, slice slobj):
cdef:
SharedBlock blk, nb
BlockManager mgr
ndarray blknos, blklocs

nbs = []
for blk in self.blocks:
nb = blk.getitem_block_index(slobj)
nb = blk.slice_block_rows(slobj)
nbs.append(nb)

new_axes = [self.axes[0], self.axes[1]._getitem_slice(slobj)]
Expand All @@ -926,7 +926,7 @@ cdef class BlockManager:
if axis == 0:
new_blocks = self._slice_take_blocks_ax0(slobj)
elif axis == 1:
return self._get_index_slice(slobj)
return self._slice_mgr_rows(slobj)
else:
raise IndexError("Requested axis not found in manager")

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -1275,7 +1275,7 @@ def get_slice(self, slobj: slice, axis: AxisInt = 0) -> SingleArrayManager:
new_index = self.index._getitem_slice(slobj)
return type(self)([new_array], [new_index], verify_integrity=False)

def getitem_mgr(self, indexer) -> SingleArrayManager:
def get_rows_with_mask(self, indexer: npt.NDArray[np.bool_]) -> SingleArrayManager:
new_array = self.array[indexer]
new_index = self.index[indexer]
return type(self)([new_array], [new_index])
Expand Down
36 changes: 21 additions & 15 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
FillnaOptions,
IgnoreRaise,
QuantileInterpolation,
Self,
Shape,
npt,
)
Expand Down Expand Up @@ -259,36 +260,41 @@ def __len__(self) -> int:
return len(self.values)

@final
def getitem_block(self, slicer: slice | npt.NDArray[np.intp]) -> Block:
def slice_block_columns(self, slc: slice) -> Self:
"""
Perform __getitem__-like, return result as block.
"""
new_mgr_locs = self._mgr_locs[slc]

new_values = self._slice(slc)
refs = self.refs
return type(self)(new_values, new_mgr_locs, self.ndim, refs=refs)

@final
def take_block_columns(self, indices: npt.NDArray[np.intp]) -> Self:
"""
Perform __getitem__-like, return result as block.

Only supports slices that preserve dimensionality.
"""
# Note: the only place where we are called with ndarray[intp]
# is from internals.concat, and we can verify that never happens
# with 1-column blocks, i.e. never for ExtensionBlock.
# Note: only called from is from internals.concat, and we can verify
# that never happens with 1-column blocks, i.e. never for ExtensionBlock.

new_mgr_locs = self._mgr_locs[slicer]
new_mgr_locs = self._mgr_locs[indices]

new_values = self._slice(slicer)
refs = self.refs if isinstance(slicer, slice) else None
return type(self)(new_values, new_mgr_locs, self.ndim, refs=refs)
new_values = self._slice(indices)
return type(self)(new_values, new_mgr_locs, self.ndim, refs=None)

@final
def getitem_block_columns(
self, slicer: slice, new_mgr_locs: BlockPlacement
) -> Block:
) -> Self:
"""
Perform __getitem__-like, return result as block.

Only supports slices that preserve dimensionality.
"""
new_values = self._slice(slicer)

if new_values.ndim != self.values.ndim:
raise ValueError("Only same dim slicing is allowed")

return type(self)(new_values, new_mgr_locs, self.ndim, refs=self.refs)

@final
Expand Down Expand Up @@ -1997,7 +2003,7 @@ def _slice(
-------
ExtensionArray
"""
# Notes: ndarray[bool] is only reachable when via getitem_mgr, which
# Notes: ndarray[bool] is only reachable when via get_rows_with_mask, which
# is only for Series, i.e. self.ndim == 1.

# return same dims as we currently have
Expand All @@ -2023,7 +2029,7 @@ def _slice(
return self.values[slicer]

@final
def getitem_block_index(self, slicer: slice) -> ExtensionBlock:
def slice_block_rows(self, slicer: slice) -> Self:
"""
Perform __getitem__-like specialized to slicing along index.
"""
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/internals/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,10 @@ def _get_block_for_concat_plan(
slc = lib.maybe_indices_to_slice(ax0_blk_indexer, max_len)
# TODO: in all extant test cases 2023-04-08 we have a slice here.
# Will this always be the case?
nb = blk.getitem_block(slc)
if isinstance(slc, slice):
nb = blk.slice_block_columns(slc)
else:
nb = blk.take_block_columns(slc)

# assert nb.shape == (len(bp), mgr.shape[1])
return nb
Expand Down
20 changes: 5 additions & 15 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@
import pandas.core.algorithms as algos
from pandas.core.arrays import DatetimeArray
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
import pandas.core.common as com
from pandas.core.construction import (
ensure_wrapped_if_datetimelike,
extract_array,
Expand Down Expand Up @@ -1872,7 +1871,7 @@ def concat_horizontal(cls, mgrs: list[Self], axes: list[Index]) -> Self:
# We need to do getitem_block here otherwise we would be altering
# blk.mgr_locs in place, which would render it invalid. This is only
# relevant in the copy=False case.
nb = blk.getitem_block(slice(None))
nb = blk.slice_block_columns(slice(None))
nb._mgr_locs = nb._mgr_locs.add(offset)
blocks.append(nb)

Expand Down Expand Up @@ -2018,21 +2017,12 @@ def _blklocs(self):
"""compat with BlockManager"""
return None

def getitem_mgr(self, indexer: slice | np.ndarray) -> SingleBlockManager:
def get_rows_with_mask(self, indexer: npt.NDArray[np.bool_]) -> Self:
# similar to get_slice, but not restricted to slice indexer
blk = self._block
if (
using_copy_on_write()
and isinstance(indexer, np.ndarray)
and len(indexer) > 0
and com.is_bool_indexer(indexer)
and indexer.all()
):
if using_copy_on_write() and len(indexer) > 0 and indexer.all():
return type(self)(blk.copy(deep=False), self.index)
array = blk._slice(indexer)
if array.ndim > 1:
# This will be caught by Series._get_values
raise ValueError("dimension-expanding indexing not allowed")
array = blk.values[indexer]

bp = BlockPlacement(slice(0, len(array)))
# TODO(CoW) in theory only need to track reference if new_array is a view
Expand All @@ -2048,7 +2038,7 @@ def get_slice(self, slobj: slice, axis: AxisInt = 0) -> SingleBlockManager:
raise IndexError("Requested axis not found in manager")

blk = self._block
array = blk._slice(slobj)
array = blk.values[slobj]
bp = BlockPlacement(slice(0, len(array)))
# TODO this method is only used in groupby SeriesSplitter at the moment,
# so passing refs is not yet covered by the tests
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -998,7 +998,7 @@ def __getitem__(self, key):
if com.is_bool_indexer(key):
key = check_bool_indexer(self.index, key)
key = np.asarray(key, dtype=bool)
return self._get_values(key)
return self._get_rows_with_mask(key)

return self._get_with(key)

Expand Down Expand Up @@ -1054,8 +1054,8 @@ def _get_values_tuple(self, key: tuple):
new_ser._mgr.add_references(self._mgr) # type: ignore[arg-type]
return new_ser.__finalize__(self)

def _get_values(self, indexer: slice | npt.NDArray[np.bool_]) -> Series:
new_mgr = self._mgr.getitem_mgr(indexer)
def _get_rows_with_mask(self, indexer: npt.NDArray[np.bool_]) -> Series:
new_mgr = self._mgr.get_rows_with_mask(indexer)
return self._constructor(new_mgr, fastpath=True).__finalize__(self)

def _get_value(self, label, takeable: bool = False):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/base/getitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ def test_getitem_slice(self, data):
assert isinstance(result, type(data))

def test_getitem_ellipsis_and_slice(self, data):
# GH#40353 this is called from getitem_block_index
# GH#40353 this is called from slice_block_rows
result = data[..., :]
self.assert_extension_array_equal(result, data)

Expand Down
19 changes: 8 additions & 11 deletions pandas/tests/internals/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -942,12 +942,17 @@ def assert_slice_ok(mgr, axis, slobj):

if isinstance(slobj, slice):
sliced = mgr.get_slice(slobj, axis=axis)
elif mgr.ndim == 1 and axis == 0:
sliced = mgr.getitem_mgr(slobj)
elif (
mgr.ndim == 1
and axis == 0
and isinstance(slobj, np.ndarray)
and slobj.dtype == bool
):
sliced = mgr.get_rows_with_mask(slobj)
else:
# BlockManager doesn't support non-slice, SingleBlockManager
# doesn't support axis > 0
return
raise TypeError(slobj)

mat_slobj = (slice(None),) * axis + (slobj,)
tm.assert_numpy_array_equal(
Expand Down Expand Up @@ -978,14 +983,6 @@ def assert_slice_ok(mgr, axis, slobj):
mgr, ax, np.array([True, True, False], dtype=np.bool_)
)

# fancy indexer
assert_slice_ok(mgr, ax, [])
assert_slice_ok(mgr, ax, list(range(mgr.shape[ax])))

if mgr.shape[ax] >= 3:
assert_slice_ok(mgr, ax, [0, 1, 2])
assert_slice_ok(mgr, ax, [-1, -2, -3])

@pytest.mark.parametrize("mgr", MANAGERS)
def test_take(self, mgr):
def assert_take_ok(mgr, axis, indexer):
Expand Down