Skip to content

Commit 49c4ab7

Browse files
jbrockmendelim-vinicius
authored and
im-vinicius
committed
REF: better names, stricter typing for mgr/block indexing methods (pandas-dev#53259)
* REF: tighter typing, better names for manager indexing methods * REF: don't need _slice in SingleManager calls * fix CoW test * fix test
1 parent daa2f64 commit 49c4ab7

File tree

9 files changed

+52
-56
lines changed

9 files changed

+52
-56
lines changed

pandas/_libs/internals.pyi

+4-4
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import numpy as np
1010

1111
from pandas._typing import (
1212
ArrayLike,
13-
T,
13+
Self,
1414
npt,
1515
)
1616

@@ -76,12 +76,12 @@ class SharedBlock:
7676
class NumpyBlock(SharedBlock):
7777
values: np.ndarray
7878
@final
79-
def getitem_block_index(self: T, slicer: slice) -> T: ...
79+
def slice_block_rows(self, slicer: slice) -> Self: ...
8080

8181
class NDArrayBackedBlock(SharedBlock):
8282
values: NDArrayBackedExtensionArray
8383
@final
84-
def getitem_block_index(self: T, slicer: slice) -> T: ...
84+
def slice_block_rows(self, slicer: slice) -> Self: ...
8585

8686
class Block(SharedBlock): ...
8787

@@ -95,7 +95,7 @@ class BlockManager:
9595
def __init__(
9696
self, blocks: tuple[B, ...], axes: list[Index], verify_integrity=...
9797
) -> None: ...
98-
def get_slice(self: T, slobj: slice, axis: int = ...) -> T: ...
98+
def get_slice(self, slobj: slice, axis: int = ...) -> Self: ...
9999
def _rebuild_blknos_and_blklocs(self) -> None: ...
100100

101101
class BlockValuesRefs:

pandas/_libs/internals.pyx

+5-5
Original file line numberDiff line numberDiff line change
@@ -715,7 +715,7 @@ cdef class NumpyBlock(SharedBlock):
715715
# set placement, ndim and refs
716716
self.values = values
717717

718-
cpdef NumpyBlock getitem_block_index(self, slice slicer):
718+
cpdef NumpyBlock slice_block_rows(self, slice slicer):
719719
"""
720720
Perform __getitem__-like specialized to slicing along index.
721721
@@ -743,7 +743,7 @@ cdef class NDArrayBackedBlock(SharedBlock):
743743
# set placement, ndim and refs
744744
self.values = values
745745

746-
cpdef NDArrayBackedBlock getitem_block_index(self, slice slicer):
746+
cpdef NDArrayBackedBlock slice_block_rows(self, slice slicer):
747747
"""
748748
Perform __getitem__-like specialized to slicing along index.
749749
@@ -899,15 +899,15 @@ cdef class BlockManager:
899899
# -------------------------------------------------------------------
900900
# Indexing
901901

902-
cdef BlockManager _get_index_slice(self, slice slobj):
902+
cdef BlockManager _slice_mgr_rows(self, slice slobj):
903903
cdef:
904904
SharedBlock blk, nb
905905
BlockManager mgr
906906
ndarray blknos, blklocs
907907

908908
nbs = []
909909
for blk in self.blocks:
910-
nb = blk.getitem_block_index(slobj)
910+
nb = blk.slice_block_rows(slobj)
911911
nbs.append(nb)
912912

913913
new_axes = [self.axes[0], self.axes[1]._getitem_slice(slobj)]
@@ -926,7 +926,7 @@ cdef class BlockManager:
926926
if axis == 0:
927927
new_blocks = self._slice_take_blocks_ax0(slobj)
928928
elif axis == 1:
929-
return self._get_index_slice(slobj)
929+
return self._slice_mgr_rows(slobj)
930930
else:
931931
raise IndexError("Requested axis not found in manager")
932932

pandas/core/internals/array_manager.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1275,7 +1275,7 @@ def get_slice(self, slobj: slice, axis: AxisInt = 0) -> SingleArrayManager:
12751275
new_index = self.index._getitem_slice(slobj)
12761276
return type(self)([new_array], [new_index], verify_integrity=False)
12771277

1278-
def getitem_mgr(self, indexer) -> SingleArrayManager:
1278+
def get_rows_with_mask(self, indexer: npt.NDArray[np.bool_]) -> SingleArrayManager:
12791279
new_array = self.array[indexer]
12801280
new_index = self.index[indexer]
12811281
return type(self)([new_array], [new_index])

pandas/core/internals/blocks.py

+21-15
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
FillnaOptions,
3737
IgnoreRaise,
3838
QuantileInterpolation,
39+
Self,
3940
Shape,
4041
npt,
4142
)
@@ -259,36 +260,41 @@ def __len__(self) -> int:
259260
return len(self.values)
260261

261262
@final
262-
def getitem_block(self, slicer: slice | npt.NDArray[np.intp]) -> Block:
263+
def slice_block_columns(self, slc: slice) -> Self:
264+
"""
265+
Perform __getitem__-like, return result as block.
266+
"""
267+
new_mgr_locs = self._mgr_locs[slc]
268+
269+
new_values = self._slice(slc)
270+
refs = self.refs
271+
return type(self)(new_values, new_mgr_locs, self.ndim, refs=refs)
272+
273+
@final
274+
def take_block_columns(self, indices: npt.NDArray[np.intp]) -> Self:
263275
"""
264276
Perform __getitem__-like, return result as block.
265277
266278
Only supports slices that preserve dimensionality.
267279
"""
268-
# Note: the only place where we are called with ndarray[intp]
269-
# is from internals.concat, and we can verify that never happens
270-
# with 1-column blocks, i.e. never for ExtensionBlock.
280+
# Note: only called from internals.concat, and we can verify
281+
# that never happens with 1-column blocks, i.e. never for ExtensionBlock.
271282

272-
new_mgr_locs = self._mgr_locs[slicer]
283+
new_mgr_locs = self._mgr_locs[indices]
273284

274-
new_values = self._slice(slicer)
275-
refs = self.refs if isinstance(slicer, slice) else None
276-
return type(self)(new_values, new_mgr_locs, self.ndim, refs=refs)
285+
new_values = self._slice(indices)
286+
return type(self)(new_values, new_mgr_locs, self.ndim, refs=None)
277287

278288
@final
279289
def getitem_block_columns(
280290
self, slicer: slice, new_mgr_locs: BlockPlacement
281-
) -> Block:
291+
) -> Self:
282292
"""
283293
Perform __getitem__-like, return result as block.
284294
285295
Only supports slices that preserve dimensionality.
286296
"""
287297
new_values = self._slice(slicer)
288-
289-
if new_values.ndim != self.values.ndim:
290-
raise ValueError("Only same dim slicing is allowed")
291-
292298
return type(self)(new_values, new_mgr_locs, self.ndim, refs=self.refs)
293299

294300
@final
@@ -1997,7 +2003,7 @@ def _slice(
19972003
-------
19982004
ExtensionArray
19992005
"""
2000-
# Notes: ndarray[bool] is only reachable when via getitem_mgr, which
2006+
# Notes: ndarray[bool] is only reachable via get_rows_with_mask, which
20012007
# is only for Series, i.e. self.ndim == 1.
20022008

20032009
# return same dims as we currently have
@@ -2023,7 +2029,7 @@ def _slice(
20232029
return self.values[slicer]
20242030

20252031
@final
2026-
def getitem_block_index(self, slicer: slice) -> ExtensionBlock:
2032+
def slice_block_rows(self, slicer: slice) -> Self:
20272033
"""
20282034
Perform __getitem__-like specialized to slicing along index.
20292035
"""

pandas/core/internals/concat.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,10 @@ def _get_block_for_concat_plan(
328328
slc = lib.maybe_indices_to_slice(ax0_blk_indexer, max_len)
329329
# TODO: in all extant test cases 2023-04-08 we have a slice here.
330330
# Will this always be the case?
331-
nb = blk.getitem_block(slc)
331+
if isinstance(slc, slice):
332+
nb = blk.slice_block_columns(slc)
333+
else:
334+
nb = blk.take_block_columns(slc)
332335

333336
# assert nb.shape == (len(bp), mgr.shape[1])
334337
return nb

pandas/core/internals/managers.py

+5-15
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@
5454
import pandas.core.algorithms as algos
5555
from pandas.core.arrays import DatetimeArray
5656
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
57-
import pandas.core.common as com
5857
from pandas.core.construction import (
5958
ensure_wrapped_if_datetimelike,
6059
extract_array,
@@ -1872,7 +1871,7 @@ def concat_horizontal(cls, mgrs: list[Self], axes: list[Index]) -> Self:
18721871
# We need to do slice_block_columns here otherwise we would be altering
18731872
# blk.mgr_locs in place, which would render it invalid. This is only
18741873
# relevant in the copy=False case.
1875-
nb = blk.getitem_block(slice(None))
1874+
nb = blk.slice_block_columns(slice(None))
18761875
nb._mgr_locs = nb._mgr_locs.add(offset)
18771876
blocks.append(nb)
18781877

@@ -2018,21 +2017,12 @@ def _blklocs(self):
20182017
"""compat with BlockManager"""
20192018
return None
20202019

2021-
def getitem_mgr(self, indexer: slice | np.ndarray) -> SingleBlockManager:
2020+
def get_rows_with_mask(self, indexer: npt.NDArray[np.bool_]) -> Self:
20222021
# similar to get_slice, but not restricted to slice indexer
20232022
blk = self._block
2024-
if (
2025-
using_copy_on_write()
2026-
and isinstance(indexer, np.ndarray)
2027-
and len(indexer) > 0
2028-
and com.is_bool_indexer(indexer)
2029-
and indexer.all()
2030-
):
2023+
if using_copy_on_write() and len(indexer) > 0 and indexer.all():
20312024
return type(self)(blk.copy(deep=False), self.index)
2032-
array = blk._slice(indexer)
2033-
if array.ndim > 1:
2034-
# This will be caught by Series._get_values
2035-
raise ValueError("dimension-expanding indexing not allowed")
2025+
array = blk.values[indexer]
20362026

20372027
bp = BlockPlacement(slice(0, len(array)))
20382028
# TODO(CoW) in theory only need to track reference if new_array is a view
@@ -2048,7 +2038,7 @@ def get_slice(self, slobj: slice, axis: AxisInt = 0) -> SingleBlockManager:
20482038
raise IndexError("Requested axis not found in manager")
20492039

20502040
blk = self._block
2051-
array = blk._slice(slobj)
2041+
array = blk.values[slobj]
20522042
bp = BlockPlacement(slice(0, len(array)))
20532043
# TODO this method is only used in groupby SeriesSplitter at the moment,
20542044
# so passing refs is not yet covered by the tests

pandas/core/series.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1004,7 +1004,7 @@ def __getitem__(self, key):
10041004
if com.is_bool_indexer(key):
10051005
key = check_bool_indexer(self.index, key)
10061006
key = np.asarray(key, dtype=bool)
1007-
return self._get_values(key)
1007+
return self._get_rows_with_mask(key)
10081008

10091009
return self._get_with(key)
10101010

@@ -1060,8 +1060,8 @@ def _get_values_tuple(self, key: tuple):
10601060
new_ser._mgr.add_references(self._mgr) # type: ignore[arg-type]
10611061
return new_ser.__finalize__(self)
10621062

1063-
def _get_values(self, indexer: slice | npt.NDArray[np.bool_]) -> Series:
1064-
new_mgr = self._mgr.getitem_mgr(indexer)
1063+
def _get_rows_with_mask(self, indexer: npt.NDArray[np.bool_]) -> Series:
1064+
new_mgr = self._mgr.get_rows_with_mask(indexer)
10651065
return self._constructor(new_mgr, fastpath=True).__finalize__(self)
10661066

10671067
def _get_value(self, label, takeable: bool = False):

pandas/tests/extension/base/getitem.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,7 @@ def test_getitem_slice(self, data):
285285
assert isinstance(result, type(data))
286286

287287
def test_getitem_ellipsis_and_slice(self, data):
288-
# GH#40353 this is called from getitem_block_index
288+
# GH#40353 this is called from slice_block_rows
289289
result = data[..., :]
290290
self.assert_extension_array_equal(result, data)
291291

pandas/tests/internals/test_internals.py

+8-11
Original file line numberDiff line numberDiff line change
@@ -942,12 +942,17 @@ def assert_slice_ok(mgr, axis, slobj):
942942

943943
if isinstance(slobj, slice):
944944
sliced = mgr.get_slice(slobj, axis=axis)
945-
elif mgr.ndim == 1 and axis == 0:
946-
sliced = mgr.getitem_mgr(slobj)
945+
elif (
946+
mgr.ndim == 1
947+
and axis == 0
948+
and isinstance(slobj, np.ndarray)
949+
and slobj.dtype == bool
950+
):
951+
sliced = mgr.get_rows_with_mask(slobj)
947952
else:
948953
# BlockManager doesn't support non-slice, SingleBlockManager
949954
# doesn't support axis > 0
950-
return
955+
raise TypeError(slobj)
951956

952957
mat_slobj = (slice(None),) * axis + (slobj,)
953958
tm.assert_numpy_array_equal(
@@ -978,14 +983,6 @@ def assert_slice_ok(mgr, axis, slobj):
978983
mgr, ax, np.array([True, True, False], dtype=np.bool_)
979984
)
980985

981-
# fancy indexer
982-
assert_slice_ok(mgr, ax, [])
983-
assert_slice_ok(mgr, ax, list(range(mgr.shape[ax])))
984-
985-
if mgr.shape[ax] >= 3:
986-
assert_slice_ok(mgr, ax, [0, 1, 2])
987-
assert_slice_ok(mgr, ax, [-1, -2, -3])
988-
989986
@pytest.mark.parametrize("mgr", MANAGERS)
990987
def test_take(self, mgr):
991988
def assert_take_ok(mgr, axis, indexer):

0 commit comments

Comments
 (0)