diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index f5dc95590c963..aa16dc9a22f4e 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -817,6 +817,11 @@ def value_counts(self, dropna: bool = True): def __getitem__(self, key): if isinstance(key, tuple): + if len(key) > 1: + if key[0] is Ellipsis: + key = key[1:] + elif key[-1] is Ellipsis: + key = key[:-1] if len(key) > 1: raise IndexError("too many indices for array.") key = key[0] diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 6f7badd3c2cd2..75d9fcd3b4965 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -354,6 +354,15 @@ def __getitem__(self, item: Any) -> Any: "Only integers, slices and integer or " "boolean arrays are valid indices." ) + elif isinstance(item, tuple): + # possibly unpack arr[..., n] to arr[n] + if len(item) == 1: + item = item[0] + elif len(item) == 2: + if item[0] is Ellipsis: + item = item[1] + elif item[1] is Ellipsis: + item = item[0] # We are not an array indexer, so maybe e.g. a slice or integer # indexer. We dispatch to pyarrow. diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index d87df9d224bce..79339c74ca4b9 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -309,18 +309,41 @@ def _slice(self, slicer): return self.values[slicer] @final - def getitem_block(self, slicer, new_mgr_locs=None) -> Block: + def getitem_block(self, slicer) -> Block: """ Perform __getitem__-like, return result as block. Only supports slices that preserve dimensionality. 
""" - if new_mgr_locs is None: - axis0_slicer = slicer[0] if isinstance(slicer, tuple) else slicer - new_mgr_locs = self._mgr_locs[axis0_slicer] - elif not isinstance(new_mgr_locs, BlockPlacement): - new_mgr_locs = BlockPlacement(new_mgr_locs) + axis0_slicer = slicer[0] if isinstance(slicer, tuple) else slicer + new_mgr_locs = self._mgr_locs[axis0_slicer] + + new_values = self._slice(slicer) + + if new_values.ndim != self.values.ndim: + raise ValueError("Only same dim slicing is allowed") + + return type(self)._simple_new(new_values, new_mgr_locs, self.ndim) + @final + def getitem_block_index(self, slicer: slice) -> Block: + """ + Perform __getitem__-like specialized to slicing along index. + + Assumes self.ndim == 2 + """ + # error: Invalid index type "Tuple[ellipsis, slice]" for + # "Union[ndarray, ExtensionArray]"; expected type "Union[int, slice, ndarray]" + new_values = self.values[..., slicer] # type: ignore[index] + return type(self)._simple_new(new_values, self._mgr_locs, ndim=self.ndim) + + @final + def getitem_block_columns(self, slicer, new_mgr_locs: BlockPlacement) -> Block: + """ + Perform __getitem__-like, return result as block. + + Only supports slices that preserve dimensionality. 
+ """ new_values = self._slice(slicer) if new_values.ndim != self.values.ndim: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index a12ed69cf0025..0e502c08cb8f2 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -23,6 +23,7 @@ internals as libinternals, lib, ) +from pandas._libs.internals import BlockPlacement from pandas._typing import ( ArrayLike, Dtype, @@ -801,8 +802,7 @@ def get_slice(self, slobj: slice, axis: int = 0) -> BlockManager: if axis == 0: new_blocks = self._slice_take_blocks_ax0(slobj) elif axis == 1: - slicer = (slice(None), slobj) - new_blocks = [blk.getitem_block(slicer) for blk in self.blocks] + new_blocks = [blk.getitem_block_index(slobj) for blk in self.blocks] else: raise IndexError("Requested axis not found in manager") @@ -1396,7 +1396,8 @@ def _slice_take_blocks_ax0( # TODO(EA2D): special casing unnecessary with 2D EAs if sllen == 0: return [] - return [blk.getitem_block(slobj, new_mgr_locs=slice(0, sllen))] + bp = BlockPlacement(slice(0, sllen)) + return [blk.getitem_block_columns(slobj, new_mgr_locs=bp)] elif not allow_fill or self.ndim == 1: if allow_fill and fill_value is None: fill_value = blk.fill_value @@ -1405,7 +1406,9 @@ def _slice_take_blocks_ax0( # GH#33597 slice instead of take, so we get # views instead of copies blocks = [ - blk.getitem_block(slice(ml, ml + 1), new_mgr_locs=i) + blk.getitem_block_columns( + slice(ml, ml + 1), new_mgr_locs=BlockPlacement(i) + ) for i, ml in enumerate(slobj) ] # We have @@ -1465,13 +1468,15 @@ def _slice_take_blocks_ax0( taker = lib.maybe_indices_to_slice(taker, max_len) if isinstance(taker, slice): - nb = blk.getitem_block(taker, new_mgr_locs=mgr_locs) + nb = blk.getitem_block_columns(taker, new_mgr_locs=mgr_locs) blocks.append(nb) elif only_slice: # GH#33597 slice instead of take, so we get # views instead of copies for i, ml in zip(taker, mgr_locs): - nb = blk.getitem_block(slice(i, i + 1), new_mgr_locs=ml) + slc = 
slice(i, i + 1) + bp = BlockPlacement(ml) + nb = blk.getitem_block_columns(slc, new_mgr_locs=bp) # We have np.shares_memory(nb.values, blk.values) blocks.append(nb) else: diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index a7b99c2e09e88..971da37c105bd 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -245,6 +245,26 @@ def test_getitem_slice(self, data): result = data[slice(1)] # scalar assert isinstance(result, type(data)) + def test_getitem_ellipsis_and_slice(self, data): + # GH#40353 this is called from getitem_block_index + result = data[..., :] + self.assert_extension_array_equal(result, data) + + result = data[:, ...] + self.assert_extension_array_equal(result, data) + + result = data[..., :3] + self.assert_extension_array_equal(result, data[:3]) + + result = data[:3, ...] + self.assert_extension_array_equal(result, data[:3]) + + result = data[..., ::2] + self.assert_extension_array_equal(result, data[::2]) + + result = data[::2, ...] 
+ self.assert_extension_array_equal(result, data[::2]) + def test_get(self, data): # GH 20882 s = pd.Series(data, index=[2 * i for i in range(len(data))]) diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index ca593da6d97bc..a4fedd9a4c5da 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -83,6 +83,16 @@ def _from_factorized(cls, values, original): return cls([UserDict(x) for x in values if x != ()]) def __getitem__(self, item): + if isinstance(item, tuple): + if len(item) > 1: + if item[0] is Ellipsis: + item = item[1:] + elif item[-1] is Ellipsis: + item = item[:-1] + if len(item) > 1: + raise IndexError("too many indices for array.") + item = item[0] + if isinstance(item, numbers.Integral): return self.data[item] elif isinstance(item, slice) and item == slice(None): diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 1728c31ebf767..a8c9a7a22ecdc 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -848,22 +848,27 @@ def assert_slice_ok(mgr, axis, slobj): assert_slice_ok(mgr, ax, slice(1, 4)) assert_slice_ok(mgr, ax, slice(3, 0, -2)) - # boolean mask - assert_slice_ok(mgr, ax, np.array([], dtype=np.bool_)) - assert_slice_ok(mgr, ax, np.ones(mgr.shape[ax], dtype=np.bool_)) - assert_slice_ok(mgr, ax, np.zeros(mgr.shape[ax], dtype=np.bool_)) - - if mgr.shape[ax] >= 3: - assert_slice_ok(mgr, ax, np.arange(mgr.shape[ax]) % 3 == 0) - assert_slice_ok(mgr, ax, np.array([True, True, False], dtype=np.bool_)) + if mgr.ndim < 2: + # 2D only supports slice objects + + # boolean mask + assert_slice_ok(mgr, ax, np.array([], dtype=np.bool_)) + assert_slice_ok(mgr, ax, np.ones(mgr.shape[ax], dtype=np.bool_)) + assert_slice_ok(mgr, ax, np.zeros(mgr.shape[ax], dtype=np.bool_)) + + if mgr.shape[ax] >= 3: + assert_slice_ok(mgr, ax, np.arange(mgr.shape[ax]) % 3 == 0) + assert_slice_ok( + mgr, ax, 
np.array([True, True, False], dtype=np.bool_) + ) - # fancy indexer - assert_slice_ok(mgr, ax, []) - assert_slice_ok(mgr, ax, list(range(mgr.shape[ax]))) + # fancy indexer + assert_slice_ok(mgr, ax, []) + assert_slice_ok(mgr, ax, list(range(mgr.shape[ax]))) - if mgr.shape[ax] >= 3: - assert_slice_ok(mgr, ax, [0, 1, 2]) - assert_slice_ok(mgr, ax, [-1, -2, -3]) + if mgr.shape[ax] >= 3: + assert_slice_ok(mgr, ax, [0, 1, 2]) + assert_slice_ok(mgr, ax, [-1, -2, -3]) @pytest.mark.parametrize("mgr", MANAGERS) def test_take(self, mgr):