
PERF: repeated slicing along index in groupby #40353


Merged: 26 commits, merged on Mar 16, 2021. The diff below shows changes from 20 of the 26 commits.

Commits (26)
60a8649 CLN: BlockManager.get_slice require only slice arg (jbrockmendel, Mar 6, 2021)
91436a5 Merge branch 'master' into ref-slice (jbrockmendel, Mar 6, 2021)
b17ad65 mypy fixup (jbrockmendel, Mar 6, 2021)
9fa658f Merge branch 'master' into ref-slice (jbrockmendel, Mar 6, 2021)
5afce04 PERF: implement Index._getitem_slice (jbrockmendel, Mar 6, 2021)
36a8530 PERF: implement getitem_block_columns (jbrockmendel, Mar 8, 2021)
c528119 Merge branch 'master' into cln-getitem_block (jbrockmendel, Mar 9, 2021)
dc5f975 Merge branch 'master' into cln-getitem_block (jbrockmendel, Mar 10, 2021)
14ee965 PERF: repeated slicing along index in groupby (jbrockmendel, Mar 10, 2021)
f6655ad mypy fixup (jbrockmendel, Mar 10, 2021)
21fe008 Merge branch 'master' into cln-getitem_block (jbrockmendel, Mar 11, 2021)
e4eae87 Merge branch 'master' into cln-getitem_block (jbrockmendel, Mar 11, 2021)
ce2dec6 Merge branch 'master' into cln-getitem_block (jbrockmendel, Mar 11, 2021)
8ff5167 mypy fixup (jbrockmendel, Mar 11, 2021)
2ade2bb revert (jbrockmendel, Mar 12, 2021)
c594fd0 comment typo fixup (jbrockmendel, Mar 12, 2021)
0cefeb2 Merge branch 'master' into cln-getitem_block (jbrockmendel, Mar 12, 2021)
e9d0a92 type:ignore (jbrockmendel, Mar 12, 2021)
9a4ccc1 Merge branch 'master' into cln-getitem_block (jbrockmendel, Mar 12, 2021)
14497df TST: EA[..., slc] (jbrockmendel, Mar 12, 2021)
9a3969f Merge branch 'master' into cln-getitem_block (jbrockmendel, Mar 14, 2021)
23cf40d recert get_slice_index (jbrockmendel, Mar 14, 2021)
12266aa Merge branch 'master' into cln-getitem_block (jbrockmendel, Mar 14, 2021)
d13ad92 Merge branch 'master' into cln-getitem_block (jbrockmendel, Mar 15, 2021)
e36b7b0 TST: arr[foo, ...] (jbrockmendel, Mar 15, 2021)
bc0a110 Merge branch 'master' into cln-getitem_block (jbrockmendel, Mar 16, 2021)
2 changes: 2 additions & 0 deletions pandas/core/arrays/sparse/array.py
@@ -817,6 +817,8 @@ def value_counts(self, dropna: bool = True):
def __getitem__(self, key):

if isinstance(key, tuple):
if len(key) and key[0] is Ellipsis:
key = key[1:]
if len(key) > 1:
raise IndexError("too many indices for array.")
key = key[0]
9 changes: 9 additions & 0 deletions pandas/core/arrays/string_arrow.py
@@ -354,6 +354,15 @@ def __getitem__(self, item: Any) -> Any:
"Only integers, slices and integer or "
"boolean arrays are valid indices."
)
elif isinstance(item, tuple):
# possibly unpack arr[..., n] to arr[n]
if len(item) == 1:
item = item[0]
elif len(item) == 2:
if item[0] is Ellipsis:
item = item[1]
elif item[1] is Ellipsis:
item = item[0]

# We are not an array indexer, so maybe e.g. a slice or integer
# indexer. We dispatch to pyarrow.
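Both ExtensionArray changes above follow the same pattern: unpack a tuple key whose leading element is Ellipsis, so these 1D arrays accept the arr[..., slc] keys produced by the new block-slicing fastpath below. A minimal standalone sketch of that unpacking logic (an illustration, not the pandas source; the helper name is made up):

def unpack_ellipsis_key(key):
    # Mirror of the tuple handling added to SparseArray.__getitem__ above:
    # drop a leading Ellipsis, then require at most one remaining index,
    # so arr[..., slc] ends up behaving like arr[slc] for a 1D array.
    if isinstance(key, tuple):
        if len(key) and key[0] is Ellipsis:
            key = key[1:]
        if len(key) > 1:
            raise IndexError("too many indices for array.")
        key = key[0]
    return key


assert unpack_ellipsis_key((Ellipsis, slice(2, 5))) == slice(2, 5)
assert unpack_ellipsis_key(7) == 7  # non-tuple keys pass through unchanged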
40 changes: 29 additions & 11 deletions pandas/core/groupby/ops.py
@@ -1012,19 +1012,27 @@ def __iter__(self):

starts, ends = lib.generate_slices(self.slabels, self.ngroups)

for i, (start, end) in enumerate(zip(starts, ends)):
yield i, self._chop(sdata, slice(start, end))
if self.axis == 0:
Contributor (review comment): can make this more readable by

    method = self._chop_index if self.axis == 0 else self._chop_column
    for i ......
        yield i, method(....)

for i, (start, end) in enumerate(zip(starts, ends)):
yield i, self._chop_index(sdata, slice(start, end))

else:
for i, (start, end) in enumerate(zip(starts, ends)):
yield i, self._chop_columns(sdata, slice(start, end))

@cache_readonly
def sorted_data(self) -> FrameOrSeries:
return self.data.take(self.sort_idx, axis=self.axis)

def _chop(self, sdata, slice_obj: slice) -> NDFrame:
def _chop_columns(self, sdata, slice_obj: slice) -> NDFrame:
raise AbstractMethodError(self)

def _chop_index(self, sdata, slice_obj: slice) -> NDFrame:
raise AbstractMethodError(self)


class SeriesSplitter(DataSplitter):
def _chop(self, sdata: Series, slice_obj: slice) -> Series:
def _chop_index(self, sdata: Series, slice_obj: slice) -> Series:
# fastpath equivalent to `sdata.iloc[slice_obj]`
mgr = sdata._mgr.get_slice(slice_obj)
# __finalize__ not called here, must be applied by caller if applicable
@@ -1043,13 +1051,23 @@ def fast_apply(self, f: F, sdata: FrameOrSeries, names):
starts, ends = lib.generate_slices(self.slabels, self.ngroups)
return libreduction.apply_frame_axis0(sdata, f, names, starts, ends)

def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame:
# Fastpath equivalent to:
# if self.axis == 0:
# return sdata.iloc[slice_obj]
# else:
# return sdata.iloc[:, slice_obj]
mgr = sdata._mgr.get_slice(slice_obj, axis=1 - self.axis)
def _chop_index(self, sdata: DataFrame, slice_obj: slice) -> DataFrame:
"""
Fastpath equivalent to `sdata.iloc[slice_obj]`
"""
mgr = sdata._mgr.get_slice_index(slice_obj)
# __finalize__ not called here, must be applied by caller if applicable

# fastpath equivalent to `return sdata._constructor(mgr)`
obj = type(sdata)._from_mgr(mgr)
object.__setattr__(obj, "_flags", sdata._flags)
return obj

def _chop_columns(self, sdata: DataFrame, slice_obj: slice) -> DataFrame:
"""
Fastpath equivalent to `sdata.iloc[:, slice_obj]`
"""
mgr = sdata._mgr.get_slice(slice_obj, axis=0)
# __finalize__ not called here, must be applied by caller if applicable

# fastpath equivalent to `return sdata._constructor(mgr)`
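As a side note on the reviewer's readability suggestion in the __iter__ hunk above (pick the chop method once instead of branching inside the loop), here is a self-contained toy sketch of that shape. It is illustrative only: the class and data are made up, and the merged diff keeps the explicit if/else with the real methods _chop_index and _chop_columns.

class SplitterSketch:
    # Toy stand-in for DataSplitter, not pandas code: demonstrates selecting
    # the chop method once, then yielding (index, chunk) pairs per group.
    def __init__(self, data, bounds, axis=0):
        self.data = data      # a list of rows
        self.bounds = bounds  # list of (start, end) positions per group
        self.axis = axis

    def _chop_index(self, sdata, slc):
        return sdata[slc]

    def _chop_columns(self, sdata, slc):
        return [row[slc] for row in sdata]

    def __iter__(self):
        chop = self._chop_index if self.axis == 0 else self._chop_columns
        for i, (start, end) in enumerate(self.bounds):
            yield i, chop(self.data, slice(start, end))


frame = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]
print(list(SplitterSketch(frame, [(0, 2), (2, 4)])))
# [(0, [[1, 2, 3], [4, 5, 6]]), (1, [[7, 8, 9], [10, 11, 12]])]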
9 changes: 9 additions & 0 deletions pandas/core/internals/base.py
@@ -122,6 +122,15 @@ def apply(
def isna(self: T, func) -> T:
return self.apply("apply", func=func)

def get_slice(self: T, slobj: slice, axis: int = 0) -> T:
raise AbstractMethodError(self)

def get_slice_index(self: T, slobj: slice) -> T:
"""
get_slice specialized to axis=self.ndim-1
"""
return self.get_slice(slobj, axis=self.ndim - 1)


class SingleDataManager(DataManager):
ndim = 1
35 changes: 29 additions & 6 deletions pandas/core/internals/blocks.py
@@ -322,18 +322,41 @@ def _slice(self, slicer):
return self.values[slicer]

@final
def getitem_block(self, slicer, new_mgr_locs=None) -> Block:
def getitem_block(self, slicer) -> Block:
"""
Perform __getitem__-like, return result as block.

Only supports slices that preserve dimensionality.
"""
if new_mgr_locs is None:
axis0_slicer = slicer[0] if isinstance(slicer, tuple) else slicer
new_mgr_locs = self.mgr_locs[axis0_slicer]
elif not isinstance(new_mgr_locs, BlockPlacement):
new_mgr_locs = BlockPlacement(new_mgr_locs)
axis0_slicer = slicer[0] if isinstance(slicer, tuple) else slicer
new_mgr_locs = self.mgr_locs[axis0_slicer]

new_values = self._slice(slicer)

if new_values.ndim != self.values.ndim:
raise ValueError("Only same dim slicing is allowed")

return type(self)._simple_new(new_values, new_mgr_locs, self.ndim)

@final
def getitem_block_index(self, slicer: slice) -> Block:
"""
Perform __getitem__-like specialized to slicing along index.

Assumes self.ndim == 2
"""
# error: Invalid index type "Tuple[ellipsis, slice]" for
# "Union[ndarray, ExtensionArray]"; expected type "Union[int, slice, ndarray]"
new_values = self.values[..., slicer] # type: ignore[index]
return type(self)._simple_new(new_values, self._mgr_locs, ndim=self.ndim)

@final
def getitem_block_columns(self, slicer, new_mgr_locs: BlockPlacement) -> Block:
"""
Perform __getitem__-like, return result as block.

Only supports slices that preserve dimensionality.
"""
new_values = self._slice(slicer)

if new_values.ndim != self.values.ndim:
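For context on getitem_block_index (an illustration, not part of the diff): a Block stores its values as a 2D array with the DataFrame's index along the last axis, and an Ellipsis key slices that trailing axis directly, returning a view. That is what lets the fastpath skip building a (slice(None), slobj) tuple and reuse self._mgr_locs unchanged, and it is also why the ExtensionArray __getitem__ methods earlier in this PR had to learn to accept values[..., slicer].

import numpy as np

# 2D block values: axis 0 = the block's columns, axis 1 = the DataFrame index
values = np.arange(12).reshape(3, 4)
slobj = slice(1, 3)

# Ellipsis indexing slices the trailing (index) axis...
assert np.array_equal(values[..., slobj], values[:, slobj])
# ...and returns a view, so repeated per-group slicing does not copy data
assert np.shares_memory(values[..., slobj], values)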
23 changes: 19 additions & 4 deletions pandas/core/internals/managers.py
@@ -23,6 +23,7 @@
internals as libinternals,
lib,
)
from pandas._libs.internals import BlockPlacement
from pandas._typing import (
ArrayLike,
Dtype,
@@ -809,6 +810,15 @@ def get_slice(self, slobj: slice, axis: int = 0) -> BlockManager:

return type(self)._simple_new(tuple(new_blocks), new_axes)

def get_slice_index(self, slobj: slice) -> BlockManager:
Contributor (review comment):
can you add a get_slice_columns and change get_slice to just dispatch? (to share code)

Member Author (reply):
The only code-sharing I see available would be to change L819-L819 to return self.get_slice_index(slobj), which only saves 1 line.

Flip side: one of the things I'm looking at is making another attempt at getting axes out of BlockManager (and ArrayManager), which would marginally widen the scope for sharing.

Member (reply):
What does this specialized version avoid? Isn't it only the if axis == 0: / elif axis == 1 checks?
(which I assume are quite fast checks, although if done repeatedly they might indeed still be worth avoiding)

The other difference is that get_slice does

            slicer = (slice(None), slobj)
            new_blocks = [blk.getitem_block(slicer) for blk in self.blocks]

while this new function does:

new_blocks = [blk.getitem_block_index(slobj) for blk in self.blocks]

but get_slice could do that as well?

Member (reply):
Can you answer this?

Member Author (reply):
> Isn't it only the if axis == 0: / elif axis == 1 checks? (which I assume are quite fast checks, although if done repeatedly they might indeed still be worth avoiding)

Yes. The code I'm profiling to optimize is (#40171 (comment))

import numpy as np
import pandas as pd

N = 10 ** 4
labels = np.random.randint(0, 2000, size=N)
labels2 = np.random.randint(0, 3, size=N)
df = pd.DataFrame(
    {
        "key": labels,
        "key2": labels2,
        "value1": np.random.randn(N),
        "value2": ["foo", "bar", "baz", "qux"] * (N // 4),
    }
)

%prun -s cumtime for i in range(100): df.groupby("key").apply(lambda x: 1)  # on master as of this writing

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    4.170    4.170 {built-in method builtins.exec}
        1    0.001    0.001    4.170    4.170 <string>:1(<module>)
      100    0.001    0.000    4.156    0.042 groupby.py:911(apply)
      100    0.004    0.000    4.150    0.042 groupby.py:960(_python_apply_general)
      100    0.497    0.005    4.084    0.041 ops.py:263(apply)
   199000    0.176    0.000    3.223    0.000 ops.py:1005(__iter__)
   198900    0.248    0.000    2.986    0.000 ops.py:1046(_chop)
   198900    0.339    0.000    2.522    0.000 managers.py:796(get_slice)
   198900    0.188    0.000    1.631    0.000 managers.py:803(<listcomp>)
   596700    0.823    0.000    1.443    0.000 blocks.py:324(getitem_block)
   198900    0.135    0.000    0.409    0.000 base.py:4638(_getitem_slice)

we're still spending 8% of the runtime in get_slice itself, which is why I think avoiding re-doing these checks is worthwhile.

> but get_slice could do that as well?

sure, could update get_slice to also use getitem_block_index (I think the first draft of this PR started before get_slice reliably received a slice object)

Member (reply):
> we're still spending 8% of the runtime in get_slice itself, which is why I think avoiding re-doing these checks is worthwhile.

And did that number reduce by using get_slice_index instead?

> sure, could update get_slice to also use getitem_block_index

Yes, let's at least make the code consistent there

Member (reply):
Using this branch, timing mgr.get_slice_index(slobj) vs mgr.get_slice(slobj, axis=1) (but making them both use getitem_block_index, and removing the assert), I only get a tiny difference (less than 5%, and that's on something that only takes 8% of the total runtime).

Member Author (reply):
I'm seeing 17-22% gains with just getitem_block_index and not the extra BM/AM methods, so will revert those for this PR.

# get_slice specialized to axis = 1 and ndim == 2
new_blocks = [blk.getitem_block_index(slobj) for blk in self.blocks]

axes = self.axes
new_axes = [axes[0], axes[1]._getitem_slice(slobj)]

return type(self)._simple_new(tuple(new_blocks), new_axes)

@property
def nblocks(self) -> int:
return len(self.blocks)
@@ -1391,7 +1401,8 @@ def _slice_take_blocks_ax0(
if sllen == 0:
return []
# TODO: tests all have isinstance(slobj, slice), other possibilities?
return [blk.getitem_block(slobj, new_mgr_locs=slice(0, sllen))]
bp = BlockPlacement(slice(0, sllen))
return [blk.getitem_block_columns(slobj, new_mgr_locs=bp)]
elif not allow_fill or self.ndim == 1:
if allow_fill and fill_value is None:
fill_value = blk.fill_value
@@ -1400,7 +1411,9 @@
# GH#33597 slice instead of take, so we get
# views instead of copies
blocks = [
blk.getitem_block(slice(ml, ml + 1), new_mgr_locs=i)
blk.getitem_block_columns(
slice(ml, ml + 1), new_mgr_locs=BlockPlacement(i)
)
for i, ml in enumerate(slobj)
]
# We have
@@ -1460,13 +1473,15 @@
taker = lib.maybe_indices_to_slice(taker, max_len)

if isinstance(taker, slice):
nb = blk.getitem_block(taker, new_mgr_locs=mgr_locs)
nb = blk.getitem_block_columns(taker, new_mgr_locs=mgr_locs)
blocks.append(nb)
elif only_slice:
# GH#33597 slice instead of take, so we get
# views instead of copies
for i, ml in zip(taker, mgr_locs):
nb = blk.getitem_block(slice(i, i + 1), new_mgr_locs=ml)
slc = slice(i, i + 1)
bp = BlockPlacement(ml)
nb = blk.getitem_block_columns(slc, new_mgr_locs=bp)
# We have np.shares_memory(nb.values, blk.values)
blocks.append(nb)
else:
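For completeness, the benchmark discussed in the review thread above can be re-run as a self-contained script (a sketch; absolute timings, and whether the quoted 17-22% improvement reproduces, will depend on the machine and the pandas build):

import numpy as np
import pandas as pd
from timeit import timeit

N = 10 ** 4
labels = np.random.randint(0, 2000, size=N)
labels2 = np.random.randint(0, 3, size=N)
df = pd.DataFrame(
    {
        "key": labels,
        "key2": labels2,
        "value1": np.random.randn(N),
        "value2": ["foo", "bar", "baz", "qux"] * (N // 4),
    }
)

# ~2000 groups per apply, so each call exercises ~2000 _chop_index slices
print(timeit(lambda: df.groupby("key").apply(lambda x: 1), number=100))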
11 changes: 11 additions & 0 deletions pandas/tests/extension/base/getitem.py
@@ -245,6 +245,17 @@ def test_getitem_slice(self, data):
result = data[slice(1)] # scalar
assert isinstance(result, type(data))

def test_getitem_ellipsis_and_slice(self, data):
# GH#40353 this is called from getitem_block_index
result = data[..., :]
Member (review comment):
Also test data[:, ...] ?

Member Author (reply):
good idea, will update

self.assert_extension_array_equal(result, data)

result = data[..., :3]
self.assert_extension_array_equal(result, data[:3])

result = data[..., ::2]
self.assert_extension_array_equal(result, data[::2])

def test_get(self, data):
# GH 20882
s = pd.Series(data, index=[2 * i for i in range(len(data))])
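What the new base test exercises, shown against a concrete extension array (an illustration; the test suite runs this for every registered ExtensionArray, and a nullable integer array is just a convenient example):

import pandas as pd

arr = pd.array([1, 2, 3, 4], dtype="Int64")

# an Ellipsis followed by a slice must behave like the slice alone
assert arr[..., :].equals(arr)
assert arr[..., :3].equals(arr[:3])
assert arr[..., ::2].equals(arr[::2])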
7 changes: 7 additions & 0 deletions pandas/tests/extension/json/array.py
@@ -83,6 +83,13 @@ def _from_factorized(cls, values, original):
return cls([UserDict(x) for x in values if x != ()])

def __getitem__(self, item):
if isinstance(item, tuple):
if len(item) and item[0] is Ellipsis:
item = item[1:]
if len(item) > 1:
raise IndexError("too many indices for array.")
item = item[0]

if isinstance(item, numbers.Integral):
return self.data[item]
elif isinstance(item, slice) and item == slice(None):
33 changes: 19 additions & 14 deletions pandas/tests/internals/test_internals.py
@@ -848,22 +848,27 @@ def assert_slice_ok(mgr, axis, slobj):
assert_slice_ok(mgr, ax, slice(1, 4))
assert_slice_ok(mgr, ax, slice(3, 0, -2))

# boolean mask
assert_slice_ok(mgr, ax, np.array([], dtype=np.bool_))
assert_slice_ok(mgr, ax, np.ones(mgr.shape[ax], dtype=np.bool_))
assert_slice_ok(mgr, ax, np.zeros(mgr.shape[ax], dtype=np.bool_))

if mgr.shape[ax] >= 3:
assert_slice_ok(mgr, ax, np.arange(mgr.shape[ax]) % 3 == 0)
assert_slice_ok(mgr, ax, np.array([True, True, False], dtype=np.bool_))
if mgr.ndim < 2:
# 2D only support slice objects

# boolean mask
assert_slice_ok(mgr, ax, np.array([], dtype=np.bool_))
assert_slice_ok(mgr, ax, np.ones(mgr.shape[ax], dtype=np.bool_))
assert_slice_ok(mgr, ax, np.zeros(mgr.shape[ax], dtype=np.bool_))

if mgr.shape[ax] >= 3:
assert_slice_ok(mgr, ax, np.arange(mgr.shape[ax]) % 3 == 0)
assert_slice_ok(
mgr, ax, np.array([True, True, False], dtype=np.bool_)
)

# fancy indexer
assert_slice_ok(mgr, ax, [])
assert_slice_ok(mgr, ax, list(range(mgr.shape[ax])))
# fancy indexer
assert_slice_ok(mgr, ax, [])
assert_slice_ok(mgr, ax, list(range(mgr.shape[ax])))

if mgr.shape[ax] >= 3:
assert_slice_ok(mgr, ax, [0, 1, 2])
assert_slice_ok(mgr, ax, [-1, -2, -3])
if mgr.shape[ax] >= 3:
assert_slice_ok(mgr, ax, [0, 1, 2])
assert_slice_ok(mgr, ax, [-1, -2, -3])

@pytest.mark.parametrize("mgr", MANAGERS)
def test_take(self, mgr):