diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 1b21983c590e2..587b4d181fa61 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -467,6 +467,7 @@ Indexing - Bug in :meth:`DataFrame.iloc.__setitem__` creating a new array instead of overwriting ``Categorical`` values in-place (:issue:`32831`) - Bug in :meth:`DataFrame.copy` _item_cache not invalidated after copy causes post-copy value updates to not be reflected (:issue:`31784`) - Bug in `Series.__getitem__` with an integer key and a :class:`MultiIndex` with leading integer level failing to raise ``KeyError`` if the key is not present in the first level (:issue:`33355`) +- Bug in :meth:`DataFrame.iloc` when slicing a single-column :class:`DataFrame` with ``ExtensionDtype`` (e.g. ``df.iloc[:, :1]``) returning an invalid result (:issue:`32957`) Missing ^^^^^^^ diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 75c935cdf2e60..d8875b38ed738 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -8,7 +8,7 @@ from pandas._libs import NaT, algos as libalgos, lib, writers import pandas._libs.internals as libinternals -from pandas._libs.tslibs import Timedelta, conversion +from pandas._libs.tslibs import conversion from pandas._libs.tslibs.timezones import tz_compare from pandas._typing import ArrayLike from pandas.util._validators import validate_bool_kwarg @@ -281,6 +281,7 @@ def __setstate__(self, state): def _slice(self, slicer): """ return a slice of my values """ + return self.values[slicer] def getitem_block(self, slicer, new_mgr_locs=None): @@ -1734,14 +1735,40 @@ def _can_hold_element(self, element: Any) -> bool: return True def _slice(self, slicer): - """ return a slice of my values """ - # slice the category + """ + Return a slice of my values. + + Parameters + ---------- + slicer : slice, ndarray[int], or a tuple of these + Valid (non-reducing) indexer for self.values. 
+ + Returns + ------- + np.ndarray or ExtensionArray + """ # return same dims as we currently have + if not isinstance(slicer, tuple) and self.ndim == 2: + # reached via getitem_block via _slice_take_blocks_ax0 + # TODO(EA2D): wont be necessary with 2D EAs + slicer = (slicer, slice(None)) if isinstance(slicer, tuple) and len(slicer) == 2: - if not com.is_null_slice(slicer[0]): - raise AssertionError("invalid slicing for a 1-ndim categorical") - slicer = slicer[1] + first = slicer[0] + if not isinstance(first, slice): + raise AssertionError( + "invalid slicing for a 1-ndim ExtensionArray", first + ) + # GH#32959 only full-slicers along fake-dim0 are valid + # TODO(EA2D): wont be necessary with 2D EAs + new_locs = self.mgr_locs[first] + if len(new_locs): + # effectively slice(None) + slicer = slicer[1] + else: + raise AssertionError( + "invalid slicing for a 1-ndim ExtensionArray", slicer + ) return self.values[slicer] @@ -2203,15 +2230,6 @@ def external_values(self): # return an object-dtype ndarray of Timestamps. return np.asarray(self.values.astype("datetime64[ns]", copy=False)) - def _slice(self, slicer): - """ return a slice of my values """ - if isinstance(slicer, tuple): - col, loc = slicer - if not com.is_null_slice(col) and col != 0: - raise IndexError(f"{self} only contains one item") - return self.values[loc] - return self.values[slicer] - def diff(self, n: int, axis: int = 0) -> List["Block"]: """ 1st discrete difference. 
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 9f26cb8b82bb6..46515a90535ed 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1311,6 +1311,10 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_value=lib.no_default): blk = self.blocks[0] if sl_type in ("slice", "mask"): + # GH#32959 EABlock would fail since we cant make 0-width + # TODO(EA2D): special casing unnecessary with 2D EAs + if sllen == 0: + return [] return [blk.getitem_block(slobj, new_mgr_locs=slice(0, sllen))] elif not allow_fill or self.ndim == 1: if allow_fill and fill_value is None: diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index 71c7198e32a8b..dc94bffd320b1 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -40,6 +40,34 @@ def test_iloc_frame(self, data): result = df.iloc[:4, 0] self.assert_series_equal(result, expected) + # GH#32959 slice columns with step + result = df.iloc[:, ::2] + self.assert_frame_equal(result, df[["A"]]) + result = df[["B", "A"]].iloc[:, ::2] + self.assert_frame_equal(result, df[["B"]]) + + def test_iloc_frame_single_block(self, data): + # GH#32959 null slice along index, slice along columns with single-block + df = pd.DataFrame({"A": data}) + + result = df.iloc[:, :] + self.assert_frame_equal(result, df) + + result = df.iloc[:, :1] + self.assert_frame_equal(result, df) + + result = df.iloc[:, :2] + self.assert_frame_equal(result, df) + + result = df.iloc[:, ::2] + self.assert_frame_equal(result, df) + + result = df.iloc[:, 1:2] + self.assert_frame_equal(result, df.iloc[:, :0]) + + result = df.iloc[:, -1:] + self.assert_frame_equal(result, df) + def test_loc_series(self, data): ser = pd.Series(data) result = ser.loc[:3]