Skip to content

Commit e7cbe6d

Browse files
authored
BUG: df.iloc[:, :1] with EA column (#32959)
1 parent 31ea45f commit e7cbe6d

File tree

4 files changed

+65
-14
lines changed

4 files changed

+65
-14
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,7 @@ Indexing
467467
- Bug in :meth:`DataFrame.iloc.__setitem__` creating a new array instead of overwriting ``Categorical`` values in-place (:issue:`32831`)
468468
- Bug in :meth:`DataFrame.copy` where ``_item_cache`` was not invalidated after the copy, causing post-copy value updates to not be reflected (:issue:`31784`)
469469
- Bug in :meth:`Series.__getitem__` with an integer key and a :class:`MultiIndex` with leading integer level failing to raise ``KeyError`` if the key is not present in the first level (:issue:`33355`)
470+
- Bug in :meth:`DataFrame.iloc` when slicing a single-column :class:`DataFrame` with ``ExtensionDtype`` (e.g. ``df.iloc[:, :1]``) returning an invalid result (:issue:`32957`)
470471

471472
Missing
472473
^^^^^^^

pandas/core/internals/blocks.py

+32-14
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,7 @@ def __setstate__(self, state):
281281

282282
def _slice(self, slicer):
283283
""" return a slice of my values """
284+
284285
return self.values[slicer]
285286

286287
def getitem_block(self, slicer, new_mgr_locs=None):
@@ -1734,14 +1735,40 @@ def _can_hold_element(self, element: Any) -> bool:
17341735
return True
17351736

17361737
def _slice(self, slicer):
1737-
""" return a slice of my values """
1738-
# slice the category
1738+
"""
1739+
Return a slice of my values.
1740+
1741+
Parameters
1742+
----------
1743+
slicer : slice, ndarray[int], or a tuple of these
1744+
Valid (non-reducing) indexer for self.values.
1745+
1746+
Returns
1747+
-------
1748+
np.ndarray or ExtensionArray
1749+
"""
17391750
# return same dims as we currently have
1751+
if not isinstance(slicer, tuple) and self.ndim == 2:
1752+
# reached via getitem_block via _slice_take_blocks_ax0
1753+
# TODO(EA2D): won't be necessary with 2D EAs
1754+
slicer = (slicer, slice(None))
17401755

17411756
if isinstance(slicer, tuple) and len(slicer) == 2:
1742-
if not com.is_null_slice(slicer[0]):
1743-
raise AssertionError("invalid slicing for a 1-ndim categorical")
1744-
slicer = slicer[1]
1757+
first = slicer[0]
1758+
if not isinstance(first, slice):
1759+
raise AssertionError(
1760+
"invalid slicing for a 1-ndim ExtensionArray", first
1761+
)
1762+
# GH#32959 only full-slicers along fake-dim0 are valid
1763+
# TODO(EA2D): won't be necessary with 2D EAs
1764+
new_locs = self.mgr_locs[first]
1765+
if len(new_locs):
1766+
# effectively slice(None)
1767+
slicer = slicer[1]
1768+
else:
1769+
raise AssertionError(
1770+
"invalid slicing for a 1-ndim ExtensionArray", slicer
1771+
)
17451772

17461773
return self.values[slicer]
17471774

@@ -2203,15 +2230,6 @@ def external_values(self):
22032230
# return an object-dtype ndarray of Timestamps.
22042231
return np.asarray(self.values.astype("datetime64[ns]", copy=False))
22052232

2206-
def _slice(self, slicer):
2207-
""" return a slice of my values """
2208-
if isinstance(slicer, tuple):
2209-
col, loc = slicer
2210-
if not com.is_null_slice(col) and col != 0:
2211-
raise IndexError(f"{self} only contains one item")
2212-
return self.values[loc]
2213-
return self.values[slicer]
2214-
22152233
def diff(self, n: int, axis: int = 0) -> List["Block"]:
22162234
"""
22172235
1st discrete difference.

pandas/core/internals/managers.py

+4
Original file line numberDiff line numberDiff line change
@@ -1319,6 +1319,10 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_value=lib.no_default):
13191319
blk = self.blocks[0]
13201320

13211321
if sl_type in ("slice", "mask"):
1322+
# GH#32959 EABlock would fail since we can't make a 0-width block
1323+
# TODO(EA2D): special casing unnecessary with 2D EAs
1324+
if sllen == 0:
1325+
return []
13221326
return [blk.getitem_block(slobj, new_mgr_locs=slice(0, sllen))]
13231327
elif not allow_fill or self.ndim == 1:
13241328
if allow_fill and fill_value is None:

pandas/tests/extension/base/getitem.py

+28
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,34 @@ def test_iloc_frame(self, data):
4040
result = df.iloc[:4, 0]
4141
self.assert_series_equal(result, expected)
4242

43+
# GH#32959 slice columns with step
44+
result = df.iloc[:, ::2]
45+
self.assert_frame_equal(result, df[["A"]])
46+
result = df[["B", "A"]].iloc[:, ::2]
47+
self.assert_frame_equal(result, df[["B"]])
48+
49+
def test_iloc_frame_single_block(self, data):
50+
# GH#32959 null slice along index, slice along columns with single-block
51+
df = pd.DataFrame({"A": data})
52+
53+
result = df.iloc[:, :]
54+
self.assert_frame_equal(result, df)
55+
56+
result = df.iloc[:, :1]
57+
self.assert_frame_equal(result, df)
58+
59+
result = df.iloc[:, :2]
60+
self.assert_frame_equal(result, df)
61+
62+
result = df.iloc[:, ::2]
63+
self.assert_frame_equal(result, df)
64+
65+
result = df.iloc[:, 1:2]
66+
self.assert_frame_equal(result, df.iloc[:, :0])
67+
68+
result = df.iloc[:, -1:]
69+
self.assert_frame_equal(result, df)
70+
4371
def test_loc_series(self, data):
4472
ser = pd.Series(data)
4573
result = ser.loc[:3]

0 commit comments

Comments
 (0)