Skip to content

BUG: df.iloc[:, :1] with EA column #32959

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Apr 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,7 @@ Indexing
- Bug in :meth:`DataFrame.iloc.__setitem__` creating a new array instead of overwriting ``Categorical`` values in-place (:issue:`32831`)
- Bug in :meth:`DataFrame.copy` _item_cache not invalidated after copy causes post-copy value updates to not be reflected (:issue:`31784`)
- Bug in `Series.__getitem__` with an integer key and a :class:`MultiIndex` with leading integer level failing to raise ``KeyError`` if the key is not present in the first level (:issue:`33355`)
- Bug in :meth:`DataFrame.iloc` when slicing a single column-:class:`DataFrame`` with ``ExtensionDtype`` (e.g. ``df.iloc[:, :1]``) returning an invalid result (:issue:`32957`)

Missing
^^^^^^^
Expand Down
48 changes: 33 additions & 15 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from pandas._libs import NaT, algos as libalgos, lib, writers
import pandas._libs.internals as libinternals
from pandas._libs.tslibs import Timedelta, conversion
from pandas._libs.tslibs import conversion
from pandas._libs.tslibs.timezones import tz_compare
from pandas._typing import ArrayLike
from pandas.util._validators import validate_bool_kwarg
Expand Down Expand Up @@ -281,6 +281,7 @@ def __setstate__(self, state):

def _slice(self, slicer):
""" return a slice of my values """

return self.values[slicer]

def getitem_block(self, slicer, new_mgr_locs=None):
Expand Down Expand Up @@ -1734,14 +1735,40 @@ def _can_hold_element(self, element: Any) -> bool:
return True

def _slice(self, slicer):
""" return a slice of my values """
# slice the category
"""
Return a slice of my values.

Parameters
----------
slicer : slice, ndarray[int], or a tuple of these
Valid (non-reducing) indexer for self.values.

Returns
-------
np.ndarray or ExtensionArray
"""
# return same dims as we currently have
if not isinstance(slicer, tuple) and self.ndim == 2:
# reached via getitem_block via _slice_take_blocks_ax0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could also put this check in getitem_block or _slice_take_blocks_ax0 to ensure that _slice always gets the correct tuple of slices?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or, directly check the value? As here you convert it to a len-2 tuple, and then in a next step, you convert the tuple back to a slice?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm one way to avoid this PITA would be to have 2D EAs...

# TODO(EA2D): wont be necessary with 2D EAs
slicer = (slicer, slice(None))

if isinstance(slicer, tuple) and len(slicer) == 2:
if not com.is_null_slice(slicer[0]):
raise AssertionError("invalid slicing for a 1-ndim categorical")
slicer = slicer[1]
first = slicer[0]
if not isinstance(first, slice):
raise AssertionError(
"invalid slicing for a 1-ndim ExtensionArray", first
)
# GH#32959 only full-slicers along fake-dim0 are valid
# TODO(EA2D): wont be necessary with 2D EAs
new_locs = self.mgr_locs[first]
if len(new_locs):
# effectively slice(None)
slicer = slicer[1]
else:
raise AssertionError(
"invalid slicing for a 1-ndim ExtensionArray", slicer
)

return self.values[slicer]

Expand Down Expand Up @@ -2203,15 +2230,6 @@ def external_values(self):
# return an object-dtype ndarray of Timestamps.
return np.asarray(self.values.astype("datetime64[ns]", copy=False))

def _slice(self, slicer):
""" return a slice of my values """
if isinstance(slicer, tuple):
col, loc = slicer
if not com.is_null_slice(col) and col != 0:
raise IndexError(f"{self} only contains one item")
return self.values[loc]
return self.values[slicer]

def diff(self, n: int, axis: int = 0) -> List["Block"]:
"""
1st discrete difference.
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1311,6 +1311,10 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_value=lib.no_default):
blk = self.blocks[0]

if sl_type in ("slice", "mask"):
# GH#32959 EABlock would fail since we cant make 0-width
# TODO(EA2D): special casing unnecessary with 2D EAs
if sllen == 0:
return []
return [blk.getitem_block(slobj, new_mgr_locs=slice(0, sllen))]
elif not allow_fill or self.ndim == 1:
if allow_fill and fill_value is None:
Expand Down
28 changes: 28 additions & 0 deletions pandas/tests/extension/base/getitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,34 @@ def test_iloc_frame(self, data):
result = df.iloc[:4, 0]
self.assert_series_equal(result, expected)

# GH#32959 slice columns with step
result = df.iloc[:, ::2]
self.assert_frame_equal(result, df[["A"]])
result = df[["B", "A"]].iloc[:, ::2]
self.assert_frame_equal(result, df[["B"]])

def test_iloc_frame_single_block(self, data):
# GH#32959 null slice along index, slice along columns with single-block
df = pd.DataFrame({"A": data})

result = df.iloc[:, :]
self.assert_frame_equal(result, df)

result = df.iloc[:, :1]
self.assert_frame_equal(result, df)

result = df.iloc[:, :2]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add some df.iloc[:, -1:] as well

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated+green

self.assert_frame_equal(result, df)

result = df.iloc[:, ::2]
self.assert_frame_equal(result, df)

result = df.iloc[:, 1:2]
self.assert_frame_equal(result, df.iloc[:, :0])

result = df.iloc[:, -1:]
self.assert_frame_equal(result, df)

def test_loc_series(self, data):
ser = pd.Series(data)
result = ser.loc[:3]
Expand Down