Skip to content

BUG: df.iloc[:, 0] = df.iloc[::-1, 0] not setting inplace for EAs #45352

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Mar 1, 2022
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,7 @@ Indexing
- Bug in :meth:`loc.__getitem__` with a list of keys causing an internal inconsistency that could lead to a disconnect between ``frame.at[x, y]`` vs ``frame[y].loc[x]`` (:issue:`22372`)
- Bug in :meth:`DataFrame.iloc` where indexing a single row on a :class:`DataFrame` with a single ExtensionDtype column gave a copy instead of a view on the underlying data (:issue:`45241`)
- Bug in setting a NA value (``None`` or ``np.nan``) into a :class:`Series` with int-based :class:`IntervalDtype` incorrectly casting to object dtype instead of a float-based :class:`IntervalDtype` (:issue:`45568`)
- Bug in indexing setting values into an ``ExtensionDtype`` column with ``df.iloc[:, i] = values`` with ``values`` having the same dtype as ``df.iloc[:, i]`` incorrectly inserting a new array instead of setting in-place (:issue:`33457`)
- Bug in :meth:`Series.__setitem__` with a non-integer :class:`Index` when using an integer key to set a value that cannot be set inplace where a ``ValueError`` was raised instead of casting to a common dtype (:issue:`45070`)
- Bug in :meth:`Series.__setitem__` when setting incompatible values into a ``PeriodDtype`` or ``IntervalDtype`` :class:`Series` raising when indexing with a boolean mask but coercing when indexing with otherwise-equivalent indexers; these now consistently coerce, along with :meth:`Series.mask` and :meth:`Series.where` (:issue:`45768`)
- Bug in :meth:`DataFrame.where` with multiple columns with datetime-like dtypes failing to downcast results consistent with other dtypes (:issue:`45837`)
Expand Down
14 changes: 1 addition & 13 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1618,21 +1618,9 @@ def iget(self, i: int | tuple[int, int] | tuple[slice, int]):
return self.values

def set_inplace(self, locs, values: ArrayLike) -> None:
# NB: This is a misnomer, is supposed to be inplace but is not,
# see GH#33457
# When an ndarray, we should have locs.tolist() == [0]
# When a BlockPlacement we should have list(locs) == [0]

# error: Incompatible types in assignment (expression has type
# "Union[ExtensionArray, ndarray[Any, Any]]", variable has type
# "ExtensionArray")
self.values = values # type: ignore[assignment]
try:
# TODO(GH33457) this can be removed
self._cache.clear()
except AttributeError:
# _cache not yet initialized
pass
self.values[:] = values

def _maybe_squeeze_arg(self, arg):
"""
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/extension/base/setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,14 @@ def test_setitem_series(self, data, full_indexer):
def test_setitem_frame_2d_values(self, data):
# GH#44514
df = pd.DataFrame({"A": data})

# Avoiding using_array_manager fixture
# https://github.com/pandas-dev/pandas/pull/44514#discussion_r754002410
using_array_manager = isinstance(df._mgr, pd.core.internals.ArrayManager)

df = pd.DataFrame({"A": data})
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is a duplicate line

blk_data = df._mgr.arrays[0]

orig = df.copy()

df.iloc[:] = df
Expand All @@ -370,6 +378,10 @@ def test_setitem_frame_2d_values(self, data):

df.iloc[:] = df.values
self.assert_frame_equal(df, orig)
if not using_array_manager:
# GH#33457 Check that this setting occurred in-place
# FIXME(ArrayManager): this should work there too
assert df._mgr.arrays[0] is blk_data

df.iloc[:-1] = df.values[:-1]
self.assert_frame_equal(df, orig)
Expand Down
39 changes: 22 additions & 17 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2528,6 +2528,7 @@ def test_dict_nocopy(
return

c = pd.array([1, 2], dtype=any_numeric_ea_dtype)
c_orig = c.copy()
df = DataFrame({"a": a, "b": b, "c": c}, copy=copy)

def get_base(obj):
Expand All @@ -2539,9 +2540,19 @@ def get_base(obj):
else:
raise TypeError

def check_views():
def check_views(c_only: bool = False):
# written to work for either BlockManager or ArrayManager

# Check that the underlying data behind df["c"] is still `c`
# after setting with iloc. Since we don't know which entry in
# df._mgr.arrays corresponds to df["c"], we just check that exactly
# one of these arrays is `c`. GH#38939
assert sum(x is c for x in df._mgr.arrays) == 1
if c_only:
# If we ever stop consolidating in setitem_with_indexer,
# this will become unnecessary.
return

assert (
sum(
get_base(x) is a
Expand All @@ -2563,23 +2574,18 @@ def check_views():
# constructor preserves views
check_views()

# TODO: most of the rest of this test belongs in indexing tests
df.iloc[0, 0] = 0
df.iloc[0, 1] = 0
if not copy:
# Check that the underlying data behind df["c"] is still `c`
# after setting with iloc. Since we don't know which entry in
# df._mgr.arrays corresponds to df["c"], we just check that exactly
# one of these arrays is `c`. GH#38939
assert sum(x is c for x in df._mgr.arrays) == 1
# TODO: we can call check_views if we stop consolidating
# in setitem_with_indexer
check_views(True)

# FIXME(GH#35417): until GH#35417, iloc.setitem into EA values does not preserve
# view, so we have to check in the other direction
# df.iloc[0, 2] = 0
# if not copy:
# check_views()
c[0] = 0
df.iloc[:, 2] = pd.array([45, 46], dtype=c.dtype)
assert df.dtypes.iloc[2] == c.dtype
if not copy:
check_views(True)

if copy:
if a.dtype.kind == "M":
Expand All @@ -2589,14 +2595,13 @@ def check_views():
assert a[0] == a.dtype.type(1)
assert b[0] == b.dtype.type(3)
# FIXME(GH#35417): enable after GH#35417
# assert c[0] == 1
assert df.iloc[0, 2] == 1
assert c[0] == c_orig[0] # i.e. df.iloc[0, 2]=45 did *not* update c
else:
# TODO: we can call check_views if we stop consolidating
# in setitem_with_indexer
# FIXME(GH#35417): enable after GH#35417
# assert b[0] == 0
assert df.iloc[0, 2] == 0
assert c[0] == 45 # i.e. df.iloc[0, 2]=45 *did* update c
# TODO: we can check b[0] == 0 if we stop consolidating in
# setitem_with_indexer (except for datetimelike?)

def test_from_series_with_name_with_columns(self):
# GH 7893
Expand Down
9 changes: 6 additions & 3 deletions pandas/tests/indexing/test_iloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -875,16 +875,19 @@ def test_series_indexing_zerodim_np_array(self):
result = s.iloc[np.array(0)]
assert result == 1

@pytest.mark.xfail(reason="https://github.com/pandas-dev/pandas/issues/33457")
@td.skip_array_manager_not_yet_implemented
def test_iloc_setitem_categorical_updates_inplace(self):
# Mixed dtype ensures we go through take_split_path in setitem_with_indexer
cat = Categorical(["A", "B", "C"])
df = DataFrame({1: cat, 2: [1, 2, 3]})
df = DataFrame({1: cat, 2: [1, 2, 3]}, copy=False)

assert tm.shares_memory(df[1], cat)

# This should modify our original values in-place
df.iloc[:, 0] = cat[::-1]
assert tm.shares_memory(df[1], cat)

expected = Categorical(["C", "B", "A"])
expected = Categorical(["C", "B", "A"], categories=["A", "B", "C"])
tm.assert_categorical_equal(cat, expected)

def test_iloc_with_boolean_operation(self):
Expand Down