Skip to content

Commit bdef5ad

Browse files
jbrockmendelyehoshuadimarsky
authored andcommitted
BUG: df.iloc[:, 0] = df.iloc[::-1, 0] not setting inplace for EAs (pandas-dev#45352)
1 parent 4932554 commit bdef5ad

File tree

5 files changed

+42
-33
lines changed

5 files changed

+42
-33
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,7 @@ Indexing
360360
- Bug in :meth:`DataFrame.iloc` where indexing a single row on a :class:`DataFrame` with a single ExtensionDtype column gave a copy instead of a view on the underlying data (:issue:`45241`)
361361
- Bug in :meth:`Series.align` does not create :class:`MultiIndex` with union of levels when both MultiIndexes intersections are identical (:issue:`45224`)
362362
- Bug in setting a NA value (``None`` or ``np.nan``) into a :class:`Series` with int-based :class:`IntervalDtype` incorrectly casting to object dtype instead of a float-based :class:`IntervalDtype` (:issue:`45568`)
363+
- Bug in indexing setting values into an ``ExtensionDtype`` column with ``df.iloc[:, i] = values`` with ``values`` having the same dtype as ``df.iloc[:, i]`` incorrectly inserting a new array instead of setting in-place (:issue:`33457`)
363364
- Bug in :meth:`Series.__setitem__` with a non-integer :class:`Index` when using an integer key to set a value that cannot be set inplace where a ``ValueError`` was raised instead of casting to a common dtype (:issue:`45070`)
364365
- Bug in :meth:`Series.__setitem__` when setting incompatible values into a ``PeriodDtype`` or ``IntervalDtype`` :class:`Series` raising when indexing with a boolean mask but coercing when indexing with otherwise-equivalent indexers; these now consistently coerce, along with :meth:`Series.mask` and :meth:`Series.where` (:issue:`45768`)
365366
- Bug in :meth:`DataFrame.where` with multiple columns with datetime-like dtypes failing to downcast results consistent with other dtypes (:issue:`45837`)

pandas/core/internals/blocks.py

+1-13
Original file line numberDiff line numberDiff line change
@@ -1636,21 +1636,9 @@ def iget(self, i: int | tuple[int, int] | tuple[slice, int]):
16361636
return self.values
16371637

16381638
def set_inplace(self, locs, values: ArrayLike) -> None:
1639-
# NB: This is a misnomer, is supposed to be inplace but is not,
1640-
# see GH#33457
16411639
# When an ndarray, we should have locs.tolist() == [0]
16421640
# When a BlockPlacement we should have list(locs) == [0]
1643-
1644-
# error: Incompatible types in assignment (expression has type
1645-
# "Union[ExtensionArray, ndarray[Any, Any]]", variable has type
1646-
# "ExtensionArray")
1647-
self.values = values # type: ignore[assignment]
1648-
try:
1649-
# TODO(GH33457) this can be removed
1650-
self._cache.clear()
1651-
except AttributeError:
1652-
# _cache not yet initialized
1653-
pass
1641+
self.values[:] = values
16541642

16551643
def _maybe_squeeze_arg(self, arg):
16561644
"""

pandas/tests/extension/base/setitem.py

+12
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,14 @@ def test_setitem_series(self, data, full_indexer):
360360
def test_setitem_frame_2d_values(self, data):
361361
# GH#44514
362362
df = pd.DataFrame({"A": data})
363+
364+
# Avoiding using_array_manager fixture
365+
# https://github.com/pandas-dev/pandas/pull/44514#discussion_r754002410
366+
using_array_manager = isinstance(df._mgr, pd.core.internals.ArrayManager)
367+
368+
df = pd.DataFrame({"A": data})
369+
blk_data = df._mgr.arrays[0]
370+
363371
orig = df.copy()
364372

365373
df.iloc[:] = df
@@ -370,6 +378,10 @@ def test_setitem_frame_2d_values(self, data):
370378

371379
df.iloc[:] = df.values
372380
self.assert_frame_equal(df, orig)
381+
if not using_array_manager:
382+
# GH#33457 Check that this setting occurred in-place
383+
# FIXME(ArrayManager): this should work there too
384+
assert df._mgr.arrays[0] is blk_data
373385

374386
df.iloc[:-1] = df.values[:-1]
375387
self.assert_frame_equal(df, orig)

pandas/tests/frame/test_constructors.py

+22-17
Original file line numberDiff line numberDiff line change
@@ -2519,6 +2519,7 @@ def test_dict_nocopy(
25192519
return
25202520

25212521
c = pd.array([1, 2], dtype=any_numeric_ea_dtype)
2522+
c_orig = c.copy()
25222523
df = DataFrame({"a": a, "b": b, "c": c}, copy=copy)
25232524

25242525
def get_base(obj):
@@ -2530,9 +2531,19 @@ def get_base(obj):
25302531
else:
25312532
raise TypeError
25322533

2533-
def check_views():
2534+
def check_views(c_only: bool = False):
25342535
# written to work for either BlockManager or ArrayManager
2536+
2537+
# Check that the underlying data behind df["c"] is still `c`
2538+
# after setting with iloc. Since we don't know which entry in
2539+
# df._mgr.arrays corresponds to df["c"], we just check that exactly
2540+
# one of these arrays is `c`. GH#38939
25352541
assert sum(x is c for x in df._mgr.arrays) == 1
2542+
if c_only:
2543+
# If we ever stop consolidating in setitem_with_indexer,
2544+
# this will become unnecessary.
2545+
return
2546+
25362547
assert (
25372548
sum(
25382549
get_base(x) is a
@@ -2554,23 +2565,18 @@ def check_views():
25542565
# constructor preserves views
25552566
check_views()
25562567

2568+
# TODO: most of the rest of this test belongs in indexing tests
25572569
df.iloc[0, 0] = 0
25582570
df.iloc[0, 1] = 0
25592571
if not copy:
2560-
# Check that the underlying data behind df["c"] is still `c`
2561-
# after setting with iloc. Since we don't know which entry in
2562-
# df._mgr.arrays corresponds to df["c"], we just check that exactly
2563-
# one of these arrays is `c`. GH#38939
2564-
assert sum(x is c for x in df._mgr.arrays) == 1
2565-
# TODO: we can call check_views if we stop consolidating
2566-
# in setitem_with_indexer
2572+
check_views(True)
25672573

25682574
# FIXME(GH#35417): until GH#35417, iloc.setitem into EA values does not preserve
25692575
# view, so we have to check in the other direction
2570-
# df.iloc[0, 2] = 0
2571-
# if not copy:
2572-
# check_views()
2573-
c[0] = 0
2576+
df.iloc[:, 2] = pd.array([45, 46], dtype=c.dtype)
2577+
assert df.dtypes.iloc[2] == c.dtype
2578+
if not copy:
2579+
check_views(True)
25742580

25752581
if copy:
25762582
if a.dtype.kind == "M":
@@ -2580,14 +2586,13 @@ def check_views():
25802586
assert a[0] == a.dtype.type(1)
25812587
assert b[0] == b.dtype.type(3)
25822588
# FIXME(GH#35417): enable after GH#35417
2583-
# assert c[0] == 1
2584-
assert df.iloc[0, 2] == 1
2589+
assert c[0] == c_orig[0] # i.e. df.iloc[0, 2]=45 did *not* update c
25852590
else:
25862591
# TODO: we can call check_views if we stop consolidating
25872592
# in setitem_with_indexer
2588-
# FIXME(GH#35417): enable after GH#35417
2589-
# assert b[0] == 0
2590-
assert df.iloc[0, 2] == 0
2593+
assert c[0] == 45 # i.e. df.iloc[0, 2]=45 *did* update c
2594+
# TODO: we can check b[0] == 0 if we stop consolidating in
2595+
# setitem_with_indexer (except for datetimelike?)
25912596

25922597
def test_from_series_with_name_with_columns(self):
25932598
# GH 7893

pandas/tests/indexing/test_iloc.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -875,16 +875,19 @@ def test_series_indexing_zerodim_np_array(self):
875875
result = s.iloc[np.array(0)]
876876
assert result == 1
877877

878-
@pytest.mark.xfail(reason="https://github.com/pandas-dev/pandas/issues/33457")
878+
@td.skip_array_manager_not_yet_implemented
879879
def test_iloc_setitem_categorical_updates_inplace(self):
880880
# Mixed dtype ensures we go through take_split_path in setitem_with_indexer
881881
cat = Categorical(["A", "B", "C"])
882-
df = DataFrame({1: cat, 2: [1, 2, 3]})
882+
df = DataFrame({1: cat, 2: [1, 2, 3]}, copy=False)
883+
884+
assert tm.shares_memory(df[1], cat)
883885

884886
# This should modify our original values in-place
885887
df.iloc[:, 0] = cat[::-1]
888+
assert tm.shares_memory(df[1], cat)
886889

887-
expected = Categorical(["C", "B", "A"])
890+
expected = Categorical(["C", "B", "A"], categories=["A", "B", "C"])
888891
tm.assert_categorical_equal(cat, expected)
889892

890893
def test_iloc_with_boolean_operation(self):

0 commit comments

Comments
 (0)