Skip to content

Commit 33f67d9

Browse files
authored
BUG: iloc.__setitem__ with duplicate columns (#32477)
1 parent 4b99525 commit 33f67d9

File tree

6 files changed

+80
-21
lines changed

6 files changed

+80
-21
lines changed

doc/source/whatsnew/v1.1.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ Indexing
263263
- Bug in :meth:`Series.xs` incorrectly returning ``Timestamp`` instead of ``datetime64`` in some object-dtype cases (:issue:`31630`)
264264
- Bug in :meth:`DataFrame.iat` incorrectly returning ``Timestamp`` instead of ``datetime`` in some object-dtype cases (:issue:`32809`)
265265
- Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` when indexing with an integer key on an object-dtype :class:`Index` that is not all-integers (:issue:`31905`)
266-
-
266+
- Bug in :meth:`DataFrame.iloc.__setitem__` on a :class:`DataFrame` with duplicate columns incorrectly setting values for all matching columns (:issue:`15686`, :issue:`22036`)
267267

268268
Missing
269269
^^^^^^^

pandas/core/frame.py

+14
Original file line numberDiff line numberDiff line change
@@ -2708,6 +2708,20 @@ def _setitem_frame(self, key, value):
27082708
self._check_setitem_copy()
27092709
self._where(-key, value, inplace=True)
27102710

2711+
def _iset_item(self, loc: int, value):
2712+
self._ensure_valid_index(value)
2713+
2714+
# technically _sanitize_column expects a label, not a position,
2715+
# but the behavior is the same as long as we pass broadcast=False
2716+
value = self._sanitize_column(loc, value, broadcast=False)
2717+
NDFrame._iset_item(self, loc, value)
2718+
2719+
# check if we are modifying a copy
2720+
# try to set first as we want an invalid
2721+
# value exception to occur first
2722+
if len(self):
2723+
self._check_setitem_copy()
2724+
27112725
def _set_item(self, key, value):
27122726
"""
27132727
Add series to DataFrame in specified column.

pandas/core/generic.py

+4
Original file line numberDiff line numberDiff line change
@@ -3579,6 +3579,10 @@ def _slice(self: FrameOrSeries, slobj: slice, axis=0) -> FrameOrSeries:
35793579
result._set_is_copy(self, copy=is_copy)
35803580
return result
35813581

3582+
def _iset_item(self, loc: int, value) -> None:
3583+
self._data.iset(loc, value)
3584+
self._clear_item_cache()
3585+
35823586
def _set_item(self, key, value) -> None:
35833587
self._data.set(key, value)
35843588
self._clear_item_cache()

pandas/core/indexing.py

+27-17
Original file line numberDiff line numberDiff line change
@@ -1615,6 +1615,12 @@ def _setitem_with_indexer(self, indexer, value):
16151615
info_idx = [info_idx]
16161616
labels = item_labels[info_idx]
16171617

1618+
# Ensure we have something we can iterate over
1619+
ilocs = info_idx
1620+
if isinstance(info_idx, slice):
1621+
ri = Index(range(len(self.obj.columns)))
1622+
ilocs = ri[info_idx]
1623+
16181624
plane_indexer = indexer[:1]
16191625
lplane_indexer = length_of_indexer(plane_indexer[0], self.obj.index)
16201626
# lplane_indexer gives the expected length of obj[indexer[0]]
@@ -1632,9 +1638,11 @@ def _setitem_with_indexer(self, indexer, value):
16321638
"length than the value"
16331639
)
16341640

1635-
def setter(item, v):
1636-
ser = self.obj[item]
1637-
pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer
1641+
pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer
1642+
1643+
def isetter(loc, v):
1644+
# positional setting on column loc
1645+
ser = self.obj._ixs(loc, axis=1)
16381646

16391647
# perform the equivalent of a setitem on the info axis
16401648
# as we have a null slice or a slice with full bounds
@@ -1654,7 +1662,7 @@ def setter(item, v):
16541662
ser._maybe_update_cacher(clear=True)
16551663

16561664
# reset the sliced object if unique
1657-
self.obj[item] = ser
1665+
self.obj._iset_item(loc, ser)
16581666

16591667
# we need an iterable, with a ndim of at least 1
16601668
# eg. don't pass through np.array(0)
@@ -1664,8 +1672,10 @@ def setter(item, v):
16641672
if isinstance(value, ABCDataFrame):
16651673
sub_indexer = list(indexer)
16661674
multiindex_indexer = isinstance(labels, ABCMultiIndex)
1675+
# TODO: we are implicitly assuming value.columns is unique
16671676

1668-
for item in labels:
1677+
for loc in ilocs:
1678+
item = item_labels[loc]
16691679
if item in value:
16701680
sub_indexer[info_axis] = item
16711681
v = self._align_series(
@@ -1674,7 +1684,7 @@ def setter(item, v):
16741684
else:
16751685
v = np.nan
16761686

1677-
setter(item, v)
1687+
isetter(loc, v)
16781688

16791689
# we have an equal len ndarray/convertible to our labels
16801690
# hasattr first, to avoid coercing to ndarray without reason.
@@ -1685,44 +1695,44 @@ def setter(item, v):
16851695
# note that this coerces the dtype if we are mixed
16861696
# GH 7551
16871697
value = np.array(value, dtype=object)
1688-
if len(labels) != value.shape[1]:
1698+
if len(ilocs) != value.shape[1]:
16891699
raise ValueError(
16901700
"Must have equal len keys and value "
16911701
"when setting with an ndarray"
16921702
)
16931703

1694-
for i, item in enumerate(labels):
1695-
1704+
for i, loc in enumerate(ilocs):
16961705
# setting with a list, re-coerces
1697-
setter(item, value[:, i].tolist())
1706+
isetter(loc, value[:, i].tolist())
16981707

16991708
elif (
17001709
len(labels) == 1
17011710
and lplane_indexer == len(value)
17021711
and not is_scalar(plane_indexer[0])
17031712
):
17041713
# we have an equal len list/ndarray
1705-
setter(labels[0], value)
1714+
# We only get here with len(labels) == len(ilocs) == 1
1715+
isetter(ilocs[0], value)
17061716

17071717
elif lplane_indexer == 0 and len(value) == len(self.obj.index):
17081718
# We get here in one case via .loc with an all-False mask
17091719
pass
17101720

17111721
else:
17121722
# per-label values
1713-
if len(labels) != len(value):
1723+
if len(ilocs) != len(value):
17141724
raise ValueError(
17151725
"Must have equal len keys and value "
17161726
"when setting with an iterable"
17171727
)
17181728

1719-
for item, v in zip(labels, value):
1720-
setter(item, v)
1729+
for loc, v in zip(ilocs, value):
1730+
isetter(loc, v)
17211731
else:
17221732

1723-
# scalar
1724-
for item in labels:
1725-
setter(item, value)
1733+
# scalar value
1734+
for loc in ilocs:
1735+
isetter(loc, value)
17261736

17271737
else:
17281738
if isinstance(indexer, tuple):

pandas/core/internals/managers.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -1096,7 +1096,10 @@ def value_getitem(placement):
10961096
"Shape of new values must be compatible with manager shape"
10971097
)
10981098

1099-
if isinstance(loc, int):
1099+
if lib.is_integer(loc):
1100+
# We have 6 tests where loc is _not_ an int.
1101+
# In this case, get_blkno_placements will yield only one tuple,
1102+
# containing (self._blknos[loc], BlockPlacement(slice(0, 1, 1)))
11001103
loc = [loc]
11011104

11021105
# Accessing public blknos ensures the public versions are initialized
@@ -1148,7 +1151,7 @@ def value_getitem(placement):
11481151
# one item.
11491152
new_blocks.extend(
11501153
make_block(
1151-
values=value.copy(),
1154+
values=value,
11521155
ndim=self.ndim,
11531156
placement=slice(mgr_loc, mgr_loc + 1),
11541157
)

pandas/tests/indexing/test_iloc.py

+29-1
Original file line numberDiff line numberDiff line change
@@ -349,7 +349,6 @@ def test_iloc_setitem_dups(self):
349349
df = concat([df1, df2], axis=1)
350350

351351
expected = df.fillna(3)
352-
expected["A"] = expected["A"].astype("float64")
353352
inds = np.isnan(df.iloc[:, 0])
354353
mask = inds[inds].index
355354
df.iloc[mask, 0] = df.iloc[mask, 2]
@@ -694,3 +693,32 @@ def test_series_indexing_zerodim_np_array(self):
694693
s = Series([1, 2])
695694
result = s.iloc[np.array(0)]
696695
assert result == 1
696+
697+
698+
class TestILocSetItemDuplicateColumns:
699+
def test_iloc_setitem_scalar_duplicate_columns(self):
700+
# GH#15686, duplicate columns and mixed dtype
701+
df1 = pd.DataFrame([{"A": None, "B": 1}, {"A": 2, "B": 2}])
702+
df2 = pd.DataFrame([{"A": 3, "B": 3}, {"A": 4, "B": 4}])
703+
df = pd.concat([df1, df2], axis=1)
704+
df.iloc[0, 0] = -1
705+
706+
assert df.iloc[0, 0] == -1
707+
assert df.iloc[0, 2] == 3
708+
assert df.dtypes.iloc[2] == np.int64
709+
710+
def test_iloc_setitem_list_duplicate_columns(self):
711+
# GH#22036 setting with same-sized list
712+
df = pd.DataFrame([[0, "str", "str2"]], columns=["a", "b", "b"])
713+
714+
df.iloc[:, 2] = ["str3"]
715+
716+
expected = pd.DataFrame([[0, "str", "str3"]], columns=["a", "b", "b"])
717+
tm.assert_frame_equal(df, expected)
718+
719+
def test_iloc_setitem_series_duplicate_columns(self):
720+
df = pd.DataFrame(
721+
np.arange(8, dtype=np.int64).reshape(2, 4), columns=["A", "B", "A", "B"]
722+
)
723+
df.iloc[:, 0] = df.iloc[:, 0].astype(np.float64)
724+
assert df.dtypes.iloc[2] == np.int64

0 commit comments

Comments
 (0)