Skip to content

Bug in iloc aligned objects #37728

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Nov 19, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,7 @@ Indexing
- Bug in :meth:`Index.where` incorrectly casting numeric values to strings (:issue:`37591`)
- Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` raises when numeric label was given for object :class:`Index` although label was in :class:`Index` (:issue:`26491`)
- Bug in :meth:`DataFrame.loc` returned requested key plus missing values when ``loc`` was applied to single level from :class:`MultiIndex` (:issue:`27104`)
- Bug in :meth:`DataFrame.iloc` and :meth:`Series.iloc` aligning objects in ``__setitem__`` (:issue:`22046`)

Missing
^^^^^^^
Expand Down
18 changes: 12 additions & 6 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -681,6 +681,7 @@ def __setitem__(self, key, value):
self._has_valid_setitem_indexer(key)

iloc = self if self.name == "iloc" else self.obj.iloc
iloc.name = self.name
iloc._setitem_with_indexer(indexer, value)

def _validate_key(self, key, axis: int):
Expand Down Expand Up @@ -1648,7 +1649,7 @@ def _setitem_with_indexer_split_path(self, indexer, value):
if len(indexer) > self.ndim:
raise IndexError("too many indices for array")

if isinstance(value, ABCSeries):
if isinstance(value, ABCSeries) and self.name != "iloc":
value = self._align_series(indexer, value)

# Ensure we have something we can iterate over
Expand Down Expand Up @@ -1767,15 +1768,20 @@ def _setitem_with_indexer_frame_value(self, indexer, value: "DataFrame"):
raise ValueError("Setting with non-unique columns is not allowed.")

else:
index = 0
for loc in ilocs:
item = self.obj.columns[loc]
if item in value:
sub_indexer[1] = item
val = self._align_series(
tuple(sub_indexer), value[item], multiindex_indexer
)
if self.name == "loc":
val = self._align_series(
tuple(sub_indexer), value[item], multiindex_indexer
)
else:
val = value.iloc[:, index]
else:
val = np.nan
index += 1

self._setitem_single_column(loc, val, plane_indexer)

Expand Down Expand Up @@ -1833,13 +1839,13 @@ def _setitem_single_block(self, indexer, value):

indexer = maybe_convert_ix(*indexer)

if isinstance(value, (ABCSeries, dict)):
if isinstance(value, (ABCSeries, dict)) and self.name != "iloc":
# TODO(EA): ExtensionBlock.setitem this causes issues with
# setting for extensionarrays that store dicts. Need to decide
# if it's worth supporting that.
value = self._align_series(indexer, Series(value))

elif isinstance(value, ABCDataFrame):
elif isinstance(value, ABCDataFrame) and self.name != "iloc":
value = self._align_frame(indexer, value)

# check for chained assignment
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,14 @@ def test_iloc_setitem_bool_indexer(self, klass):
expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]})
tm.assert_frame_equal(df, expected)

def test_setitem_iloc_pure_position_based(self):
# GH: 22046
df1 = DataFrame({"a2": [11, 12, 13], "b2": [14, 15, 16]})
df2 = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
df2.iloc[:, [1]] = df1.iloc[:, [0]]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are there other indexer/value type combinations that are relevant here? For example, the value being set here is a DataFrame; would a Series trigger the same bug? What if the indexer were slice(1, 2) instead of [1]?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

df1 = DataFrame({"a2": [11, 12, 13], "b2": [14, 15, 16]})
df2 = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
df2.iloc[:, [1]] = df1.iloc[:, 0]

raises

Traceback (most recent call last):
  File "/home/developer/.config/JetBrains/PyCharm2020.2/scratches/scratch_4.py", line 132, in <module>
    df2.iloc[:, [1]] = df1.iloc[:, 0]
  File "/home/developer/PycharmProjects/pandas/pandas/core/indexing.py", line 684, in __setitem__
    iloc._setitem_with_indexer(indexer, value)
  File "/home/developer/PycharmProjects/pandas/pandas/core/indexing.py", line 1637, in _setitem_with_indexer
    self._setitem_single_block(indexer, value)
  File "/home/developer/PycharmProjects/pandas/pandas/core/indexing.py", line 1851, in _setitem_single_block
    self.obj._mgr = self.obj._mgr.setitem(indexer=indexer, value=value)
  File "/home/developer/PycharmProjects/pandas/pandas/core/internals/managers.py", line 562, in setitem
    return self.apply("setitem", indexer=indexer, value=value)
  File "/home/developer/PycharmProjects/pandas/pandas/core/internals/managers.py", line 427, in apply
    applied = getattr(b, f)(**kwargs)
  File "/home/developer/PycharmProjects/pandas/pandas/core/internals/blocks.py", line 1005, in setitem
    values[indexer] = value
ValueError: shape mismatch: value array of shape (3,) could not be broadcast to indexing result of shape (1,3)

Process finished with exit code 1

I did not check a slice previously; it triggered the bug too. I will add a test.

expected = DataFrame({"a": [1, 2, 3], "b": [11, 12, 13], "c": [7, 8, 9]})
tm.assert_frame_equal(df2, expected)


class TestDataFrameSetItemSlicing:
def test_setitem_slice_position(self):
Expand Down
25 changes: 15 additions & 10 deletions pandas/tests/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -712,22 +712,22 @@ def test_mixed_index_no_fallback(self):
def test_rhs_alignment(self):
# GH8258, tests that both rows & columns are aligned to what is
# assigned to. covers both uniform data-type & multi-type cases
def run_tests(df, rhs, right):
def run_tests(df, rhs, right_loc, right_iloc):
# label, index, slice
lbl_one, idx_one, slice_one = list("bcd"), [1, 2, 3], slice(1, 4)
lbl_two, idx_two, slice_two = ["joe", "jolie"], [1, 2], slice(1, 3)

left = df.copy()
left.loc[lbl_one, lbl_two] = rhs
tm.assert_frame_equal(left, right)
tm.assert_frame_equal(left, right_loc)

left = df.copy()
left.iloc[idx_one, idx_two] = rhs
tm.assert_frame_equal(left, right)
tm.assert_frame_equal(left, right_iloc)

left = df.copy()
left.iloc[slice_one, slice_two] = rhs
tm.assert_frame_equal(left, right)
tm.assert_frame_equal(left, right_iloc)

xs = np.arange(20).reshape(5, 4)
cols = ["jim", "joe", "jolie", "joline"]
Expand All @@ -737,18 +737,23 @@ def run_tests(df, rhs, right):
rhs = -2 * df.iloc[3:0:-1, 2:0:-1]

# expected `right` result; just multiply by -2
right = df.copy()
right.iloc[1:4, 1:3] *= -2
right_iloc = df.copy()
right_iloc["joe"] = [1, 14, 10, 6, 17]
right_iloc["jolie"] = [2, 13, 9, 5, 18]
right_iloc.iloc[1:4, 1:3] *= -2
right_loc = df.copy()
right_loc.iloc[1:4, 1:3] *= -2

# run tests with uniform dtypes
run_tests(df, rhs, right)
run_tests(df, rhs, right_loc, right_iloc)

# make frames multi-type & re-run tests
for frame in [df, rhs, right]:
for frame in [df, rhs, right_loc, right_iloc]:
frame["joe"] = frame["joe"].astype("float64")
frame["jolie"] = frame["jolie"].map("@{}".format)

run_tests(df, rhs, right)
right_iloc["joe"] = [1.0, "@-28", "@-20", "@-12", 17.0]
right_iloc["jolie"] = ["@2", -26.0, -18.0, -10.0, "@18"]
run_tests(df, rhs, right_loc, right_iloc)

def test_str_label_slicing_with_negative_step(self):
SLC = pd.IndexSlice
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/series/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,3 +248,12 @@ def test_setitem_slice_into_readonly_backing_data():
series[1:3] = 1

assert not array.any()


def test_setitem_iloc_pure_position_based():
    """Check that ``Series.iloc`` assignment is purely positional.

    The value being assigned carries a shuffled index, so the expected
    result below is only reached when the values are written by position
    rather than matched up by label.
    """
    # GH: 22046
    target = Series([1, 2, 3])
    # Index deliberately out of order relative to ``target``.
    source = Series([4, 5, 6], index=[1, 0, 2])

    positions = slice(1, 3)
    target.iloc[positions] = source.iloc[positions]

    # Positions 1 and 2 receive the values at positions 1 and 2 of ``source``.
    tm.assert_series_equal(target, Series([1, 5, 6]))