Skip to content

Bug in iloc aligned objects #37728

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Nov 19, 2020
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,7 @@ Indexing
- Bug in :meth:`DataFrame.xs` ignored ``droplevel=False`` for columns (:issue:`19056`)
- Bug in :meth:`DataFrame.reindex` raising ``IndexingError`` wrongly for empty :class:`DataFrame` with ``tolerance`` not None or ``method="nearest"`` (:issue:`27315`)
- Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` using listlike indexer that contains elements that are in the index's ``categories`` but not in the index itself failing to raise ``KeyError`` (:issue:`37901`)
- Bug in :meth:`DataFrame.iloc` and :meth:`Series.iloc` aligned objects in ``__setitem__`` (:issue:`22046`)

Missing
^^^^^^^
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3135,7 +3135,9 @@ def _setitem_array(self, key, value):
key, axis=1, raise_missing=False
)[1]
self._check_setitem_copy()
self.iloc._setitem_with_indexer((slice(None), indexer), value)
self.iloc._setitem_with_indexer(
(slice(None), indexer), value, self.iloc.name
)

def _setitem_frame(self, key, value):
# support boolean setting with DataFrame input, e.g.
Expand Down
37 changes: 21 additions & 16 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -681,7 +681,7 @@ def __setitem__(self, key, value):
self._has_valid_setitem_indexer(key)

iloc = self if self.name == "iloc" else self.obj.iloc
iloc._setitem_with_indexer(indexer, value)
iloc._setitem_with_indexer(indexer, value, self.name)

def _validate_key(self, key, axis: int):
"""
Expand Down Expand Up @@ -1517,7 +1517,7 @@ def _get_setitem_indexer(self, key):

# -------------------------------------------------------------------

def _setitem_with_indexer(self, indexer, value):
def _setitem_with_indexer(self, indexer, value, name="iloc"):
"""
_setitem_with_indexer is for setting values on a Series/DataFrame
using positional indexers.
Expand Down Expand Up @@ -1593,7 +1593,7 @@ def _setitem_with_indexer(self, indexer, value):
new_indexer = convert_from_missing_indexer_tuple(
indexer, self.obj.axes
)
self._setitem_with_indexer(new_indexer, value)
self._setitem_with_indexer(new_indexer, value, name)

return

Expand All @@ -1618,17 +1618,17 @@ def _setitem_with_indexer(self, indexer, value):
indexer, missing = convert_missing_indexer(indexer)

if missing:
self._setitem_with_indexer_missing(indexer, value)
self._setitem_with_indexer_missing(indexer, value, name)
return

# align and set the values
if take_split_path:
# We have to operate column-wise
self._setitem_with_indexer_split_path(indexer, value)
self._setitem_with_indexer_split_path(indexer, value, name)
else:
self._setitem_single_block(indexer, value)
self._setitem_single_block(indexer, value, name)

def _setitem_with_indexer_split_path(self, indexer, value):
def _setitem_with_indexer_split_path(self, indexer, value, name):
"""
Setitem column-wise.
"""
Expand All @@ -1642,7 +1642,7 @@ def _setitem_with_indexer_split_path(self, indexer, value):
if isinstance(indexer[0], np.ndarray) and indexer[0].ndim > 2:
raise ValueError(r"Cannot set values with ndim > 2")

if isinstance(value, ABCSeries):
if isinstance(value, ABCSeries) and name != "iloc":
value = self._align_series(indexer, value)

# Ensure we have something we can iterate over
Expand All @@ -1657,7 +1657,7 @@ def _setitem_with_indexer_split_path(self, indexer, value):
if is_list_like_indexer(value) and getattr(value, "ndim", 1) > 0:

if isinstance(value, ABCDataFrame):
self._setitem_with_indexer_frame_value(indexer, value)
self._setitem_with_indexer_frame_value(indexer, value, name)

elif np.ndim(value) == 2:
self._setitem_with_indexer_2d_value(indexer, value)
Expand Down Expand Up @@ -1714,7 +1714,7 @@ def _setitem_with_indexer_2d_value(self, indexer, value):
# setting with a list, re-coerces
self._setitem_single_column(loc, value[:, i].tolist(), pi)

def _setitem_with_indexer_frame_value(self, indexer, value: "DataFrame"):
def _setitem_with_indexer_frame_value(self, indexer, value: "DataFrame", name):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you type name here (and above)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

ilocs = self._ensure_iterable_column_indexer(indexer[1])

sub_indexer = list(indexer)
Expand All @@ -1724,7 +1724,12 @@ def _setitem_with_indexer_frame_value(self, indexer, value: "DataFrame"):

unique_cols = value.columns.is_unique

if not unique_cols and value.columns.equals(self.obj.columns):
if name == "iloc":
for i, loc in enumerate(ilocs):
val = value.iloc[:, i]
self._setitem_single_column(loc, val, pi)

elif not unique_cols and value.columns.equals(self.obj.columns):
# We assume we are already aligned, see
# test_iloc_setitem_frame_duplicate_columns_multiple_blocks
for loc in ilocs:
Expand Down Expand Up @@ -1787,7 +1792,7 @@ def _setitem_single_column(self, loc: int, value, plane_indexer):
# reset the sliced object if unique
self.obj._iset_item(loc, ser)

def _setitem_single_block(self, indexer, value):
def _setitem_single_block(self, indexer, value, name):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

type

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

"""
_setitem_with_indexer for the case when we have a single Block.
"""
Expand Down Expand Up @@ -1816,13 +1821,13 @@ def _setitem_single_block(self, indexer, value):

indexer = maybe_convert_ix(*indexer)

if isinstance(value, (ABCSeries, dict)):
if isinstance(value, (ABCSeries, dict)) and name != "iloc":
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if we have a dict here dont we still want to wrap it in a Series?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, have no tests getting here with iloc. Will add one

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LMK when these tests are all ready and ill take another look. this one is almost ready

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tests are all in

# TODO(EA): ExtensionBlock.setitem this causes issues with
# setting for extensionarrays that store dicts. Need to decide
# if it's worth supporting that.
value = self._align_series(indexer, Series(value))

elif isinstance(value, ABCDataFrame):
elif isinstance(value, ABCDataFrame) and name != "iloc":
value = self._align_frame(indexer, value)

# check for chained assignment
Expand All @@ -1833,7 +1838,7 @@ def _setitem_single_block(self, indexer, value):
self.obj._mgr = self.obj._mgr.setitem(indexer=indexer, value=value)
self.obj._maybe_update_cacher(clear=True)

def _setitem_with_indexer_missing(self, indexer, value):
def _setitem_with_indexer_missing(self, indexer, value, name):
"""
Insert new row(s) or column(s) into the Series or DataFrame.
"""
Expand All @@ -1854,7 +1859,7 @@ def _setitem_with_indexer_missing(self, indexer, value):
if index.is_unique:
new_indexer = index.get_indexer([new_index[-1]])
if (new_indexer != -1).any():
return self._setitem_with_indexer(new_indexer, value)
return self._setitem_with_indexer(new_indexer, value, name)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

double check me on this, but i think it might be the case that we only get here with name = "loc"

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, you are right. Iloc raises when indexer is a dict. That is the only way we can get in there. Removed it


# this preserves dtype of the value
new_values = Series([value])._values
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,14 @@ def test_iloc_setitem_bool_indexer(self, klass):
expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]})
tm.assert_frame_equal(df, expected)

def test_setitem_iloc_pure_position_based(self):
# GH: 22046
df1 = DataFrame({"a2": [11, 12, 13], "b2": [14, 15, 16]})
df2 = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
df2.iloc[:, [1]] = df1.iloc[:, [0]]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are there other indexer/value type combinations that are relevant here? e.g. the value being set here is a DataFrame, would a Series trigger the same bug? what if instead of [1] the indexer was slice(1, 2)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

df1 = DataFrame({"a2": [11, 12, 13], "b2": [14, 15, 16]})
df2 = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
df2.iloc[:, [1]] = df1.iloc[:, 0]

raises

Traceback (most recent call last):
  File "/home/developer/.config/JetBrains/PyCharm2020.2/scratches/scratch_4.py", line 132, in <module>
    df2.iloc[:, [1]] = df1.iloc[:, 0]
  File "/home/developer/PycharmProjects/pandas/pandas/core/indexing.py", line 684, in __setitem__
    iloc._setitem_with_indexer(indexer, value)
  File "/home/developer/PycharmProjects/pandas/pandas/core/indexing.py", line 1637, in _setitem_with_indexer
    self._setitem_single_block(indexer, value)
  File "/home/developer/PycharmProjects/pandas/pandas/core/indexing.py", line 1851, in _setitem_single_block
    self.obj._mgr = self.obj._mgr.setitem(indexer=indexer, value=value)
  File "/home/developer/PycharmProjects/pandas/pandas/core/internals/managers.py", line 562, in setitem
    return self.apply("setitem", indexer=indexer, value=value)
  File "/home/developer/PycharmProjects/pandas/pandas/core/internals/managers.py", line 427, in apply
    applied = getattr(b, f)(**kwargs)
  File "/home/developer/PycharmProjects/pandas/pandas/core/internals/blocks.py", line 1005, in setitem
    values[indexer] = value
ValueError: shape mismatch: value array of shape (3,) could not be broadcast to indexing result of shape (1,3)

Process finished with exit code 1

Did not check slice previously, this triggered the bug too. Will add test

expected = DataFrame({"a": [1, 2, 3], "b": [11, 12, 13], "c": [7, 8, 9]})
tm.assert_frame_equal(df2, expected)


class TestDataFrameSetItemSlicing:
def test_setitem_slice_position(self):
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/indexing/test_iloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -801,6 +801,13 @@ def test_iloc_setitem_empty_frame_raises_with_3d_ndarray(self):
with pytest.raises(ValueError, match=msg):
obj.iloc[nd3] = 0

def test_iloc_assign_series_to_df_cell(self):
# GH 37593
df = DataFrame(columns=["a"], index=[0])
df.iloc[0, 0] = Series([1, 2, 3])
expected = DataFrame({"a": [Series([1, 2, 3])]}, columns=["a"], index=[0])
tm.assert_frame_equal(df, expected)


class TestILocErrors:
# NB: this test should work for _any_ Series we can pass as
Expand Down
27 changes: 16 additions & 11 deletions pandas/tests/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -668,43 +668,48 @@ def test_float_index_at_iat(self):
def test_rhs_alignment(self):
# GH8258, tests that both rows & columns are aligned to what is
# assigned to. covers both uniform data-type & multi-type cases
def run_tests(df, rhs, right):
def run_tests(df, rhs, right_loc, right_iloc):
# label, index, slice
lbl_one, idx_one, slice_one = list("bcd"), [1, 2, 3], slice(1, 4)
lbl_two, idx_two, slice_two = ["joe", "jolie"], [1, 2], slice(1, 3)

left = df.copy()
left.loc[lbl_one, lbl_two] = rhs
tm.assert_frame_equal(left, right)
tm.assert_frame_equal(left, right_loc)

left = df.copy()
left.iloc[idx_one, idx_two] = rhs
tm.assert_frame_equal(left, right)
tm.assert_frame_equal(left, right_iloc)

left = df.copy()
left.iloc[slice_one, slice_two] = rhs
tm.assert_frame_equal(left, right)
tm.assert_frame_equal(left, right_iloc)

xs = np.arange(20).reshape(5, 4)
cols = ["jim", "joe", "jolie", "joline"]
df = DataFrame(xs, columns=cols, index=list("abcde"))
df = DataFrame(xs, columns=cols, index=list("abcde"), dtype="int64")

# right hand side; permute the indices and multiplpy by -2
rhs = -2 * df.iloc[3:0:-1, 2:0:-1]

# expected `right` result; just multiply by -2
right = df.copy()
right.iloc[1:4, 1:3] *= -2
right_iloc = df.copy()
right_iloc["joe"] = [1, 14, 10, 6, 17]
right_iloc["jolie"] = [2, 13, 9, 5, 18]
right_iloc.iloc[1:4, 1:3] *= -2
right_loc = df.copy()
right_loc.iloc[1:4, 1:3] *= -2

# run tests with uniform dtypes
run_tests(df, rhs, right)
run_tests(df, rhs, right_loc, right_iloc)

# make frames multi-type & re-run tests
for frame in [df, rhs, right]:
for frame in [df, rhs, right_loc, right_iloc]:
frame["joe"] = frame["joe"].astype("float64")
frame["jolie"] = frame["jolie"].map("@{}".format)

run_tests(df, rhs, right)
right_iloc["joe"] = [1.0, "@-28", "@-20", "@-12", 17.0]
right_iloc["jolie"] = ["@2", -26.0, -18.0, -10.0, "@18"]
run_tests(df, rhs, right_loc, right_iloc)

def test_str_label_slicing_with_negative_step(self):
SLC = pd.IndexSlice
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/series/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,3 +282,12 @@ def test_setitem_slice_into_readonly_backing_data():
series[1:3] = 1

assert not array.any()


def test_setitem_iloc_pure_position_based():
# GH: 22046
ser1 = Series([1, 2, 3])
ser2 = Series([4, 5, 6], index=[1, 0, 2])
ser1.iloc[1:3] = ser2.iloc[1:3]
expected = Series([1, 5, 6])
tm.assert_series_equal(ser1, expected)