Skip to content

Bug in iloc aligned objects #37728

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Nov 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,7 @@ Indexing
- Bug in :meth:`DataFrame.xs` ignored ``droplevel=False`` for columns (:issue:`19056`)
- Bug in :meth:`DataFrame.reindex` raising ``IndexingError`` wrongly for empty :class:`DataFrame` with ``tolerance`` not None or ``method="nearest"`` (:issue:`27315`)
- Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` using listlike indexer that contains elements that are in the index's ``categories`` but not in the index itself failing to raise ``KeyError`` (:issue:`37901`)
- Bug in :meth:`DataFrame.iloc` and :meth:`Series.iloc` aligning objects in ``__setitem__`` (:issue:`22046`)

Missing
^^^^^^^
Expand Down
35 changes: 20 additions & 15 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -681,7 +681,7 @@ def __setitem__(self, key, value):
self._has_valid_setitem_indexer(key)

iloc = self if self.name == "iloc" else self.obj.iloc
iloc._setitem_with_indexer(indexer, value)
iloc._setitem_with_indexer(indexer, value, self.name)

def _validate_key(self, key, axis: int):
"""
Expand Down Expand Up @@ -1517,7 +1517,7 @@ def _get_setitem_indexer(self, key):

# -------------------------------------------------------------------

def _setitem_with_indexer(self, indexer, value):
def _setitem_with_indexer(self, indexer, value, name="iloc"):
"""
_setitem_with_indexer is for setting values on a Series/DataFrame
using positional indexers.
Expand Down Expand Up @@ -1593,7 +1593,7 @@ def _setitem_with_indexer(self, indexer, value):
new_indexer = convert_from_missing_indexer_tuple(
indexer, self.obj.axes
)
self._setitem_with_indexer(new_indexer, value)
self._setitem_with_indexer(new_indexer, value, name)

return

Expand Down Expand Up @@ -1624,11 +1624,11 @@ def _setitem_with_indexer(self, indexer, value):
# align and set the values
if take_split_path:
# We have to operate column-wise
self._setitem_with_indexer_split_path(indexer, value)
self._setitem_with_indexer_split_path(indexer, value, name)
else:
self._setitem_single_block(indexer, value)
self._setitem_single_block(indexer, value, name)

def _setitem_with_indexer_split_path(self, indexer, value):
def _setitem_with_indexer_split_path(self, indexer, value, name: str):
"""
Setitem column-wise.
"""
Expand All @@ -1642,7 +1642,7 @@ def _setitem_with_indexer_split_path(self, indexer, value):
if isinstance(indexer[0], np.ndarray) and indexer[0].ndim > 2:
raise ValueError(r"Cannot set values with ndim > 2")

if isinstance(value, ABCSeries):
if isinstance(value, ABCSeries) and name != "iloc":
value = self._align_series(indexer, value)

# Ensure we have something we can iterate over
Expand All @@ -1657,7 +1657,7 @@ def _setitem_with_indexer_split_path(self, indexer, value):
if is_list_like_indexer(value) and getattr(value, "ndim", 1) > 0:

if isinstance(value, ABCDataFrame):
self._setitem_with_indexer_frame_value(indexer, value)
self._setitem_with_indexer_frame_value(indexer, value, name)

elif np.ndim(value) == 2:
self._setitem_with_indexer_2d_value(indexer, value)
Expand Down Expand Up @@ -1714,7 +1714,7 @@ def _setitem_with_indexer_2d_value(self, indexer, value):
# setting with a list, re-coerces
self._setitem_single_column(loc, value[:, i].tolist(), pi)

def _setitem_with_indexer_frame_value(self, indexer, value: "DataFrame"):
def _setitem_with_indexer_frame_value(self, indexer, value: "DataFrame", name: str):
ilocs = self._ensure_iterable_column_indexer(indexer[1])

sub_indexer = list(indexer)
Expand All @@ -1724,7 +1724,13 @@ def _setitem_with_indexer_frame_value(self, indexer, value: "DataFrame"):

unique_cols = value.columns.is_unique

if not unique_cols and value.columns.equals(self.obj.columns):
# We do not want to align the value in case of iloc GH#37728
if name == "iloc":
for i, loc in enumerate(ilocs):
val = value.iloc[:, i]
self._setitem_single_column(loc, val, pi)

elif not unique_cols and value.columns.equals(self.obj.columns):
# We assume we are already aligned, see
# test_iloc_setitem_frame_duplicate_columns_multiple_blocks
for loc in ilocs:
Expand Down Expand Up @@ -1787,7 +1793,7 @@ def _setitem_single_column(self, loc: int, value, plane_indexer):
# reset the sliced object if unique
self.obj._iset_item(loc, ser)

def _setitem_single_block(self, indexer, value):
def _setitem_single_block(self, indexer, value, name: str):
"""
_setitem_with_indexer for the case when we have a single Block.
"""
Expand Down Expand Up @@ -1815,14 +1821,13 @@ def _setitem_single_block(self, indexer, value):
return

indexer = maybe_convert_ix(*indexer)

if isinstance(value, (ABCSeries, dict)):
if isinstance(value, ABCSeries) and name != "iloc" or isinstance(value, dict):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for follow-up, can you put parens where appropriate so i dont have to think about whether the "and" gets evaluated before the "or"

# TODO(EA): ExtensionBlock.setitem this causes issues with
# setting for extensionarrays that store dicts. Need to decide
# if it's worth supporting that.
value = self._align_series(indexer, Series(value))

elif isinstance(value, ABCDataFrame):
elif isinstance(value, ABCDataFrame) and name != "iloc":
value = self._align_frame(indexer, value)

# check for chained assignment
Expand Down Expand Up @@ -1854,7 +1859,7 @@ def _setitem_with_indexer_missing(self, indexer, value):
if index.is_unique:
new_indexer = index.get_indexer([new_index[-1]])
if (new_indexer != -1).any():
return self._setitem_with_indexer(new_indexer, value)
return self._setitem_with_indexer(new_indexer, value, "loc")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

comment explaining that we only get here with name=="loc" and thats why its hardcoded (or assertion)


# this preserves dtype of the value
new_values = Series([value])._values
Expand Down
9 changes: 0 additions & 9 deletions pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,15 +289,6 @@ def test_setitem_periodindex(self):
assert isinstance(rs.index, PeriodIndex)
tm.assert_index_equal(rs.index, rng)

@pytest.mark.parametrize("klass", [list, np.array])
def test_iloc_setitem_bool_indexer(self, klass):
# GH: 36741
df = DataFrame({"flag": ["x", "y", "z"], "value": [1, 3, 4]})
indexer = klass([True, False, False])
df.iloc[indexer, 1] = df.iloc[indexer, 1] * 2
expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]})
tm.assert_frame_equal(df, expected)


class TestDataFrameSetItemSlicing:
def test_setitem_slice_position(self):
Expand Down
41 changes: 41 additions & 0 deletions pandas/tests/indexing/test_iloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -801,6 +801,39 @@ def test_iloc_setitem_empty_frame_raises_with_3d_ndarray(self):
with pytest.raises(ValueError, match=msg):
obj.iloc[nd3] = 0

def test_iloc_assign_series_to_df_cell(self):
# GH 37593
df = DataFrame(columns=["a"], index=[0])
df.iloc[0, 0] = Series([1, 2, 3])
expected = DataFrame({"a": [Series([1, 2, 3])]}, columns=["a"], index=[0])
tm.assert_frame_equal(df, expected)

@pytest.mark.parametrize("klass", [list, np.array])
def test_iloc_setitem_bool_indexer(self, klass):
# GH#36741
df = DataFrame({"flag": ["x", "y", "z"], "value": [1, 3, 4]})
indexer = klass([True, False, False])
df.iloc[indexer, 1] = df.iloc[indexer, 1] * 2
expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]})
tm.assert_frame_equal(df, expected)

@pytest.mark.parametrize("indexer", [[1], slice(1, 2)])
def test_setitem_iloc_pure_position_based(self, indexer):
# GH#22046
df1 = DataFrame({"a2": [11, 12, 13], "b2": [14, 15, 16]})
df2 = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
df2.iloc[:, indexer] = df1.iloc[:, [0]]
expected = DataFrame({"a": [1, 2, 3], "b": [11, 12, 13], "c": [7, 8, 9]})
tm.assert_frame_equal(df2, expected)

def test_setitem_iloc_dictionary_value(self):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nitpick for future: setitem_iloc -> iloc_setitem

# GH#37728
df = DataFrame({"x": [1, 2], "y": [2, 2]})
rhs = dict(x=9, y=99)
df.iloc[1] = rhs
expected = DataFrame({"x": [1, 9], "y": [2, 99]})
tm.assert_frame_equal(df, expected)


class TestILocErrors:
# NB: this test should work for _any_ Series we can pass as
Expand Down Expand Up @@ -966,3 +999,11 @@ def test_iloc(self):
def test_iloc_getitem_nonunique(self):
ser = Series([0, 1, 2], index=[0, 1, 0])
assert ser.iloc[2] == 2

def test_setitem_iloc_pure_position_based(self):
# GH#22046
ser1 = Series([1, 2, 3])
ser2 = Series([4, 5, 6], index=[1, 0, 2])
ser1.iloc[1:3] = ser2.iloc[1:3]
expected = Series([1, 5, 6])
tm.assert_series_equal(ser1, expected)
27 changes: 16 additions & 11 deletions pandas/tests/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -668,43 +668,48 @@ def test_float_index_at_iat(self):
def test_rhs_alignment(self):
# GH8258, tests that both rows & columns are aligned to what is
# assigned to. covers both uniform data-type & multi-type cases
def run_tests(df, rhs, right):
def run_tests(df, rhs, right_loc, right_iloc):
# label, index, slice
lbl_one, idx_one, slice_one = list("bcd"), [1, 2, 3], slice(1, 4)
lbl_two, idx_two, slice_two = ["joe", "jolie"], [1, 2], slice(1, 3)

left = df.copy()
left.loc[lbl_one, lbl_two] = rhs
tm.assert_frame_equal(left, right)
tm.assert_frame_equal(left, right_loc)

left = df.copy()
left.iloc[idx_one, idx_two] = rhs
tm.assert_frame_equal(left, right)
tm.assert_frame_equal(left, right_iloc)

left = df.copy()
left.iloc[slice_one, slice_two] = rhs
tm.assert_frame_equal(left, right)
tm.assert_frame_equal(left, right_iloc)

xs = np.arange(20).reshape(5, 4)
cols = ["jim", "joe", "jolie", "joline"]
df = DataFrame(xs, columns=cols, index=list("abcde"))
df = DataFrame(xs, columns=cols, index=list("abcde"), dtype="int64")

# right hand side; permute the indices and multiplpy by -2
rhs = -2 * df.iloc[3:0:-1, 2:0:-1]

# expected `right` result; just multiply by -2
right = df.copy()
right.iloc[1:4, 1:3] *= -2
right_iloc = df.copy()
right_iloc["joe"] = [1, 14, 10, 6, 17]
right_iloc["jolie"] = [2, 13, 9, 5, 18]
right_iloc.iloc[1:4, 1:3] *= -2
right_loc = df.copy()
right_loc.iloc[1:4, 1:3] *= -2

# run tests with uniform dtypes
run_tests(df, rhs, right)
run_tests(df, rhs, right_loc, right_iloc)

# make frames multi-type & re-run tests
for frame in [df, rhs, right]:
for frame in [df, rhs, right_loc, right_iloc]:
frame["joe"] = frame["joe"].astype("float64")
frame["jolie"] = frame["jolie"].map("@{}".format)

run_tests(df, rhs, right)
right_iloc["joe"] = [1.0, "@-28", "@-20", "@-12", 17.0]
right_iloc["jolie"] = ["@2", -26.0, -18.0, -10.0, "@18"]
run_tests(df, rhs, right_loc, right_iloc)

def test_str_label_slicing_with_negative_step(self):
SLC = pd.IndexSlice
Expand Down