diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 62da3c0c5cddc..1f05c35097b22 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -589,6 +589,7 @@ Indexing - Bug in :meth:`DataFrame.xs` ignored ``droplevel=False`` for columns (:issue:`19056`) - Bug in :meth:`DataFrame.reindex` raising ``IndexingError`` wrongly for empty :class:`DataFrame` with ``tolerance`` not None or ``method="nearest"`` (:issue:`27315`) - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` using listlike indexer that contains elements that are in the index's ``categories`` but not in the index itself failing to raise ``KeyError`` (:issue:`37901`) +- Bug in :meth:`DataFrame.iloc` and :meth:`Series.iloc` aligning objects in ``__setitem__`` (:issue:`22046`) Missing ^^^^^^^ diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 62b1554246e26..9bfbc22b1e628 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -681,7 +681,7 @@ def __setitem__(self, key, value): self._has_valid_setitem_indexer(key) iloc = self if self.name == "iloc" else self.obj.iloc - iloc._setitem_with_indexer(indexer, value) + iloc._setitem_with_indexer(indexer, value, self.name) def _validate_key(self, key, axis: int): """ @@ -1517,7 +1517,7 @@ def _get_setitem_indexer(self, key): # ------------------------------------------------------------------- - def _setitem_with_indexer(self, indexer, value): + def _setitem_with_indexer(self, indexer, value, name="iloc"): """ _setitem_with_indexer is for setting values on a Series/DataFrame using positional indexers. @@ -1593,7 +1593,7 @@ def _setitem_with_indexer(self, indexer, value): new_indexer = convert_from_missing_indexer_tuple( indexer, self.obj.axes ) - self._setitem_with_indexer(new_indexer, value) + self._setitem_with_indexer(new_indexer, value, name) return @@ -1624,11 +1624,11 @@ def _setitem_with_indexer(self, indexer, value): # align and set the values if take_split_path: # We have to operate column-wise - self._setitem_with_indexer_split_path(indexer, value) + self._setitem_with_indexer_split_path(indexer, value, name) else: - self._setitem_single_block(indexer, value) + self._setitem_single_block(indexer, value, name) - def _setitem_with_indexer_split_path(self, indexer, value): + def _setitem_with_indexer_split_path(self, indexer, value, name: str): """ Setitem column-wise. """ @@ -1642,7 +1642,7 @@ def _setitem_with_indexer_split_path(self, indexer, value): if isinstance(indexer[0], np.ndarray) and indexer[0].ndim > 2: raise ValueError(r"Cannot set values with ndim > 2") - if isinstance(value, ABCSeries): + if isinstance(value, ABCSeries) and name != "iloc": value = self._align_series(indexer, value) # Ensure we have something we can iterate over @@ -1657,7 +1657,7 @@ def _setitem_with_indexer_split_path(self, indexer, value): if is_list_like_indexer(value) and getattr(value, "ndim", 1) > 0: if isinstance(value, ABCDataFrame): - self._setitem_with_indexer_frame_value(indexer, value) + self._setitem_with_indexer_frame_value(indexer, value, name) elif np.ndim(value) == 2: self._setitem_with_indexer_2d_value(indexer, value) @@ -1714,7 +1714,7 @@ def _setitem_with_indexer_2d_value(self, indexer, value): # setting with a list, re-coerces self._setitem_single_column(loc, value[:, i].tolist(), pi) - def _setitem_with_indexer_frame_value(self, indexer, value: "DataFrame"): + def _setitem_with_indexer_frame_value(self, indexer, value: "DataFrame", name: str): ilocs = self._ensure_iterable_column_indexer(indexer[1]) sub_indexer = list(indexer) @@ -1724,7 +1724,13 @@ def _setitem_with_indexer_frame_value(self, indexer, value: "DataFrame"): unique_cols = value.columns.is_unique - if not unique_cols and value.columns.equals(self.obj.columns): + # We do not want to align the value in case of iloc GH#37728 + if name == "iloc": + for i, loc in enumerate(ilocs): + val = value.iloc[:, i] + self._setitem_single_column(loc, val, pi) + + elif not unique_cols and value.columns.equals(self.obj.columns): # We assume we are already aligned, see # test_iloc_setitem_frame_duplicate_columns_multiple_blocks for loc in ilocs: @@ -1787,7 +1793,7 @@ def _setitem_single_column(self, loc: int, value, plane_indexer): # reset the sliced object if unique self.obj._iset_item(loc, ser) - def _setitem_single_block(self, indexer, value): + def _setitem_single_block(self, indexer, value, name: str): """ _setitem_with_indexer for the case when we have a single Block. """ @@ -1815,14 +1821,13 @@ def _setitem_single_block(self, indexer, value): return indexer = maybe_convert_ix(*indexer) - - if isinstance(value, (ABCSeries, dict)): + if isinstance(value, ABCSeries) and name != "iloc" or isinstance(value, dict): # TODO(EA): ExtensionBlock.setitem this causes issues with # setting for extensionarrays that store dicts. Need to decide # if it's worth supporting that. value = self._align_series(indexer, Series(value)) - elif isinstance(value, ABCDataFrame): + elif isinstance(value, ABCDataFrame) and name != "iloc": value = self._align_frame(indexer, value) # check for chained assignment @@ -1854,7 +1859,7 @@ def _setitem_with_indexer_missing(self, indexer, value): if index.is_unique: new_indexer = index.get_indexer([new_index[-1]]) if (new_indexer != -1).any(): - return self._setitem_with_indexer(new_indexer, value) + return self._setitem_with_indexer(new_indexer, value, "loc") # this preserves dtype of the value new_values = Series([value])._values diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index cb04a61b9e1cb..e4c57dc2b72fc 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -289,15 +289,6 @@ def test_setitem_periodindex(self): assert isinstance(rs.index, PeriodIndex) tm.assert_index_equal(rs.index, rng) - @pytest.mark.parametrize("klass", [list, np.array]) - def test_iloc_setitem_bool_indexer(self, klass): - # GH: 36741 - df = DataFrame({"flag": ["x", "y", "z"], "value": [1, 3, 4]}) - indexer = klass([True, False, False]) - df.iloc[indexer, 1] = df.iloc[indexer, 1] * 2 - expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]}) - tm.assert_frame_equal(df, expected) - class TestDataFrameSetItemSlicing: def test_setitem_slice_position(self): diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 0360d7e01e62d..84073bbb023a8 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -801,6 +801,39 @@ def test_iloc_setitem_empty_frame_raises_with_3d_ndarray(self): with pytest.raises(ValueError, match=msg): obj.iloc[nd3] = 0 + def test_iloc_assign_series_to_df_cell(self): + # GH 37593 + df = DataFrame(columns=["a"], index=[0]) + df.iloc[0, 0] = Series([1, 2, 3]) + expected = DataFrame({"a": [Series([1, 2, 3])]}, columns=["a"], index=[0]) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("klass", [list, np.array]) + def test_iloc_setitem_bool_indexer(self, klass): + # GH#36741 + df = DataFrame({"flag": ["x", "y", "z"], "value": [1, 3, 4]}) + indexer = klass([True, False, False]) + df.iloc[indexer, 1] = df.iloc[indexer, 1] * 2 + expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]}) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("indexer", [[1], slice(1, 2)]) + def test_setitem_iloc_pure_position_based(self, indexer): + # GH#22046 + df1 = DataFrame({"a2": [11, 12, 13], "b2": [14, 15, 16]}) + df2 = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) + df2.iloc[:, indexer] = df1.iloc[:, [0]] + expected = DataFrame({"a": [1, 2, 3], "b": [11, 12, 13], "c": [7, 8, 9]}) + tm.assert_frame_equal(df2, expected) + + def test_setitem_iloc_dictionary_value(self): + # GH#37728 + df = DataFrame({"x": [1, 2], "y": [2, 2]}) + rhs = dict(x=9, y=99) + df.iloc[1] = rhs + expected = DataFrame({"x": [1, 9], "y": [2, 99]}) + tm.assert_frame_equal(df, expected) + class TestILocErrors: # NB: this test should work for _any_ Series we can pass as @@ -966,3 +999,11 @@ def test_iloc(self): def test_iloc_getitem_nonunique(self): ser = Series([0, 1, 2], index=[0, 1, 0]) assert ser.iloc[2] == 2 + + def test_setitem_iloc_pure_position_based(self): + # GH#22046 + ser1 = Series([1, 2, 3]) + ser2 = Series([4, 5, 6], index=[1, 0, 2]) + ser1.iloc[1:3] = ser2.iloc[1:3] + expected = Series([1, 5, 6]) + tm.assert_series_equal(ser1, expected) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 7470d2768590e..aae8398db2b4d 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -668,43 +668,48 @@ def test_float_index_at_iat(self): def test_rhs_alignment(self): # GH8258, tests that both rows & columns are aligned to what is # assigned to. covers both uniform data-type & multi-type cases - def run_tests(df, rhs, right): + def run_tests(df, rhs, right_loc, right_iloc): # label, index, slice lbl_one, idx_one, slice_one = list("bcd"), [1, 2, 3], slice(1, 4) lbl_two, idx_two, slice_two = ["joe", "jolie"], [1, 2], slice(1, 3) left = df.copy() left.loc[lbl_one, lbl_two] = rhs - tm.assert_frame_equal(left, right) + tm.assert_frame_equal(left, right_loc) left = df.copy() left.iloc[idx_one, idx_two] = rhs - tm.assert_frame_equal(left, right) + tm.assert_frame_equal(left, right_iloc) left = df.copy() left.iloc[slice_one, slice_two] = rhs - tm.assert_frame_equal(left, right) + tm.assert_frame_equal(left, right_iloc) xs = np.arange(20).reshape(5, 4) cols = ["jim", "joe", "jolie", "joline"] - df = DataFrame(xs, columns=cols, index=list("abcde")) + df = DataFrame(xs, columns=cols, index=list("abcde"), dtype="int64") # right hand side; permute the indices and multiplpy by -2 rhs = -2 * df.iloc[3:0:-1, 2:0:-1] # expected `right` result; just multiply by -2 - right = df.copy() - right.iloc[1:4, 1:3] *= -2 + right_iloc = df.copy() + right_iloc["joe"] = [1, 14, 10, 6, 17] + right_iloc["jolie"] = [2, 13, 9, 5, 18] + right_iloc.iloc[1:4, 1:3] *= -2 + right_loc = df.copy() + right_loc.iloc[1:4, 1:3] *= -2 # run tests with uniform dtypes - run_tests(df, rhs, right) + run_tests(df, rhs, right_loc, right_iloc) # make frames multi-type & re-run tests - for frame in [df, rhs, right]: + for frame in [df, rhs, right_loc, right_iloc]: frame["joe"] = frame["joe"].astype("float64") frame["jolie"] = frame["jolie"].map("@{}".format) - - run_tests(df, rhs, right) + right_iloc["joe"] = [1.0, "@-28", "@-20", "@-12", 17.0] + right_iloc["jolie"] = ["@2", -26.0, -18.0, -10.0, "@18"] + run_tests(df, rhs, right_loc, right_iloc) def test_str_label_slicing_with_negative_step(self): SLC = pd.IndexSlice