From 9d115113df1460bad097a5e41c0369e7d6aa737e Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 10 Nov 2020 01:24:51 +0100 Subject: [PATCH 01/16] Bug in iloc aligned objects --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/indexing.py | 18 +++++++++----- pandas/tests/frame/indexing/test_setitem.py | 8 +++++++ pandas/tests/indexing/test_indexing.py | 25 ++++++++++++-------- pandas/tests/series/indexing/test_setitem.py | 9 +++++++ 5 files changed, 45 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index e488ca52be8a0..8e2872d9ec58e 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -469,6 +469,7 @@ Indexing - Bug in :meth:`Index.where` incorrectly casting numeric values to strings (:issue:`37591`) - Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` raises when numeric label was given for object :class:`Index` although label was in :class:`Index` (:issue:`26491`) - Bug in :meth:`DataFrame.loc` returned requested key plus missing values when ``loc`` was applied to single level from :class:`MultiIndex` (:issue:`27104`) +- Bug in :meth:`DataFrame.iloc` and :meth:`Series.iloc` aligned objects in ``__setitem__`` (:issue:`22046`) Missing ^^^^^^^ diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index c5e331a104726..4789a685e09f4 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -681,6 +681,7 @@ def __setitem__(self, key, value): self._has_valid_setitem_indexer(key) iloc = self if self.name == "iloc" else self.obj.iloc + iloc.name = self.name iloc._setitem_with_indexer(indexer, value) def _validate_key(self, key, axis: int): @@ -1648,7 +1649,7 @@ def _setitem_with_indexer_split_path(self, indexer, value): if len(indexer) > self.ndim: raise IndexError("too many indices for array") - if isinstance(value, ABCSeries): + if isinstance(value, ABCSeries) and self.name != "iloc": value = self._align_series(indexer, value) # Ensure we have something we can iterate over @@ -1767,15 +1768,20 @@ def _setitem_with_indexer_frame_value(self, indexer, value: "DataFrame"): raise ValueError("Setting with non-unique columns is not allowed.") else: + index = 0 for loc in ilocs: item = self.obj.columns[loc] if item in value: sub_indexer[1] = item - val = self._align_series( - tuple(sub_indexer), value[item], multiindex_indexer - ) + if self.name == "loc": + val = self._align_series( + tuple(sub_indexer), value[item], multiindex_indexer + ) + else: + val = value.iloc[:, index] else: val = np.nan + index += 1 self._setitem_single_column(loc, val, plane_indexer) @@ -1833,13 +1839,13 @@ def _setitem_single_block(self, indexer, value): indexer = maybe_convert_ix(*indexer) - if isinstance(value, (ABCSeries, dict)): + if isinstance(value, (ABCSeries, dict)) and self.name != "iloc": # TODO(EA): ExtensionBlock.setitem this causes issues with # setting for extensionarrays that store dicts. Need to decide # if it's worth supporting that. value = self._align_series(indexer, Series(value)) - elif isinstance(value, ABCDataFrame): + elif isinstance(value, ABCDataFrame) and self.name != "iloc": value = self._align_frame(indexer, value) # check for chained assignment diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index cb04a61b9e1cb..af923bf9b8f2b 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -298,6 +298,14 @@ def test_iloc_setitem_bool_indexer(self, klass): expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]}) tm.assert_frame_equal(df, expected) + def test_setitem_iloc_pure_position_based(self): + # GH: 22046 + df1 = DataFrame({"a2": [11, 12, 13], "b2": [14, 15, 16]}) + df2 = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) + df2.iloc[:, [1]] = df1.iloc[:, [0]] + expected = DataFrame({"a": [1, 2, 3], "b": [11, 12, 13], "c": [7, 8, 9]}) + tm.assert_frame_equal(df2, expected) + class TestDataFrameSetItemSlicing: def test_setitem_slice_position(self): diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 06bd8a5f300bb..14c7f95f67d40 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -712,22 +712,22 @@ def test_mixed_index_no_fallback(self): def test_rhs_alignment(self): # GH8258, tests that both rows & columns are aligned to what is # assigned to. covers both uniform data-type & multi-type cases - def run_tests(df, rhs, right): + def run_tests(df, rhs, right_loc, right_iloc): # label, index, slice lbl_one, idx_one, slice_one = list("bcd"), [1, 2, 3], slice(1, 4) lbl_two, idx_two, slice_two = ["joe", "jolie"], [1, 2], slice(1, 3) left = df.copy() left.loc[lbl_one, lbl_two] = rhs - tm.assert_frame_equal(left, right) + tm.assert_frame_equal(left, right_loc) left = df.copy() left.iloc[idx_one, idx_two] = rhs - tm.assert_frame_equal(left, right) + tm.assert_frame_equal(left, right_iloc) left = df.copy() left.iloc[slice_one, slice_two] = rhs - tm.assert_frame_equal(left, right) + tm.assert_frame_equal(left, right_iloc) xs = np.arange(20).reshape(5, 4) cols = ["jim", "joe", "jolie", "joline"] @@ -737,18 +737,23 @@ def run_tests(df, rhs, right): rhs = -2 * df.iloc[3:0:-1, 2:0:-1] # expected `right` result; just multiply by -2 - right = df.copy() - right.iloc[1:4, 1:3] *= -2 + right_iloc = df.copy() + right_iloc["joe"] = [1, 14, 10, 6, 17] + right_iloc["jolie"] = [2, 13, 9, 5, 18] + right_iloc.iloc[1:4, 1:3] *= -2 + right_loc = df.copy() + right_loc.iloc[1:4, 1:3] *= -2 # run tests with uniform dtypes - run_tests(df, rhs, right) + run_tests(df, rhs, right_loc, right_iloc) # make frames multi-type & re-run tests - for frame in [df, rhs, right]: + for frame in [df, rhs, right_loc, right_iloc]: frame["joe"] = frame["joe"].astype("float64") frame["jolie"] = frame["jolie"].map("@{}".format) - - run_tests(df, rhs, right) + right_iloc["joe"] = [1.0, "@-28", "@-20", "@-12", 17.0] + right_iloc["jolie"] = ["@2", -26.0, -18.0, -10.0, "@18"] + run_tests(df, rhs, right_loc, right_iloc) def test_str_label_slicing_with_negative_step(self): SLC = pd.IndexSlice diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 7e25e5200d610..fc432c03a16fa 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -248,3 +248,12 @@ def test_setitem_slice_into_readonly_backing_data(): series[1:3] = 1 assert not array.any() + + +def test_setitem_iloc_pure_position_based(): + # GH: 22046 + ser1 = Series([1, 2, 3]) + ser2 = Series([4, 5, 6], index=[1, 0, 2]) + ser1.iloc[1:3] = ser2.iloc[1:3] + expected = Series([1, 5, 6]) + tm.assert_series_equal(ser1, expected) From ca3f47b95f87cee5c4fa80b45d533f103f34022b Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 10 Nov 2020 20:29:24 +0100 Subject: [PATCH 02/16] Pass name through functions --- pandas/core/frame.py | 8 ++++--- pandas/core/indexing.py | 49 ++++++++++++++++++++--------------------- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 80743f8cc924b..b2f6e77c72382 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3106,7 +3106,7 @@ def _setitem_slice(self, key: slice, value): # operates on labels and we need to operate positional for # backwards-compat, xref GH#31469 self._check_setitem_copy() - self.iloc._setitem_with_indexer(key, value) + self.iloc._setitem_with_indexer(key, value, self.iloc.name) def _setitem_array(self, key, value): # also raises Exception if object array with NA values @@ -3118,7 +3118,7 @@ def _setitem_array(self, key, value): key = check_bool_indexer(self.index, key) indexer = key.nonzero()[0] self._check_setitem_copy() - self.iloc._setitem_with_indexer(indexer, value) + self.iloc._setitem_with_indexer(indexer, value, self.iloc.name) else: if isinstance(value, DataFrame): if len(value.columns) != len(key): @@ -3131,7 +3131,9 @@ def _setitem_array(self, key, value): key, axis=1, raise_missing=False )[1] self._check_setitem_copy() - self.iloc._setitem_with_indexer((slice(None), indexer), value) + self.iloc._setitem_with_indexer( + (slice(None), indexer), value, self.iloc.name + ) def _setitem_frame(self, key, value): # support boolean setting with DataFrame input, e.g. diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 4789a685e09f4..5be3c86b228c3 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -681,8 +681,7 @@ def __setitem__(self, key, value): self._has_valid_setitem_indexer(key) iloc = self if self.name == "iloc" else self.obj.iloc - iloc.name = self.name - iloc._setitem_with_indexer(indexer, value) + iloc._setitem_with_indexer(indexer, value, self.name) def _validate_key(self, key, axis: int): """ @@ -1526,7 +1525,7 @@ def _get_setitem_indexer(self, key): # ------------------------------------------------------------------- - def _setitem_with_indexer(self, indexer, value): + def _setitem_with_indexer(self, indexer, value, name): """ _setitem_with_indexer is for setting values on a Series/DataFrame using positional indexers. @@ -1602,7 +1601,7 @@ def _setitem_with_indexer(self, indexer, value): new_indexer = convert_from_missing_indexer_tuple( indexer, self.obj.axes ) - self._setitem_with_indexer(new_indexer, value) + self._setitem_with_indexer(new_indexer, value, name) return @@ -1627,17 +1626,17 @@ def _setitem_with_indexer(self, indexer, value): indexer, missing = convert_missing_indexer(indexer) if missing: - self._setitem_with_indexer_missing(indexer, value) + self._setitem_with_indexer_missing(indexer, value, name) return # align and set the values if take_split_path: # We have to operate column-wise - self._setitem_with_indexer_split_path(indexer, value) + self._setitem_with_indexer_split_path(indexer, value, name) else: - self._setitem_single_block(indexer, value) + self._setitem_single_block(indexer, value, name) - def _setitem_with_indexer_split_path(self, indexer, value): + def _setitem_with_indexer_split_path(self, indexer, value, name): """ Setitem column-wise. """ @@ -1649,7 +1648,7 @@ def _setitem_with_indexer_split_path(self, indexer, value): if len(indexer) > self.ndim: raise IndexError("too many indices for array") - if isinstance(value, ABCSeries) and self.name != "iloc": + if isinstance(value, ABCSeries) and name != "iloc": value = self._align_series(indexer, value) # Ensure we have something we can iterate over @@ -1678,7 +1677,7 @@ def _setitem_with_indexer_split_path(self, indexer, value): # we have an equal len Frame if isinstance(value, ABCDataFrame): - self._setitem_with_indexer_frame_value(indexer, value) + self._setitem_with_indexer_frame_value(indexer, value, name) # we have an equal len ndarray/convertible to our ilocs # hasattr first, to avoid coercing to ndarray without reason. @@ -1737,7 +1736,7 @@ def _setitem_with_indexer_2d_value(self, indexer, value): # setting with a list, re-coerces self._setitem_single_column(loc, value[:, i].tolist(), plane_indexer) - def _setitem_with_indexer_frame_value(self, indexer, value: "DataFrame"): + def _setitem_with_indexer_frame_value(self, indexer, value: "DataFrame", name): ilocs = self._ensure_iterable_column_indexer(indexer[1]) sub_indexer = list(indexer) @@ -1747,7 +1746,12 @@ def _setitem_with_indexer_frame_value(self, indexer, value: "DataFrame"): unique_cols = value.columns.is_unique - if not unique_cols and value.columns.equals(self.obj.columns): + if name == "iloc": + for index, loc in zip(range(len(ilocs)), ilocs): + val = value.iloc[:, index] + self._setitem_single_column(loc, val, plane_indexer) + + elif not unique_cols and value.columns.equals(self.obj.columns): # We assume we are already aligned, see # test_iloc_setitem_frame_duplicate_columns_multiple_blocks for loc in ilocs: @@ -1768,20 +1772,15 @@ def _setitem_with_indexer_frame_value(self, indexer, value: "DataFrame"): raise ValueError("Setting with non-unique columns is not allowed.") else: - index = 0 for loc in ilocs: item = self.obj.columns[loc] if item in value: sub_indexer[1] = item - if self.name == "loc": - val = self._align_series( - tuple(sub_indexer), value[item], multiindex_indexer - ) - else: - val = value.iloc[:, index] + val = self._align_series( + tuple(sub_indexer), value[item], multiindex_indexer + ) else: val = np.nan - index += 1 self._setitem_single_column(loc, val, plane_indexer) @@ -1810,7 +1809,7 @@ def _setitem_single_column(self, loc: int, value, plane_indexer): # reset the sliced object if unique self.obj._iset_item(loc, ser) - def _setitem_single_block(self, indexer, value): + def _setitem_single_block(self, indexer, value, name): """ _setitem_with_indexer for the case when we have a single Block. """ @@ -1839,13 +1838,13 @@ def _setitem_single_block(self, indexer, value): indexer = maybe_convert_ix(*indexer) - if isinstance(value, (ABCSeries, dict)) and self.name != "iloc": + if isinstance(value, (ABCSeries, dict)) and name != "iloc": # TODO(EA): ExtensionBlock.setitem this causes issues with # setting for extensionarrays that store dicts. Need to decide # if it's worth supporting that. value = self._align_series(indexer, Series(value)) - elif isinstance(value, ABCDataFrame) and self.name != "iloc": + elif isinstance(value, ABCDataFrame) and name != "iloc": value = self._align_frame(indexer, value) # check for chained assignment @@ -1856,7 +1855,7 @@ def _setitem_single_block(self, indexer, value): self.obj._mgr = self.obj._mgr.setitem(indexer=indexer, value=value) self.obj._maybe_update_cacher(clear=True) - def _setitem_with_indexer_missing(self, indexer, value): + def _setitem_with_indexer_missing(self, indexer, value, name): """ Insert new row(s) or column(s) into the Series or DataFrame. """ @@ -1877,7 +1876,7 @@ def _setitem_with_indexer_missing(self, indexer, value): if index.is_unique: new_indexer = index.get_indexer([new_index[-1]]) if (new_indexer != -1).any(): - return self._setitem_with_indexer(new_indexer, value) + return self._setitem_with_indexer(new_indexer, value, name) # this preserves dtype of the value new_values = Series([value])._values From f1306aa764df5821b04c53aa7383552ce73170f7 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 10 Nov 2020 20:34:10 +0100 Subject: [PATCH 03/16] Fix dtype --- pandas/tests/indexing/test_indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 14c7f95f67d40..0e0c118fcfaa5 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -731,7 +731,7 @@ def run_tests(df, rhs, right_loc, right_iloc): xs = np.arange(20).reshape(5, 4) cols = ["jim", "joe", "jolie", "joline"] - df = DataFrame(xs, columns=cols, index=list("abcde")) + df = DataFrame(xs, columns=cols, index=list("abcde"), dtype="int64") # right hand side; permute the indices and multiplpy by -2 rhs = -2 * df.iloc[3:0:-1, 2:0:-1] From c55cd626cecad577c30e265d16543758e5365c37 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 10 Nov 2020 20:40:06 +0100 Subject: [PATCH 04/16] Add test --- pandas/tests/indexing/test_iloc.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index f5f2ac0225bd4..9c63a81cb63f8 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -768,6 +768,13 @@ def test_iloc_getitem_categorical_values(self): expected = Series([1]).astype(CategoricalDtype([1, 2, 3])) tm.assert_series_equal(result, expected) + def test_iloc_assign_series_to_df_cell(self): + # GH 37593 + df = DataFrame(columns=["a"], index=[0]) + df.iloc[0, 0] = Series([1, 2, 3]) + expected = DataFrame({"a": [Series([1, 2, 3])]}, columns=["a"], index=[0]) + tm.assert_frame_equal(df, expected) + class TestILocSetItemDuplicateColumns: def test_iloc_setitem_scalar_duplicate_columns(self): From 6bced6abb6a372e60f04e2943bdde236748d2851 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 17 Nov 2020 22:10:00 +0100 Subject: [PATCH 05/16] Default name to iloc --- pandas/core/frame.py | 4 ++-- pandas/core/indexing.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a8deab442dd62..0bbfff3274596 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3111,7 +3111,7 @@ def _setitem_slice(self, key: slice, value): # operates on labels and we need to operate positional for # backwards-compat, xref GH#31469 self._check_setitem_copy() - self.iloc._setitem_with_indexer(key, value, self.iloc.name) + self.iloc._setitem_with_indexer(key, value) def _setitem_array(self, key, value): # also raises Exception if object array with NA values @@ -3123,7 +3123,7 @@ def _setitem_array(self, key, value): key = check_bool_indexer(self.index, key) indexer = key.nonzero()[0] self._check_setitem_copy() - self.iloc._setitem_with_indexer(indexer, value, self.iloc.name) + self.iloc._setitem_with_indexer(indexer, value) else: if isinstance(value, DataFrame): if len(value.columns) != len(key): diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 5be3c86b228c3..47e8c6e56f8c1 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1525,7 +1525,7 @@ def _get_setitem_indexer(self, key): # ------------------------------------------------------------------- - def _setitem_with_indexer(self, indexer, value, name): + def _setitem_with_indexer(self, indexer, value, name = "iloc"): """ _setitem_with_indexer is for setting values on a Series/DataFrame using positional indexers. From 689a1abb1138525d287a263a7e5fe78db4533697 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 17 Nov 2020 22:11:49 +0100 Subject: [PATCH 06/16] Simplify for loop --- pandas/core/indexing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 47e8c6e56f8c1..4ed7e792c4dca 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1747,8 +1747,8 @@ def _setitem_with_indexer_frame_value(self, indexer, value: "DataFrame", name): unique_cols = value.columns.is_unique if name == "iloc": - for index, loc in zip(range(len(ilocs)), ilocs): - val = value.iloc[:, index] + for i, loc in enumerate(ilocs): + val = value.iloc[:, i] self._setitem_single_column(loc, val, plane_indexer) elif not unique_cols and value.columns.equals(self.obj.columns): From 23dab4a5fea3c3f6226243dedeb7552f67adfacb Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 17 Nov 2020 22:13:21 +0100 Subject: [PATCH 07/16] Delete spaces --- pandas/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 3c0c40a2b65e5..674ede0c94581 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1517,7 +1517,7 @@ def _get_setitem_indexer(self, key): # ------------------------------------------------------------------- - def _setitem_with_indexer(self, indexer, value, name = "iloc"): + def _setitem_with_indexer(self, indexer, value, name="iloc"): """ _setitem_with_indexer is for setting values on a Series/DataFrame using positional indexers. From 48e0d2566570e8928078ac8ba43a57b80e76e7e1 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 17 Nov 2020 22:49:17 +0100 Subject: [PATCH 08/16] Rename plane indexer --- pandas/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 674ede0c94581..f374e879de9ff 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1727,7 +1727,7 @@ def _setitem_with_indexer_frame_value(self, indexer, value: "DataFrame", name): if name == "iloc": for i, loc in enumerate(ilocs): val = value.iloc[:, i] - self._setitem_single_column(loc, val, plane_indexer) + self._setitem_single_column(loc, val, pi) elif not unique_cols and value.columns.equals(self.obj.columns): # We assume we are already aligned, see From 8ff2430ae817595772e98c8212d1440ea88f21a6 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 17 Nov 2020 23:14:22 +0100 Subject: [PATCH 09/16] Set to default --- pandas/core/frame.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b81341d75f257..abf9b3d8823aa 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3135,9 +3135,7 @@ def _setitem_array(self, key, value): key, axis=1, raise_missing=False )[1] self._check_setitem_copy() - self.iloc._setitem_with_indexer( - (slice(None), indexer), value, self.iloc.name - ) + self.iloc._setitem_with_indexer((slice(None), indexer), value) def _setitem_frame(self, key, value): # support boolean setting with DataFrame input, e.g. From 8a697f7dab4ac9811b8b14816a8dbcc4ab00c056 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 17 Nov 2020 23:16:27 +0100 Subject: [PATCH 10/16] Move tests --- pandas/tests/frame/indexing/test_setitem.py | 17 ------------- pandas/tests/indexing/test_iloc.py | 25 ++++++++++++++++++++ pandas/tests/series/indexing/test_setitem.py | 9 ------- 3 files changed, 25 insertions(+), 26 deletions(-) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index af923bf9b8f2b..e4c57dc2b72fc 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -289,23 +289,6 @@ def test_setitem_periodindex(self): assert isinstance(rs.index, PeriodIndex) tm.assert_index_equal(rs.index, rng) - @pytest.mark.parametrize("klass", [list, np.array]) - def test_iloc_setitem_bool_indexer(self, klass): - # GH: 36741 - df = DataFrame({"flag": ["x", "y", "z"], "value": [1, 3, 4]}) - indexer = klass([True, False, False]) - df.iloc[indexer, 1] = df.iloc[indexer, 1] * 2 - expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]}) - tm.assert_frame_equal(df, expected) - - def test_setitem_iloc_pure_position_based(self): - # GH: 22046 - df1 = DataFrame({"a2": [11, 12, 13], "b2": [14, 15, 16]}) - df2 = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) - df2.iloc[:, [1]] = df1.iloc[:, [0]] - expected = DataFrame({"a": [1, 2, 3], "b": [11, 12, 13], "c": [7, 8, 9]}) - tm.assert_frame_equal(df2, expected) - class TestDataFrameSetItemSlicing: def test_setitem_slice_position(self): diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 692ab6e779c7d..0f7ae6a371a67 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -808,6 +808,23 @@ def test_iloc_assign_series_to_df_cell(self): expected = DataFrame({"a": [Series([1, 2, 3])]}, columns=["a"], index=[0]) tm.assert_frame_equal(df, expected) + @pytest.mark.parametrize("klass", [list, np.array]) + def test_iloc_setitem_bool_indexer(self, klass): + # GH: 36741 + df = DataFrame({"flag": ["x", "y", "z"], "value": [1, 3, 4]}) + indexer = klass([True, False, False]) + df.iloc[indexer, 1] = df.iloc[indexer, 1] * 2 + expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]}) + tm.assert_frame_equal(df, expected) + + def test_setitem_iloc_pure_position_based(self): + # GH: 22046 + df1 = DataFrame({"a2": [11, 12, 13], "b2": [14, 15, 16]}) + df2 = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) + df2.iloc[:, [1]] = df1.iloc[:, [0]] + expected = DataFrame({"a": [1, 2, 3], "b": [11, 12, 13], "c": [7, 8, 9]}) + tm.assert_frame_equal(df2, expected) + class TestILocErrors: # NB: this test should work for _any_ Series we can pass as @@ -973,3 +990,11 @@ def test_iloc(self): def test_iloc_getitem_nonunique(self): ser = Series([0, 1, 2], index=[0, 1, 0]) assert ser.iloc[2] == 2 + + def test_setitem_iloc_pure_position_based(self): + # GH: 22046 + ser1 = Series([1, 2, 3]) + ser2 = Series([4, 5, 6], index=[1, 0, 2]) + ser1.iloc[1:3] = ser2.iloc[1:3] + expected = Series([1, 5, 6]) + tm.assert_series_equal(ser1, expected) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index cbc9a60c95524..119019da529e4 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -282,12 +282,3 @@ def test_setitem_slice_into_readonly_backing_data(): series[1:3] = 1 assert not array.any() - - -def test_setitem_iloc_pure_position_based(): - # GH: 22046 - ser1 = Series([1, 2, 3]) - ser2 = Series([4, 5, 6], index=[1, 0, 2]) - ser1.iloc[1:3] = ser2.iloc[1:3] - expected = Series([1, 5, 6]) - tm.assert_series_equal(ser1, expected) From 3bef35a999534dff26a6ed8fb33a2275c831708a Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 17 Nov 2020 23:25:21 +0100 Subject: [PATCH 11/16] Add test --- pandas/tests/indexing/test_iloc.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 0f7ae6a371a67..b6f6dfebeff57 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -810,18 +810,19 @@ def test_iloc_assign_series_to_df_cell(self): @pytest.mark.parametrize("klass", [list, np.array]) def test_iloc_setitem_bool_indexer(self, klass): - # GH: 36741 + # GH#36741 df = DataFrame({"flag": ["x", "y", "z"], "value": [1, 3, 4]}) indexer = klass([True, False, False]) df.iloc[indexer, 1] = df.iloc[indexer, 1] * 2 expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]}) tm.assert_frame_equal(df, expected) - def test_setitem_iloc_pure_position_based(self): - # GH: 22046 + @pytest.mark.parametrize("indexer", [[1], slice(1, 2)]) + def test_setitem_iloc_pure_position_based(self, indexer): + # GH#22046 df1 = DataFrame({"a2": [11, 12, 13], "b2": [14, 15, 16]}) df2 = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) - df2.iloc[:, [1]] = df1.iloc[:, [0]] + df2.iloc[:, indexer] = df1.iloc[:, [0]] expected = DataFrame({"a": [1, 2, 3], "b": [11, 12, 13], "c": [7, 8, 9]}) tm.assert_frame_equal(df2, expected) @@ -992,7 +993,7 @@ def test_iloc_getitem_nonunique(self): assert ser.iloc[2] == 2 def test_setitem_iloc_pure_position_based(self): - # GH: 22046 + # GH#22046 ser1 = Series([1, 2, 3]) ser2 = Series([4, 5, 6], index=[1, 0, 2]) ser1.iloc[1:3] = ser2.iloc[1:3] From 7375f222ca946ca74d024eb21c700e8fa2029c01 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 17 Nov 2020 23:32:09 +0100 Subject: [PATCH 12/16] Add test for dict case --- pandas/core/indexing.py | 3 +-- pandas/tests/indexing/test_iloc.py | 7 +++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index f374e879de9ff..378187ae769ca 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1820,8 +1820,7 @@ def _setitem_single_block(self, indexer, value, name): return indexer = maybe_convert_ix(*indexer) - - if isinstance(value, (ABCSeries, dict)) and name != "iloc": + if isinstance(value, ABCSeries) and name != "iloc" or isinstance(value, dict): # TODO(EA): ExtensionBlock.setitem this causes issues with # setting for extensionarrays that store dicts. Need to decide # if it's worth supporting that. diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index b6f6dfebeff57..343c58721f412 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -826,6 +826,13 @@ def test_setitem_iloc_pure_position_based(self, indexer): expected = DataFrame({"a": [1, 2, 3], "b": [11, 12, 13], "c": [7, 8, 9]}) tm.assert_frame_equal(df2, expected) + def test_setitem_iloc_dictionary_value(self): + # GH#37728 + df = DataFrame({"x": [1, 2], "y": [2, 2]}) + rhs = dict(x=9, y=99) + df.iloc[1] = rhs + expected = DataFrame({"x": [1, 9], "y": [2, 99]}) + class TestILocErrors: # NB: this test should work for _any_ Series we can pass as From e58fbc88d4a70491b094a0525b8ef810fe0bffe4 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 17 Nov 2020 23:32:26 +0100 Subject: [PATCH 13/16] Add tm --- pandas/tests/indexing/test_iloc.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 343c58721f412..84073bbb023a8 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -832,6 +832,7 @@ def test_setitem_iloc_dictionary_value(self): rhs = dict(x=9, y=99) df.iloc[1] = rhs expected = DataFrame({"x": [1, 9], "y": [2, 99]}) + tm.assert_frame_equal(df, expected) class TestILocErrors: From 55e9403adad987350933d8833cc4f9208e355f1d Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 18 Nov 2020 21:06:06 +0100 Subject: [PATCH 14/16] Type and refactor whatsnew --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/core/indexing.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 0c98acf0f894a..1f05c35097b22 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -589,7 +589,7 @@ Indexing - Bug in :meth:`DataFrame.xs` ignored ``droplevel=False`` for columns (:issue:`19056`) - Bug in :meth:`DataFrame.reindex` raising ``IndexingError`` wrongly for empty :class:`DataFrame` with ``tolerance`` not None or ``method="nearest"`` (:issue:`27315`) - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` using listlike indexer that contains elements that are in the index's ``categories`` but not in the index itself failing to raise ``KeyError`` (:issue:`37901`) -- Bug in :meth:`DataFrame.iloc` and :meth:`Series.iloc` aligned objects in ``__setitem__`` (:issue:`22046`) +- Bug in :meth:`DataFrame.iloc` and :meth:`Series.iloc` aligning objects in ``__setitem__`` (:issue:`22046`) Missing ^^^^^^^ diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 378187ae769ca..2d3b2aa50c2cf 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1628,7 +1628,7 @@ def _setitem_with_indexer(self, indexer, value, name="iloc"): else: self._setitem_single_block(indexer, value, name) - def _setitem_with_indexer_split_path(self, indexer, value, name): + def _setitem_with_indexer_split_path(self, indexer, value, name: str): """ Setitem column-wise. """ @@ -1714,7 +1714,7 @@ def _setitem_with_indexer_2d_value(self, indexer, value): # setting with a list, re-coerces self._setitem_single_column(loc, value[:, i].tolist(), pi) - def _setitem_with_indexer_frame_value(self, indexer, value: "DataFrame", name): + def _setitem_with_indexer_frame_value(self, indexer, value: "DataFrame", name: str): ilocs = self._ensure_iterable_column_indexer(indexer[1]) sub_indexer = list(indexer) @@ -1792,7 +1792,7 @@ def _setitem_single_column(self, loc: int, value, plane_indexer): # reset the sliced object if unique self.obj._iset_item(loc, ser) - def _setitem_single_block(self, indexer, value, name): + def _setitem_single_block(self, indexer, value, name: str): """ _setitem_with_indexer for the case when we have a single Block. """ @@ -1837,7 +1837,7 @@ def _setitem_single_block(self, indexer, value, name): self.obj._mgr = self.obj._mgr.setitem(indexer=indexer, value=value) self.obj._maybe_update_cacher(clear=True) - def _setitem_with_indexer_missing(self, indexer, value, name): + def _setitem_with_indexer_missing(self, indexer, value, name: str): """ Insert new row(s) or column(s) into the Series or DataFrame. """ From 70169777e8749408c4d18140b8c6d61e6450c861 Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 18 Nov 2020 21:08:21 +0100 Subject: [PATCH 15/16] Add comment --- pandas/core/indexing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 2d3b2aa50c2cf..6621b1772ac93 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1724,6 +1724,7 @@ def _setitem_with_indexer_frame_value(self, indexer, value: "DataFrame", name: s unique_cols = value.columns.is_unique + # We do not want to align the value in case of iloc GH#37728 if name == "iloc": for i, loc in enumerate(ilocs): val = value.iloc[:, i] From b965eeeebc8e6ead13c8bd4bb0c96f6593cf8e4e Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 18 Nov 2020 21:26:19 +0100 Subject: [PATCH 16/16] Remove name in function --- pandas/core/indexing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 6621b1772ac93..9bfbc22b1e628 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1618,7 +1618,7 @@ def _setitem_with_indexer(self, indexer, value, name="iloc"): indexer, missing = convert_missing_indexer(indexer) if missing: - self._setitem_with_indexer_missing(indexer, value, name) + self._setitem_with_indexer_missing(indexer, value) return # align and set the values @@ -1838,7 +1838,7 @@ def _setitem_single_block(self, indexer, value, name: str): self.obj._mgr = self.obj._mgr.setitem(indexer=indexer, value=value) self.obj._maybe_update_cacher(clear=True) - def _setitem_with_indexer_missing(self, indexer, value, name: str): + def _setitem_with_indexer_missing(self, indexer, value): """ Insert new row(s) or column(s) into the Series or DataFrame. """ @@ -1859,7 +1859,7 @@ def _setitem_with_indexer_missing(self, indexer, value, name: str): if index.is_unique: new_indexer = index.get_indexer([new_index[-1]]) if (new_indexer != -1).any(): - return self._setitem_with_indexer(new_indexer, value, name) + return self._setitem_with_indexer(new_indexer, value, "loc") # this preserves dtype of the value new_values = Series([value])._values