From 0e4c58eda77fab1550e8fbd4b78d9ef247000b13 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 20 May 2022 18:01:26 +0200 Subject: [PATCH 01/12] REF: Add Manager.column_setitem to set values into a single column (without intermediate series) --- pandas/core/frame.py | 16 ++++++------ pandas/core/indexing.py | 28 ++++++--------------- pandas/core/internals/array_manager.py | 14 +++++++++++ pandas/core/internals/managers.py | 11 ++++++++ pandas/tests/frame/indexing/test_setitem.py | 2 +- pandas/tests/indexing/test_partial.py | 4 ++- 6 files changed, 45 insertions(+), 30 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 51ca9dbd763b4..4a018353af1d1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3924,16 +3924,16 @@ def _set_value( Sets whether or not index/col interpreted as indexers """ try: - if takeable: - series = self._ixs(col, axis=1) - loc = index - else: - series = self._get_item_cache(col) - loc = self.index.get_loc(index) - # setitem_inplace will do validation that may raise TypeError, # ValueError, or LossySetitemError - series._mgr.setitem_inplace(loc, value) + # breakpoint() + if takeable: + self._mgr.column_setitem(col, index, value) + else: + icol = self.columns.get_loc(col) + index = self.index.get_loc(index) + self._mgr.column_setitem(icol, index, value) + self._clear_item_cache() except (KeyError, TypeError, ValueError, LossySetitemError): # set using a non-recursive method & reset the cache diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 02c095202d079..25e5a8dbf01d3 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -52,7 +52,6 @@ from pandas.core.indexers import ( check_array_indexer, is_empty_indexer, - is_exact_shape_match, is_list_like_indexer, is_scalar_indexer, length_of_indexer, @@ -1936,42 +1935,31 @@ def _setitem_single_column(self, loc: int, value, plane_indexer): """ pi = plane_indexer - ser = self.obj._ixs(loc, axis=1) - # perform the equivalent of a setitem on the info axis # as we have a null slice or a slice with full bounds # which means essentially reassign to the columns of a # multi-dim object # GH#6149 (null slice), GH#10408 (full bounds) if com.is_null_slice(pi) or com.is_full_slice(pi, len(self.obj)): - ser = value + self.obj._iset_item(loc, value) elif ( is_array_like(value) - and is_exact_shape_match(ser, value) + and len(value.shape) > 0 + and self.obj.shape[0] == value.shape[0] and not is_empty_indexer(pi) ): if is_list_like(pi): - ser = value[np.argsort(pi)] + value = value[np.argsort(pi)] else: # in case of slice - ser = value[pi] + value = value[pi] + self.obj._iset_item(loc, value) else: # set the item, first attempting to operate inplace, then # falling back to casting if necessary; see # _whatsnew_130.notable_bug_fixes.setitem_column_try_inplace - - orig_values = ser._values - ser._mgr = ser._mgr.setitem((pi,), value) - - if ser._values is orig_values: - # The setitem happened inplace, so the DataFrame's values - # were modified inplace. - return - self.obj._iset_item(loc, ser) - return - - # reset the sliced object if unique - self.obj._iset_item(loc, ser) + self.obj._mgr.column_setitem(loc, plane_indexer, value) + self.obj._clear_item_cache() def _setitem_single_block(self, indexer, value, name: str): """ diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 7c877af0585e7..39dba913f54f3 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -869,6 +869,20 @@ def iset( self.arrays[mgr_idx] = value_arr return + def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None: + """ + Set values ("setitem") into a single column (not setting the full column). + + This is a method on the ArrayManager level, to avoid creating an + intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`) + """ + arr = self.arrays[loc] + # create temporary SingleArrayManager without ref to use setitem implementation + mgr = SingleArrayManager([arr], [self._axes[0]]) + new_mgr = mgr.setitem((idx,), value) + # update existing ArrayManager in-place + self.arrays[loc] = new_mgr.arrays[0] + def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None: """ Insert item at selected position. diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 2e638f5b0fb3d..34261320fda27 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1185,6 +1185,17 @@ def _iset_single( self.blocks = new_blocks return + def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None: + """ + Set values ("setitem") into a single column (not setting the full column). + + This is a method on the BlockManager level, to avoid creating an + intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`) + """ + col_mgr = self.iget(loc) + new_mgr = col_mgr.setitem((idx,), value) + self.iset(loc, new_mgr._block.values, inplace=True) + def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None: """ Insert item at selected position. diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index f1e7b18a73173..46dabd0acfdf7 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -1082,7 +1082,7 @@ def test_setitem_partial_column_inplace(self, consolidate, using_array_manager): tm.assert_numpy_array_equal(zvals, expected.values) assert np.shares_memory(zvals, df["z"]._values) if not consolidate: - assert df["z"]._values is zvals + assert df["z"]._values.base is zvals.base def test_setitem_duplicate_columns_not_inplace(self): # GH#39510 diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 8251f09b97062..cef21bf042eec 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -266,7 +266,7 @@ def test_partial_setting(self): with pytest.raises(IndexError, match=msg): s.iat[3] = 5.0 - def test_partial_setting_frame(self): + def test_partial_setting_frame(self, using_array_manager): df_orig = DataFrame( np.arange(6).reshape(3, 2), columns=["A", "B"], dtype="int64" ) @@ -279,6 +279,8 @@ def test_partial_setting_frame(self): df.iloc[4, 2] = 5.0 msg = "index 2 is out of bounds for axis 0 with size 2" + if using_array_manager: + msg = "list index out of range" with pytest.raises(IndexError, match=msg): df.iat[4, 2] = 5.0 From a2aa8aa8517a1970224e2073eda708f01988afe0 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 20 May 2022 19:40:31 +0200 Subject: [PATCH 02/12] handle typing --- pandas/core/frame.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4a018353af1d1..a121df94b654f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3924,15 +3924,17 @@ def _set_value( Sets whether or not index/col interpreted as indexers """ try: - # setitem_inplace will do validation that may raise TypeError, + # setitem will do validation that may raise TypeError, # ValueError, or LossySetitemError - # breakpoint() if takeable: - self._mgr.column_setitem(col, index, value) + # error: Argument 2 to "column_setitem" of "BlockManager" has + # incompatible type "Union[Hashable, Sequence[Hashable]]"; + # expected "Union[int, slice, ndarray[Any, Any]]" + self._mgr.column_setitem(col, index, value) # type: ignore[arg-type] else: icol = self.columns.get_loc(col) index = self.index.get_loc(index) - self._mgr.column_setitem(icol, index, value) + self._mgr.column_setitem(icol, index, value) # type: ignore[arg-type] self._clear_item_cache() except (KeyError, TypeError, ValueError, LossySetitemError): From 103d1fe014e6d7fa847e824fa65f71e4a4da4de7 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 22 May 2022 12:45:56 +0200 Subject: [PATCH 03/12] dedent in _set_value --- pandas/core/frame.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 34314a49f78ed..5387249524a87 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3924,14 +3924,15 @@ def _set_value( # setitem will do validation that may raise TypeError, # ValueError, or LossySetitemError if takeable: - # error: Argument 2 to "column_setitem" of "BlockManager" has - # incompatible type "Union[Hashable, Sequence[Hashable]]"; - # expected "Union[int, slice, ndarray[Any, Any]]" - self._mgr.column_setitem(col, index, value) # type: ignore[arg-type] + icol = col + iindex = index else: icol = self.columns.get_loc(col) - index = self.index.get_loc(index) - self._mgr.column_setitem(icol, index, value) # type: ignore[arg-type] + iindex = self.index.get_loc(index) + # error: Argument 2 to "column_setitem" of "BlockManager" has + # incompatible type "Union[Hashable, Sequence[Hashable]]"; + # expected "Union[int, slice, ndarray[Any, Any]]" + self._mgr.column_setitem(icol, iindex, value) # type: ignore[arg-type] self._clear_item_cache() except (KeyError, TypeError, ValueError, LossySetitemError): From 453eaba2f6272f89f6ebe33173384bafdcfa9038 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 23 May 2022 10:21:30 +0200 Subject: [PATCH 04/12] add inplace keyword --- pandas/core/frame.py | 5 +++-- pandas/core/internals/array_manager.py | 13 +++++++++---- pandas/core/internals/managers.py | 11 ++++++++--- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5387249524a87..8eeefbdb05acb 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3932,8 +3932,9 @@ def _set_value( # error: Argument 2 to "column_setitem" of "BlockManager" has # incompatible type "Union[Hashable, Sequence[Hashable]]"; # expected "Union[int, slice, ndarray[Any, Any]]" - self._mgr.column_setitem(icol, iindex, value) # type: ignore[arg-type] - self._clear_item_cache() + self._mgr.column_setitem( # type: ignore[arg-type] + icol, iindex, value, inplace=True + ) except (KeyError, TypeError, ValueError, LossySetitemError): # set using a non-recursive method & reset the cache diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index fd7ff8010d035..06b62b0165618 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -869,7 +869,9 @@ def iset( self.arrays[mgr_idx] = value_arr return - def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None: + def column_setitem( + self, loc: int, idx: int | slice | np.ndarray, value, inplace: bool = False + ) -> None: """ Set values ("setitem") into a single column (not setting the full column). @@ -879,9 +881,12 @@ def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None arr = self.arrays[loc] # create temporary SingleArrayManager without ref to use setitem implementation mgr = SingleArrayManager([arr], [self._axes[0]]) - new_mgr = mgr.setitem((idx,), value) - # update existing ArrayManager in-place - self.arrays[loc] = new_mgr.arrays[0] + if inplace: + mgr.setitem_inplace(idx, value) + else: + new_mgr = mgr.setitem((idx,), value) + # update existing ArrayManager in-place + self.arrays[loc] = new_mgr.arrays[0] def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None: """ diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 7cccc9833de6b..badccd5a15856 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1188,7 +1188,9 @@ def _iset_single( self.blocks = new_blocks return - def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None: + def column_setitem( + self, loc: int, idx: int | slice | np.ndarray, value, inplace: bool = False + ) -> None: """ Set values ("setitem") into a single column (not setting the full column). @@ -1196,8 +1198,11 @@ def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`) """ col_mgr = self.iget(loc) - new_mgr = col_mgr.setitem((idx,), value) - self.iset(loc, new_mgr._block.values, inplace=True) + if inplace: + col_mgr.setitem_inplace(idx, value) + else: + new_mgr = col_mgr.setitem((idx,), value) + self.iset(loc, new_mgr._block.values, inplace=True) def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None: """ From be740ad175f2a6f942819012660ac2da51af4646 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 23 May 2022 10:30:11 +0200 Subject: [PATCH 05/12] update typing --- pandas/core/frame.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8eeefbdb05acb..9c740a943a80f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3925,16 +3925,11 @@ def _set_value( # ValueError, or LossySetitemError if takeable: icol = col - iindex = index + iindex = cast(int, index) else: icol = self.columns.get_loc(col) iindex = self.index.get_loc(index) - # error: Argument 2 to "column_setitem" of "BlockManager" has - # incompatible type "Union[Hashable, Sequence[Hashable]]"; - # expected "Union[int, slice, ndarray[Any, Any]]" - self._mgr.column_setitem( # type: ignore[arg-type] - icol, iindex, value, inplace=True - ) + self._mgr.column_setitem(icol, iindex, value, inplace=True) except (KeyError, TypeError, ValueError, LossySetitemError): # set using a non-recursive method & reset the cache From e63c7f67da94a03fb3493c47a95c66441c01841c Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 24 May 2022 23:41:31 +0200 Subject: [PATCH 06/12] Revert "add inplace keyword" This reverts commit 453eaba2f6272f89f6ebe33173384bafdcfa9038. --- pandas/core/frame.py | 3 ++- pandas/core/internals/array_manager.py | 13 ++++--------- pandas/core/internals/managers.py | 11 +++-------- 3 files changed, 9 insertions(+), 18 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9c740a943a80f..2b7bd408634b6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3929,7 +3929,8 @@ def _set_value( else: icol = self.columns.get_loc(col) iindex = self.index.get_loc(index) - self._mgr.column_setitem(icol, iindex, value, inplace=True) + self._mgr.column_setitem(icol, iindex, value) + self._clear_item_cache() except (KeyError, TypeError, ValueError, LossySetitemError): # set using a non-recursive method & reset the cache diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 06b62b0165618..fd7ff8010d035 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -869,9 +869,7 @@ def iset( self.arrays[mgr_idx] = value_arr return - def column_setitem( - self, loc: int, idx: int | slice | np.ndarray, value, inplace: bool = False - ) -> None: + def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None: """ Set values ("setitem") into a single column (not setting the full column). @@ -881,12 +879,9 @@ def column_setitem( arr = self.arrays[loc] # create temporary SingleArrayManager without ref to use setitem implementation mgr = SingleArrayManager([arr], [self._axes[0]]) - if inplace: - mgr.setitem_inplace(idx, value) - else: - new_mgr = mgr.setitem((idx,), value) - # update existing ArrayManager in-place - self.arrays[loc] = new_mgr.arrays[0] + new_mgr = mgr.setitem((idx,), value) + # update existing ArrayManager in-place + self.arrays[loc] = new_mgr.arrays[0] def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None: """ diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index badccd5a15856..7cccc9833de6b 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1188,9 +1188,7 @@ def _iset_single( self.blocks = new_blocks return - def column_setitem( - self, loc: int, idx: int | slice | np.ndarray, value, inplace: bool = False - ) -> None: + def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None: """ Set values ("setitem") into a single column (not setting the full column). @@ -1198,11 +1196,8 @@ def column_setitem( intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`) """ col_mgr = self.iget(loc) - if inplace: - col_mgr.setitem_inplace(idx, value) - else: - new_mgr = col_mgr.setitem((idx,), value) - self.iset(loc, new_mgr._block.values, inplace=True) + new_mgr = col_mgr.setitem((idx,), value) + self.iset(loc, new_mgr._block.values, inplace=True) def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None: """ From caf7be81cd239229469930ee5510dd5f49bdfd50 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 31 May 2022 02:21:33 +0200 Subject: [PATCH 07/12] only catch KeyError --- pandas/core/frame.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 098de2ae5ee50..6061d415f3c4b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -98,7 +98,6 @@ ) from pandas.core.dtypes.cast import ( - LossySetitemError, can_hold_element, construct_1d_arraylike_from_scalar, construct_2d_arraylike_from_scalar, @@ -3944,8 +3943,6 @@ def _set_value( Sets whether or not index/col interpreted as indexers """ try: - # setitem will do validation that may raise TypeError, - # ValueError, or LossySetitemError if takeable: icol = col iindex = cast(int, index) @@ -3955,7 +3952,7 @@ def _set_value( self._mgr.column_setitem(icol, iindex, value) self._clear_item_cache() - except (KeyError, TypeError, ValueError, LossySetitemError): + except KeyError: # set using a non-recursive method & reset the cache if takeable: self.iloc[index, col] = value From 8d7ee1a5d2c1c1268ab3826200db72a63b7418d9 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 31 May 2022 02:48:37 +0200 Subject: [PATCH 08/12] only stop catching LossySetitemError --- pandas/core/frame.py | 2 +- pandas/tests/indexing/test_at.py | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6061d415f3c4b..697615f3f4c14 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3952,7 +3952,7 @@ def _set_value( self._mgr.column_setitem(icol, iindex, value) self._clear_item_cache() - except KeyError: + except (KeyError, TypeError, ValueError): # set using a non-recursive method & reset the cache if takeable: self.iloc[index, col] = value diff --git a/pandas/tests/indexing/test_at.py b/pandas/tests/indexing/test_at.py index a2f02378d061a..96c73b007cef3 100644 --- a/pandas/tests/indexing/test_at.py +++ b/pandas/tests/indexing/test_at.py @@ -10,6 +10,7 @@ CategoricalDtype, CategoricalIndex, DataFrame, + MultiIndex, Series, Timestamp, ) @@ -96,6 +97,18 @@ def test_at_setitem_categorical_missing(self): tm.assert_frame_equal(df, expected) + def test_at_setitem_multiindex(self): + df = DataFrame( + np.zeros((3, 2), dtype="int64"), + columns=MultiIndex.from_tuples([("a", 0), ("a", 1)]), + ) + df.at[0, "a"] = 10 + expected = DataFrame( + [[10, 10], [0, 0], [0, 0]], + columns=MultiIndex.from_tuples([("a", 0), ("a", 1)]), + ) + tm.assert_frame_equal(df, expected) + class TestAtSetItemWithExpansion: def test_at_setitem_expansion_series_dt64tz_value(self, tz_naive_fixture): From 25e903b718e98ebe4d684df9609ee90f86bc9cdd Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 31 May 2022 02:59:39 +0200 Subject: [PATCH 09/12] small updates --- pandas/core/indexing.py | 5 ++--- pandas/core/internals/array_manager.py | 1 - pandas/tests/frame/indexing/test_setitem.py | 2 -- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 89f4ad5d3d7bd..0ffe00e87f8a2 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1955,9 +1955,8 @@ def _setitem_single_column(self, loc: int, value, plane_indexer): # in case of slice value = value[pi] else: - # set the item, first attempting to operate inplace, then - # falling back to casting if necessary; see - # _whatsnew_130.notable_bug_fixes.setitem_column_try_inplace + # set value into the column (first attempting to operate inplace, then + # falling back to casting if necessary) self.obj._mgr.column_setitem(loc, plane_indexer, value) self.obj._clear_item_cache() return diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index fd7ff8010d035..9f2635537c15d 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -877,7 +877,6 @@ def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`) """ arr = self.arrays[loc] - # create temporary SingleArrayManager without ref to use setitem implementation mgr = SingleArrayManager([arr], [self._axes[0]]) new_mgr = mgr.setitem((idx,), value) # update existing ArrayManager in-place diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 6d38342115418..bae669518947e 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -1090,8 +1090,6 @@ def test_setitem_partial_column_inplace(self, consolidate, using_array_manager): # check setting occurred in-place tm.assert_numpy_array_equal(zvals, expected.values) assert np.shares_memory(zvals, df["z"]._values) - if not consolidate: - assert df["z"]._values.base is zvals.base def test_setitem_duplicate_columns_not_inplace(self): # GH#39510 From 5e30199cb4df4bbb4069ae0103e8a4f257917501 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 31 May 2022 14:03:26 +0200 Subject: [PATCH 10/12] fix AM case --- pandas/core/internals/array_manager.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 9f2635537c15d..ee6c183898079 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -36,6 +36,7 @@ is_datetime64_ns_dtype, is_dtype_equal, is_extension_array_dtype, + is_integer, is_numeric_dtype, is_object_dtype, is_timedelta64_ns_dtype, @@ -876,6 +877,8 @@ def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None This is a method on the ArrayManager level, to avoid creating an intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`) """ + if not is_integer(loc): + raise TypeError("The column index should be an integer") arr = self.arrays[loc] mgr = SingleArrayManager([arr], [self._axes[0]]) new_mgr = mgr.setitem((idx,), value) From 9d4566ff9a05766b7f836c2365f67fe4ead92376 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 31 May 2022 20:06:01 +0200 Subject: [PATCH 11/12] remove is_exact_shape_match --- pandas/core/indexers/__init__.py | 2 -- pandas/core/indexers/utils.py | 26 +------------------------- 2 files changed, 1 insertion(+), 27 deletions(-) diff --git a/pandas/core/indexers/__init__.py b/pandas/core/indexers/__init__.py index 86ec36144b134..6431f12a08dc8 100644 --- a/pandas/core/indexers/__init__.py +++ b/pandas/core/indexers/__init__.py @@ -4,7 +4,6 @@ check_setitem_lengths, deprecate_ndim_indexing, is_empty_indexer, - is_exact_shape_match, is_list_like_indexer, is_scalar_indexer, is_valid_positional_slice, @@ -23,7 +22,6 @@ "check_setitem_lengths", "validate_indices", "maybe_convert_indices", - "is_exact_shape_match", "length_of_indexer", "deprecate_ndim_indexing", "unpack_1tuple", diff --git a/pandas/core/indexers/utils.py b/pandas/core/indexers/utils.py index 8c38341e02e34..f098066d1c7d7 100644 --- a/pandas/core/indexers/utils.py +++ b/pandas/core/indexers/utils.py @@ -11,10 +11,7 @@ import numpy as np -from pandas._typing import ( - AnyArrayLike, - ArrayLike, -) +from pandas._typing import AnyArrayLike from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( @@ -294,27 +291,6 @@ def maybe_convert_indices(indices, n: int, verify: bool = True): # Unsorted -def is_exact_shape_match(target: ArrayLike, value: ArrayLike) -> bool: - """ - Is setting this value into this target overwriting the entire column? - - Parameters - ---------- - target : np.ndarray or ExtensionArray - value : np.ndarray or ExtensionArray - - Returns - ------- - bool - """ - return ( - len(value.shape) > 0 - and len(target.shape) > 0 - and value.shape[0] == target.shape[0] - and value.size == target.size - ) - - def length_of_indexer(indexer, target=None) -> int: """ Return the expected length of target[indexer] From ea063e6652c4eb0f096718797699cb5b28027a7e Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 1 Jun 2022 00:05:10 +0200 Subject: [PATCH 12/12] add comment about catching the errors --- pandas/core/frame.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 697615f3f4c14..c31de6428b23a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3953,6 +3953,9 @@ def _set_value( self._clear_item_cache() except (KeyError, TypeError, ValueError): + # get_loc might raise a KeyError for missing labels (falling back + # to (i)loc will do expansion of the index) + # column_setitem will do validation that may raise TypeError or ValueError # set using a non-recursive method & reset the cache if takeable: self.iloc[index, col] = value