From 2703e613ae40cb6d42ca7352d82ff16fa6e75f48 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 8 Jun 2021 21:54:28 +0200 Subject: [PATCH 1/6] REF: avoid mutating Series._values directly in setitem but defer to Manager method --- pandas/core/internals/array_manager.py | 19 +++++++++++++++++-- pandas/core/internals/managers.py | 21 +++++++++++++++++++-- pandas/core/series.py | 5 ++--- 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 31e32b053367b..b608aa0e438b1 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -1286,8 +1286,23 @@ def apply(self, func, **kwargs): new_array = getattr(self.array, func)(**kwargs) return type(self)([new_array], self._axes) - def setitem(self, indexer, value): - return self.apply_with_block("setitem", indexer=indexer, value=value) + def setitem(self, indexer, value, inplace: bool = False): + """ + Set values with indexer. + + For SingleArrayManager, this backs s[indexer] = value + + Parameters + ---------- + indexer, value + inplace : bool, default False + If True (for a Series), mutate the manager/values in place, not + returning a new Manager (and thus never changing the dtype). + """ + if inplace: + self.arrays[0][indexer] = value + else: + return self.apply_with_block("setitem", indexer=indexer, value=value) def idelete(self, indexer) -> SingleArrayManager: """ diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 323aa45874d96..2ec873dd0baac 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -351,8 +351,25 @@ def where(self: T, other, cond, align: bool, errors: str) -> T: errors=errors, ) - def setitem(self: T, indexer, value) -> T: - return self.apply("setitem", indexer=indexer, value=value) + def setitem(self: T, indexer, value, inplace: bool = False) -> T: + """ + Set values with indexer. + + For SingleBlockManager, this backs s[indexer] = value + + Parameters + ---------- + indexer, value + inplace : bool, default False + If True (for a Series), mutate the block/values in place, not + returning a new Manager/Block (and thus never changing the dtype). + """ + if inplace: + # only passed for Series (single block) + assert self.ndim == 1 + self._block.values[indexer] = value + else: + return self.apply("setitem", indexer=indexer, value=value) def putmask(self, mask, new, align: bool = True): diff --git a/pandas/core/series.py b/pandas/core/series.py index 2f45a2adbdec7..e78f54c361fed 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1064,10 +1064,9 @@ def __setitem__(self, key, value) -> None: try: self._set_with_engine(key, value) except (KeyError, ValueError): - values = self._values if is_integer(key) and self.index.inferred_type != "integer": # positional setter - values[key] = value + self._mgr.setitem(key, value, inplace=True) else: # GH#12862 adding a new key to the Series self.loc[key] = value @@ -1099,7 +1098,7 @@ def _set_with_engine(self, key, value) -> None: # error: Argument 1 to "validate_numeric_casting" has incompatible type # "Union[dtype, ExtensionDtype]"; expected "dtype" validate_numeric_casting(self.dtype, value) # type: ignore[arg-type] - self._values[loc] = value + self._mgr.setitem(loc, value, inplace=True) def _set_with(self, key, value): # other: fancy integer or otherwise From 2d2000c7f0b9ea990cc1e9db62265ad94319611e Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 9 Jun 2021 13:47:58 +0200 Subject: [PATCH 2/6] add comment + return statements --- pandas/core/internals/array_manager.py | 1 + pandas/core/internals/managers.py | 3 ++- pandas/core/series.py | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index b608aa0e438b1..157dc07d38368 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -1301,6 +1301,7 @@ def setitem(self, indexer, value, inplace: bool = False): """ if inplace: self.arrays[0][indexer] = value + return else: return self.apply_with_block("setitem", indexer=indexer, value=value) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 2ec873dd0baac..9a14dbb95f3ef 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -367,7 +367,8 @@ def setitem(self: T, indexer, value, inplace: bool = False) -> T: if inplace: # only passed for Series (single block) assert self.ndim == 1 - self._block.values[indexer] = value + self._block.values[indexer] = value # type: ignore[attr-defined] + return else: return self.apply("setitem", indexer=indexer, value=value) diff --git a/pandas/core/series.py b/pandas/core/series.py index e78f54c361fed..f3095f2c38d80 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1066,6 +1066,7 @@ def __setitem__(self, key, value) -> None: except (KeyError, ValueError): if is_integer(key) and self.index.inferred_type != "integer": # positional setter + # this is equivalent to self._values[key] = value self._mgr.setitem(key, value, inplace=True) else: # GH#12862 adding a new key to the Series @@ -1098,6 +1099,7 @@ def _set_with_engine(self, key, value) -> None: # error: Argument 1 to "validate_numeric_casting" has incompatible type # "Union[dtype, ExtensionDtype]"; expected "dtype" validate_numeric_casting(self.dtype, value) # type: ignore[arg-type] + # this is equivalent to self._values[key] = value self._mgr.setitem(loc, value, inplace=True) def _set_with(self, key, value): From 8777b97b9a2a1c9b41a691ef25d9fa3d6fd966ed Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 9 Jun 2021 13:53:06 +0200 Subject: [PATCH 3/6] new setitem_inplace method instead of option --- pandas/core/internals/array_manager.py | 24 ++++++++++---------- pandas/core/internals/managers.py | 31 +++++++++++++------------- pandas/core/series.py | 4 ++-- 3 files changed, 29 insertions(+), 30 deletions(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 157dc07d38368..4f030aa4ff004 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -1286,24 +1286,24 @@ def apply(self, func, **kwargs): new_array = getattr(self.array, func)(**kwargs) return type(self)([new_array], self._axes) - def setitem(self, indexer, value, inplace: bool = False): + def setitem(self, indexer, value): """ Set values with indexer. For SingleArrayManager, this backs s[indexer] = value + """ + return self.apply_with_block("setitem", indexer=indexer, value=value) - Parameters - ---------- - indexer, value - inplace : bool, default False - If True (for a Series), mutate the manager/values in place, not - returning a new Manager (and thus never changing the dtype). + def setitem_inplace(self, indexer, value): """ - if inplace: - self.arrays[0][indexer] = value - return - else: - return self.apply_with_block("setitem", indexer=indexer, value=value) + Set values with indexer. + + For SingleArrayManager, this backs s[indexer] = value. + + This is an inplace version of `setitem`, mutating the manager/values + in place, not returning a new Manager (and thus never changing the dtype). + """ + self.arrays[0][indexer] = value def idelete(self, indexer) -> SingleArrayManager: """ diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 9a14dbb95f3ef..f872a78c59636 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -351,26 +351,13 @@ def where(self: T, other, cond, align: bool, errors: str) -> T: errors=errors, ) - def setitem(self: T, indexer, value, inplace: bool = False) -> T: + def setitem(self: T, indexer, value) -> T: """ Set values with indexer. For SingleBlockManager, this backs s[indexer] = value - - Parameters - ---------- - indexer, value - inplace : bool, default False - If True (for a Series), mutate the block/values in place, not - returning a new Manager/Block (and thus never changing the dtype). - """ - if inplace: - # only passed for Series (single block) - assert self.ndim == 1 - self._block.values[indexer] = value # type: ignore[attr-defined] - return - else: - return self.apply("setitem", indexer=indexer, value=value) + """ + return self.apply("setitem", indexer=indexer, value=value) def putmask(self, mask, new, align: bool = True): @@ -1648,6 +1635,18 @@ def get_slice(self, slobj: slice, axis: int = 0) -> SingleBlockManager: new_index = self.index._getitem_slice(slobj) return type(self)(block, new_index) + def setitem_inplace(self, indexer, value) -> SingleBlockManager: + """ + Set values with indexer. + + For SingleBlockManager, this backs s[indexer] = value + + This is an inplace version of `setitem`, mutating the block/values + in place, not returning a new Manager/Block (and thus never changing + the dtype). + """ + self._block.values[indexer] = value + @property def index(self) -> Index: return self.axes[0] diff --git a/pandas/core/series.py b/pandas/core/series.py index f3095f2c38d80..a3077488a0464 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1067,7 +1067,7 @@ def __setitem__(self, key, value) -> None: if is_integer(key) and self.index.inferred_type != "integer": # positional setter # this is equivalent to self._values[key] = value - self._mgr.setitem(key, value, inplace=True) + self._mgr.setitem_inplace(key, value) else: # GH#12862 adding a new key to the Series self.loc[key] = value @@ -1100,7 +1100,7 @@ def _set_with_engine(self, key, value) -> None: # "Union[dtype, ExtensionDtype]"; expected "dtype" validate_numeric_casting(self.dtype, value) # type: ignore[arg-type] # this is equivalent to self._values[key] = value - self._mgr.setitem(loc, value, inplace=True) + self._mgr.setitem_inplace(loc, value) def _set_with(self, key, value): # other: fancy integer or otherwise From 7364243b7dafb249fb1792b4c70cf7a09de62bf2 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 9 Jun 2021 16:04:48 +0200 Subject: [PATCH 4/6] fixup return type --- pandas/core/internals/managers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index f872a78c59636..c31f8b6ad7146 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1635,7 +1635,7 @@ def get_slice(self, slobj: slice, axis: int = 0) -> SingleBlockManager: new_index = self.index._getitem_slice(slobj) return type(self)(block, new_index) - def setitem_inplace(self, indexer, value) -> SingleBlockManager: + def setitem_inplace(self, indexer, value): """ Set values with indexer. From 6147fc88ad2a25c809fb8e169151cc33732655ed Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 4 Aug 2021 15:41:52 +0200 Subject: [PATCH 5/6] move setitem_inplace to SingleDataManager base class --- pandas/core/internals/array_manager.py | 14 +++----------- pandas/core/internals/base.py | 12 ++++++++++++ pandas/core/internals/managers.py | 12 ------------ 3 files changed, 15 insertions(+), 23 deletions(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 9feb14fdf9561..08608ae85428d 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -1281,19 +1281,11 @@ def setitem(self, indexer, value): Set values with indexer. For SingleArrayManager, this backs s[indexer] = value - """ - return self.apply_with_block("setitem", indexer=indexer, value=value) - def setitem_inplace(self, indexer, value): + See `setitem_inplace` for a version that works inplace and doesn't + return a new Manager. """ - Set values with indexer. - - For SingleArrayManager, this backs s[indexer] = value. - - This is an inplace version of `setitem`, mutating the manager/values - in place, not returning a new Manager (and thus never changing the dtype). - """ - self.arrays[0][indexer] = value + return self.apply_with_block("setitem", indexer=indexer, value=value) def idelete(self, indexer) -> SingleArrayManager: """ diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index e65318dd29c52..14703ae943e56 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -159,6 +159,18 @@ def array(self): """ return self.arrays[0] # type: ignore[attr-defined] + def setitem_inplace(self, indexer, value): + """ + Set values with indexer. + + For Single[Block/Array]Manager, this backs s[indexer] = value + + This is an inplace version of `setitem()`, mutating the manager/values + in place, not returning a new Manager (and Block), and thus never changing + the dtype. + """ + self.array[indexer] = value + def interleaved_dtype(dtypes: list[DtypeObj]) -> DtypeObj | None: """ diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 471ee6794cb24..7c8b289e6eb87 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1691,18 +1691,6 @@ def get_slice(self, slobj: slice, axis: int = 0) -> SingleBlockManager: new_index = self.index._getitem_slice(slobj) return type(self)(block, new_index) - def setitem_inplace(self, indexer, value): - """ - Set values with indexer. - - For SingleBlockManager, this backs s[indexer] = value - - This is an inplace version of `setitem`, mutating the block/values - in place, not returning a new Manager/Block (and thus never changing - the dtype). - """ - self._block.values[indexer] = value - @property def index(self) -> Index: return self.axes[0] From 81904b4eaa3b5a0dddac4579ea4959836fc30219 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 5 Aug 2021 09:57:50 +0200 Subject: [PATCH 6/6] add return type to setitem_inplace --- pandas/core/internals/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index 14703ae943e56..4d3dcb9c4732e 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -159,7 +159,7 @@ def array(self): """ return self.arrays[0] # type: ignore[attr-defined] - def setitem_inplace(self, indexer, value): + def setitem_inplace(self, indexer, value) -> None: """ Set values with indexer.