From 6691cada7ad5e052752942b95b41c462fc0b886e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 31 Mar 2020 16:40:23 -0700 Subject: [PATCH 1/2] REF: Remove BlockManager.set --- pandas/core/frame.py | 8 ++-- pandas/core/generic.py | 19 ++++++--- pandas/core/internals/managers.py | 20 +--------- pandas/tests/internals/test_internals.py | 50 ++++++++++++------------ 4 files changed, 44 insertions(+), 53 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8ada8e830b834..7c361faf2148e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2788,13 +2788,13 @@ def _setitem_frame(self, key, value): self._check_setitem_copy() self._where(-key, value, inplace=True) - def _iset_item(self, loc: int, value): + def _iset_item(self, loc: int, value, clear: bool = True): self._ensure_valid_index(value) # technically _sanitize_column expects a label, not a position, # but the behavior is the same as long as we pass broadcast=False value = self._sanitize_column(loc, value, broadcast=False) - NDFrame._iset_item(self, loc, value) + NDFrame._iset_item(self, loc, value, clear=clear) # check if we are modifying a copy # try to set first as we want an invalid @@ -2802,7 +2802,7 @@ def _iset_item(self, loc: int, value): if len(self): self._check_setitem_copy() - def _set_item(self, key, value): + def _set_item(self, key, value, clear: bool = True): """ Add series to DataFrame in specified column. @@ -2814,7 +2814,7 @@ def _set_item(self, key, value): """ self._ensure_valid_index(value) value = self._sanitize_column(key, value) - NDFrame._set_item(self, key, value) + NDFrame._set_item(self, key, value, clear=clear) # check if we are modifying a copy # try to set first as we want an invalid diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 62f5419c1f4c8..368bd083cccea 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3205,7 +3205,7 @@ def _maybe_cache_changed(self, item, value) -> None: """ The object has called back to us saying maybe it has changed. """ - self._data.set(item, value) + NDFrame._set_item(self, item, value, clear=False) @property def _is_cached(self) -> bool_t: @@ -3575,13 +3575,20 @@ def _slice(self: FrameOrSeries, slobj: slice, axis=0) -> FrameOrSeries: result._set_is_copy(self, copy=is_copy) return result - def _iset_item(self, loc: int, value) -> None: + def _iset_item(self, loc: int, value, clear: bool_t = True) -> None: self._data.iset(loc, value) - self._clear_item_cache() + if clear: + self._clear_item_cache() - def _set_item(self, key, value) -> None: - self._data.set(key, value) - self._clear_item_cache() + def _set_item(self, key, value, clear: bool_t = True) -> None: + try: + loc = self._info_axis.get_loc(key) + except KeyError: + # This item wasn't present, just insert at end + self._data.insert(len(self._info_axis), key, value) + return + + NDFrame._iset_item(self, loc, value, clear=clear) def _set_is_copy(self, ref, copy: bool_t = True) -> None: if not copy: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 182a5b14a1242..e45b61e5c670b 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -8,7 +8,7 @@ import numpy as np from pandas._libs import Timedelta, Timestamp, internals as libinternals, lib -from pandas._typing import ArrayLike, DtypeObj, Label +from pandas._typing import ArrayLike, DtypeObj from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import ( @@ -1039,24 +1039,6 @@ def delete(self, item): ) self._rebuild_blknos_and_blklocs() - def set(self, item: Label, value): - """ - Set new item in-place. - - Notes - ----- - Does not consolidate. - Adds new Block if not contained in the current items Index. - """ - try: - loc = self.items.get_loc(item) - except KeyError: - # This item wasn't present, just insert at end - self.insert(len(self.items), item, value) - return - - self.iset(loc, value) - def iset(self, loc: Union[int, slice, np.ndarray], value): """ Set new item in-place. Does not consolidate. Adds new Block if not diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 91ec1c29873cf..d325bc37820a9 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -361,8 +361,8 @@ def test_get(self): def test_set(self): mgr = create_mgr("a,b,c: int", item_shape=(3,)) - mgr.set("d", np.array(["foo"] * 3)) - mgr.set("b", np.array(["bar"] * 3)) + mgr.insert(len(mgr.items), "d", np.array(["foo"] * 3)) + mgr.iset(1, np.array(["bar"] * 3)) tm.assert_numpy_array_equal(mgr.get("a").internal_values(), np.array([0] * 3)) tm.assert_numpy_array_equal( mgr.get("b").internal_values(), np.array(["bar"] * 3, dtype=np.object_) @@ -373,19 +373,19 @@ def test_set(self): ) def test_set_change_dtype(self, mgr): - mgr.set("baz", np.zeros(N, dtype=bool)) + mgr.insert(len(mgr.items), "baz", np.zeros(N, dtype=bool)) - mgr.set("baz", np.repeat("foo", N)) + mgr.iset(mgr.items.get_loc("baz"), np.repeat("foo", N)) assert mgr.get("baz").dtype == np.object_ mgr2 = mgr.consolidate() - mgr2.set("baz", np.repeat("foo", N)) + mgr2.iset(mgr2.items.get_loc("baz"), np.repeat("foo", N)) assert mgr2.get("baz").dtype == np.object_ - mgr2.set("quux", tm.randn(N).astype(int)) + mgr2.insert(len(mgr2.items), "quux", tm.randn(N).astype(int)) assert mgr2.get("quux").dtype == np.int_ - mgr2.set("quux", tm.randn(N)) + mgr2.iset(mgr2.items.get_loc("quux"), tm.randn(N)) assert mgr2.get("quux").dtype == np.float_ def test_copy(self, mgr): @@ -512,9 +512,9 @@ def _compare(old_mgr, new_mgr): # convert mgr = create_mgr("a,b,foo: object; f: i8; g: f8") - mgr.set("a", np.array(["1"] * N, dtype=np.object_)) - mgr.set("b", np.array(["2."] * N, dtype=np.object_)) - mgr.set("foo", np.array(["foo."] * N, dtype=np.object_)) + mgr.iset(0, np.array(["1"] * N, dtype=np.object_)) + mgr.iset(1, np.array(["2."] * N, dtype=np.object_)) + mgr.iset(2, np.array(["foo."] * N, dtype=np.object_)) new_mgr = mgr.convert(numeric=True) assert new_mgr.get("a").dtype == np.int64 assert new_mgr.get("b").dtype == np.float64 @@ -525,9 +525,9 @@ def _compare(old_mgr, new_mgr): mgr = create_mgr( "a,b,foo: object; f: i4; bool: bool; dt: datetime; i: i8; g: f8; h: f2" ) - mgr.set("a", np.array(["1"] * N, dtype=np.object_)) - mgr.set("b", np.array(["2."] * N, dtype=np.object_)) - mgr.set("foo", np.array(["foo."] * N, dtype=np.object_)) + mgr.iset(0, np.array(["1"] * N, dtype=np.object_)) + mgr.iset(1, np.array(["2."] * N, dtype=np.object_)) + mgr.iset(2, np.array(["foo."] * N, dtype=np.object_)) new_mgr = mgr.convert(numeric=True) assert new_mgr.get("a").dtype == np.int64 assert new_mgr.get("b").dtype == np.float64 @@ -615,11 +615,11 @@ def test_interleave_dtype(self, mgr_string, dtype): assert mgr.as_array().dtype == "object" def test_consolidate_ordering_issues(self, mgr): - mgr.set("f", tm.randn(N)) - mgr.set("d", tm.randn(N)) - mgr.set("b", tm.randn(N)) - mgr.set("g", tm.randn(N)) - mgr.set("h", tm.randn(N)) + mgr.iset(mgr.items.get_loc("f"), tm.randn(N)) + mgr.iset(mgr.items.get_loc("d"), tm.randn(N)) + mgr.iset(mgr.items.get_loc("b"), tm.randn(N)) + mgr.iset(mgr.items.get_loc("g"), tm.randn(N)) + mgr.iset(mgr.items.get_loc("h"), tm.randn(N)) # we have datetime/tz blocks in mgr cons = mgr.consolidate() @@ -657,7 +657,7 @@ def test_get_numeric_data(self): "str: object; bool: bool; obj: object; dt: datetime", item_shape=(3,), ) - mgr.set("obj", np.array([1, 2, 3], dtype=np.object_)) + mgr.iset(5, np.array([1, 2, 3], dtype=np.object_)) numeric = mgr.get_numeric_data() tm.assert_index_equal( @@ -668,7 +668,7 @@ def test_get_numeric_data(self): ) # Check sharing - numeric.set("float", np.array([100.0, 200.0, 300.0])) + numeric.iset(numeric.items.get_loc("float"), np.array([100.0, 200.0, 300.0])) tm.assert_almost_equal( mgr.get("float").internal_values(), np.array([100.0, 200.0, 300.0]) ) @@ -677,7 +677,9 @@ def test_get_numeric_data(self): tm.assert_index_equal( numeric.items, pd.Index(["int", "float", "complex", "bool"]) ) - numeric2.set("float", np.array([1000.0, 2000.0, 3000.0])) + numeric2.iset( + numeric2.items.get_loc("float"), np.array([1000.0, 2000.0, 3000.0]) + ) tm.assert_almost_equal( mgr.get("float").internal_values(), np.array([100.0, 200.0, 300.0]) ) @@ -688,7 +690,7 @@ def test_get_bool_data(self): "str: object; bool: bool; obj: object; dt: datetime", item_shape=(3,), ) - mgr.set("obj", np.array([True, False, True], dtype=np.object_)) + mgr.iset(6, np.array([True, False, True], dtype=np.object_)) bools = mgr.get_bool_data() tm.assert_index_equal(bools.items, pd.Index(["bool"])) @@ -696,14 +698,14 @@ def test_get_bool_data(self): mgr.get("bool").internal_values(), bools.get("bool").internal_values() ) - bools.set("bool", np.array([True, False, True])) + bools.iset(0, np.array([True, False, True])) tm.assert_numpy_array_equal( mgr.get("bool").internal_values(), np.array([True, False, True]) ) # Check sharing bools2 = mgr.get_bool_data(copy=True) - bools2.set("bool", np.array([False, True, False])) + bools2.iset(0, np.array([False, True, False])) tm.assert_numpy_array_equal( mgr.get("bool").internal_values(), np.array([True, False, True]) ) From f9a1652bf103a30ac3ca80983f500c67e8e7f085 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 17 Apr 2020 10:33:17 -0700 Subject: [PATCH 2/2] avoid need for clear kwarg --- pandas/core/frame.py | 8 ++++---- pandas/core/generic.py | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3450bc25fb1fd..85bb47485a2e7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2830,13 +2830,13 @@ def _setitem_frame(self, key, value): self._check_setitem_copy() self._where(-key, value, inplace=True) - def _iset_item(self, loc: int, value, clear: bool = True): + def _iset_item(self, loc: int, value): self._ensure_valid_index(value) # technically _sanitize_column expects a label, not a position, # but the behavior is the same as long as we pass broadcast=False value = self._sanitize_column(loc, value, broadcast=False) - NDFrame._iset_item(self, loc, value, clear=clear) + NDFrame._iset_item(self, loc, value) # check if we are modifying a copy # try to set first as we want an invalid @@ -2844,7 +2844,7 @@ def _iset_item(self, loc: int, value, clear: bool = True): if len(self): self._check_setitem_copy() - def _set_item(self, key, value, clear: bool = True): + def _set_item(self, key, value): """ Add series to DataFrame in specified column. @@ -2856,7 +2856,7 @@ def _set_item(self, key, value, clear: bool = True): """ self._ensure_valid_index(value) value = self._sanitize_column(key, value) - NDFrame._set_item(self, key, value, clear=clear) + NDFrame._set_item(self, key, value) # check if we are modifying a copy # try to set first as we want an invalid diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 097bc0e59cdec..e1beeff3f2005 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3217,7 +3217,8 @@ def _maybe_cache_changed(self, item, value) -> None: """ The object has called back to us saying maybe it has changed. """ - NDFrame._set_item(self, item, value, clear=False) + loc = self._info_axis.get_loc(item) + self._mgr.iset(loc, value) @property def _is_cached(self) -> bool_t: @@ -3589,12 +3590,11 @@ def _slice(self: FrameOrSeries, slobj: slice, axis=0) -> FrameOrSeries: result._set_is_copy(self, copy=is_copy) return result - def _iset_item(self, loc: int, value, clear: bool_t = True) -> None: + def _iset_item(self, loc: int, value) -> None: self._mgr.iset(loc, value) - if clear: - self._clear_item_cache() + self._clear_item_cache() - def _set_item(self, key, value, clear: bool_t = True) -> None: + def _set_item(self, key, value) -> None: try: loc = self._info_axis.get_loc(key) except KeyError: @@ -3602,7 +3602,7 @@ def _set_item(self, key, value, clear: bool_t = True) -> None: self._mgr.insert(len(self._info_axis), key, value) return - NDFrame._iset_item(self, loc, value, clear=clear) + NDFrame._iset_item(self, loc, value) def _set_is_copy(self, ref, copy: bool_t = True) -> None: if not copy: