Skip to content

Commit b99ec4a

Browse files
REF: Add Manager.column_setitem to set values into a single column (without intermediate series) (#47074)
1 parent aa85f02 commit b99ec4a

File tree

9 files changed

+64
-59
lines changed

9 files changed

+64
-59
lines changed

pandas/core/frame.py

+11-11
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,6 @@
9898
)
9999

100100
from pandas.core.dtypes.cast import (
101-
LossySetitemError,
102101
can_hold_element,
103102
construct_1d_arraylike_from_scalar,
104103
construct_2d_arraylike_from_scalar,
@@ -3942,17 +3941,18 @@ def _set_value(
39423941
"""
39433942
try:
39443943
if takeable:
3945-
series = self._ixs(col, axis=1)
3946-
loc = index
3944+
icol = col
3945+
iindex = cast(int, index)
39473946
else:
3948-
series = self._get_item_cache(col)
3949-
loc = self.index.get_loc(index)
3950-
3951-
# setitem_inplace will do validation that may raise TypeError,
3952-
# ValueError, or LossySetitemError
3953-
series._mgr.setitem_inplace(loc, value)
3954-
3955-
except (KeyError, TypeError, ValueError, LossySetitemError):
3947+
icol = self.columns.get_loc(col)
3948+
iindex = self.index.get_loc(index)
3949+
self._mgr.column_setitem(icol, iindex, value)
3950+
self._clear_item_cache()
3951+
3952+
except (KeyError, TypeError, ValueError):
3953+
# get_loc might raise a KeyError for missing labels (falling back
3954+
# to (i)loc will do expansion of the index)
3955+
# column_setitem will do validation that may raise TypeError or ValueError
39563956
# set using a non-recursive method & reset the cache
39573957
if takeable:
39583958
self.iloc[index, col] = value

pandas/core/indexers/__init__.py

-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
check_setitem_lengths,
55
deprecate_ndim_indexing,
66
is_empty_indexer,
7-
is_exact_shape_match,
87
is_list_like_indexer,
98
is_scalar_indexer,
109
is_valid_positional_slice,
@@ -23,7 +22,6 @@
2322
"check_setitem_lengths",
2423
"validate_indices",
2524
"maybe_convert_indices",
26-
"is_exact_shape_match",
2725
"length_of_indexer",
2826
"deprecate_ndim_indexing",
2927
"unpack_1tuple",

pandas/core/indexers/utils.py

+1-25
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,7 @@
1111

1212
import numpy as np
1313

14-
from pandas._typing import (
15-
AnyArrayLike,
16-
ArrayLike,
17-
)
14+
from pandas._typing import AnyArrayLike
1815
from pandas.util._exceptions import find_stack_level
1916

2017
from pandas.core.dtypes.common import (
@@ -294,27 +291,6 @@ def maybe_convert_indices(indices, n: int, verify: bool = True):
294291
# Unsorted
295292

296293

297-
def is_exact_shape_match(target: ArrayLike, value: ArrayLike) -> bool:
298-
"""
299-
Is setting this value into this target overwriting the entire column?
300-
301-
Parameters
302-
----------
303-
target : np.ndarray or ExtensionArray
304-
value : np.ndarray or ExtensionArray
305-
306-
Returns
307-
-------
308-
bool
309-
"""
310-
return (
311-
len(value.shape) > 0
312-
and len(target.shape) > 0
313-
and value.shape[0] == target.shape[0]
314-
and value.size == target.size
315-
)
316-
317-
318294
def length_of_indexer(indexer, target=None) -> int:
319295
"""
320296
Return the expected length of target[indexer]

pandas/core/indexing.py

+9-18
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@
5353
from pandas.core.indexers import (
5454
check_array_indexer,
5555
is_empty_indexer,
56-
is_exact_shape_match,
5756
is_list_like_indexer,
5857
is_scalar_indexer,
5958
length_of_indexer,
@@ -1951,8 +1950,7 @@ def _setitem_single_column(self, loc: int, value, plane_indexer):
19511950
"""
19521951
pi = plane_indexer
19531952

1954-
ser = self.obj._ixs(loc, axis=1)
1955-
orig_values = ser._values
1953+
orig_values = self.obj._get_column_array(loc)
19561954

19571955
# perform the equivalent of a setitem on the info axis
19581956
# as we have a null slice or a slice with full bounds
@@ -1963,7 +1961,8 @@ def _setitem_single_column(self, loc: int, value, plane_indexer):
19631961
pass
19641962
elif (
19651963
is_array_like(value)
1966-
and is_exact_shape_match(ser, value)
1964+
and len(value.shape) > 0
1965+
and self.obj.shape[0] == value.shape[0]
19671966
and not is_empty_indexer(pi)
19681967
):
19691968
if is_list_like(pi):
@@ -1972,31 +1971,23 @@ def _setitem_single_column(self, loc: int, value, plane_indexer):
19721971
# in case of slice
19731972
value = value[pi]
19741973
else:
1975-
# set the item, first attempting to operate inplace, then
1976-
# falling back to casting if necessary; see
1977-
# _whatsnew_130.notable_bug_fixes.setitem_column_try_inplace
1978-
1979-
orig_values = ser._values
1980-
ser._mgr = ser._mgr.setitem((pi,), value)
1981-
1982-
if ser._values is orig_values:
1983-
# The setitem happened inplace, so the DataFrame's values
1984-
# were modified inplace.
1985-
return
1986-
self.obj._iset_item(loc, ser)
1974+
# set value into the column (first attempting to operate inplace, then
1975+
# falling back to casting if necessary)
1976+
self.obj._mgr.column_setitem(loc, plane_indexer, value)
1977+
self.obj._clear_item_cache()
19871978
return
19881979

19891980
# We will not operate in-place, but will attempt to in the future.
19901981
# To determine whether we need to issue a FutureWarning, see if the
19911982
# setting in-place would work, i.e. behavior will change.
1992-
warn = can_hold_element(ser._values, value)
1983+
warn = can_hold_element(orig_values, value)
19931984
# Don't issue the warning yet, as we can still trim a few cases where
19941985
# behavior will not change.
19951986

19961987
self.obj._iset_item(loc, value)
19971988

19981989
if warn:
1999-
new_values = self.obj._ixs(loc, axis=1)._values
1990+
new_values = self.obj._get_column_array(loc)
20001991

20011992
if (
20021993
isinstance(new_values, np.ndarray)

pandas/core/internals/array_manager.py

+16
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
is_datetime64_ns_dtype,
3737
is_dtype_equal,
3838
is_extension_array_dtype,
39+
is_integer,
3940
is_numeric_dtype,
4041
is_object_dtype,
4142
is_timedelta64_ns_dtype,
@@ -869,6 +870,21 @@ def iset(
869870
self.arrays[mgr_idx] = value_arr
870871
return
871872

873+
def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None:
874+
"""
875+
Set values ("setitem") into a single column (not setting the full column).
876+
877+
This is a method on the ArrayManager level, to avoid creating an
878+
intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`)
879+
"""
880+
if not is_integer(loc):
881+
raise TypeError("The column index should be an integer")
882+
arr = self.arrays[loc]
883+
mgr = SingleArrayManager([arr], [self._axes[0]])
884+
new_mgr = mgr.setitem((idx,), value)
885+
# update existing ArrayManager in-place
886+
self.arrays[loc] = new_mgr.arrays[0]
887+
872888
def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None:
873889
"""
874890
Insert item at selected position.

pandas/core/internals/managers.py

+11
Original file line numberDiff line numberDiff line change
@@ -1188,6 +1188,17 @@ def _iset_single(
11881188
self.blocks = new_blocks
11891189
return
11901190

1191+
def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None:
1192+
"""
1193+
Set values ("setitem") into a single column (not setting the full column).
1194+
1195+
This is a method on the BlockManager level, to avoid creating an
1196+
intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`)
1197+
"""
1198+
col_mgr = self.iget(loc)
1199+
new_mgr = col_mgr.setitem((idx,), value)
1200+
self.iset(loc, new_mgr._block.values, inplace=True)
1201+
11911202
def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None:
11921203
"""
11931204
Insert item at selected position.

pandas/tests/frame/indexing/test_setitem.py

-2
Original file line numberDiff line numberDiff line change
@@ -1099,8 +1099,6 @@ def test_setitem_partial_column_inplace(self, consolidate, using_array_manager):
10991099
# check setting occurred in-place
11001100
tm.assert_numpy_array_equal(zvals, expected.values)
11011101
assert np.shares_memory(zvals, df["z"]._values)
1102-
if not consolidate:
1103-
assert df["z"]._values is zvals
11041102

11051103
def test_setitem_duplicate_columns_not_inplace(self):
11061104
# GH#39510

pandas/tests/indexing/test_at.py

+13
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
CategoricalDtype,
1111
CategoricalIndex,
1212
DataFrame,
13+
MultiIndex,
1314
Series,
1415
Timestamp,
1516
)
@@ -96,6 +97,18 @@ def test_at_setitem_categorical_missing(self):
9697

9798
tm.assert_frame_equal(df, expected)
9899

100+
def test_at_setitem_multiindex(self):
101+
df = DataFrame(
102+
np.zeros((3, 2), dtype="int64"),
103+
columns=MultiIndex.from_tuples([("a", 0), ("a", 1)]),
104+
)
105+
df.at[0, "a"] = 10
106+
expected = DataFrame(
107+
[[10, 10], [0, 0], [0, 0]],
108+
columns=MultiIndex.from_tuples([("a", 0), ("a", 1)]),
109+
)
110+
tm.assert_frame_equal(df, expected)
111+
99112

100113
class TestAtSetItemWithExpansion:
101114
def test_at_setitem_expansion_series_dt64tz_value(self, tz_naive_fixture):

pandas/tests/indexing/test_partial.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,7 @@ def test_partial_setting(self):
266266
with pytest.raises(IndexError, match=msg):
267267
s.iat[3] = 5.0
268268

269-
def test_partial_setting_frame(self):
269+
def test_partial_setting_frame(self, using_array_manager):
270270
df_orig = DataFrame(
271271
np.arange(6).reshape(3, 2), columns=["A", "B"], dtype="int64"
272272
)
@@ -279,6 +279,8 @@ def test_partial_setting_frame(self):
279279
df.iloc[4, 2] = 5.0
280280

281281
msg = "index 2 is out of bounds for axis 0 with size 2"
282+
if using_array_manager:
283+
msg = "list index out of range"
282284
with pytest.raises(IndexError, match=msg):
283285
df.iat[4, 2] = 5.0
284286

0 commit comments

Comments
 (0)