Skip to content

Commit 21cd3f0

Browse files
jbrockmendelSeeminSyed
authored andcommitted
BUG: ExtensionBlock.set not setting values inplace (pandas-dev#32831)
1 parent 113e605 commit 21cd3f0

File tree

3 files changed

+55
-25
lines changed

3 files changed

+55
-25
lines changed

pandas/core/internals/blocks.py

+27-25
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import pandas._libs.internals as libinternals
1212
from pandas._libs.tslibs import Timedelta, conversion
1313
from pandas._libs.tslibs.timezones import tz_compare
14+
from pandas._typing import ArrayLike
1415
from pandas.util._validators import validate_bool_kwarg
1516

1617
from pandas.core.dtypes.cast import (
@@ -340,11 +341,12 @@ def iget(self, i):
340341

341342
def set(self, locs, values):
342343
"""
343-
Modify Block in-place with new item value
344+
Modify block values in-place with new item value.
344345
345-
Returns
346-
-------
347-
None
346+
Notes
347+
-----
348+
`set` never creates a new array or new Block, whereas `setitem` _may_
349+
create a new array and always creates a new Block.
348350
"""
349351
self.values[locs] = values
350352

@@ -793,7 +795,7 @@ def _replace_single(self, *args, **kwargs):
793795

794796
def setitem(self, indexer, value):
795797
"""
796-
Set the value inplace, returning a a maybe different typed block.
798+
Attempt self.values[indexer] = value, possibly creating a new array.
797799
798800
Parameters
799801
----------
@@ -1633,12 +1635,15 @@ def iget(self, col):
16331635
raise IndexError(f"{self} only contains one item")
16341636
return self.values
16351637

1636-
def should_store(self, value):
1638+
def should_store(self, value: ArrayLike) -> bool:
1639+
"""
1640+
Can we set the given array-like value inplace?
1641+
"""
16371642
return isinstance(value, self._holder)
16381643

1639-
def set(self, locs, values, check=False):
1644+
def set(self, locs, values):
16401645
assert locs.tolist() == [0]
1641-
self.values = values
1646+
self.values[:] = values
16421647

16431648
def putmask(
16441649
self, mask, new, align=True, inplace=False, axis=0, transpose=False,
@@ -1749,7 +1754,7 @@ def is_numeric(self):
17491754

17501755
def setitem(self, indexer, value):
17511756
"""
1752-
Set the value inplace, returning a same-typed block.
1757+
Attempt self.values[indexer] = value, possibly creating a new array.
17531758
17541759
This differs from Block.setitem by not allowing setitem to change
17551760
the dtype of the Block.
@@ -2055,7 +2060,7 @@ def to_native_types(
20552060
)
20562061
return formatter.get_result_as_array()
20572062

2058-
def should_store(self, value) -> bool:
2063+
def should_store(self, value: ArrayLike) -> bool:
20592064
# when inserting a column should not coerce integers to floats
20602065
# unnecessarily
20612066
return issubclass(value.dtype.type, np.floating) and value.dtype == self.dtype
@@ -2073,7 +2078,7 @@ def _can_hold_element(self, element: Any) -> bool:
20732078
element, (float, int, complex, np.float_, np.int_)
20742079
) and not isinstance(element, (bool, np.bool_))
20752080

2076-
def should_store(self, value) -> bool:
2081+
def should_store(self, value: ArrayLike) -> bool:
20772082
return issubclass(value.dtype.type, np.complexfloating)
20782083

20792084

@@ -2092,7 +2097,7 @@ def _can_hold_element(self, element: Any) -> bool:
20922097
)
20932098
return is_integer(element)
20942099

2095-
def should_store(self, value) -> bool:
2100+
def should_store(self, value: ArrayLike) -> bool:
20962101
return is_integer_dtype(value) and value.dtype == self.dtype
20972102

20982103

@@ -2103,6 +2108,9 @@ class DatetimeLikeBlockMixin:
21032108
def _holder(self):
21042109
return DatetimeArray
21052110

2111+
def should_store(self, value):
2112+
return is_dtype_equal(self.dtype, value.dtype)
2113+
21062114
@property
21072115
def fill_value(self):
21082116
return np.datetime64("NaT", "ns")
@@ -2239,16 +2247,9 @@ def to_native_types(
22392247
).reshape(i8values.shape)
22402248
return np.atleast_2d(result)
22412249

2242-
def should_store(self, value) -> bool:
2243-
return is_datetime64_dtype(value.dtype)
2244-
22452250
def set(self, locs, values):
22462251
"""
2247-
Modify Block in-place with new item value
2248-
2249-
Returns
2250-
-------
2251-
None
2252+
See Block.set.__doc__
22522253
"""
22532254
values = conversion.ensure_datetime64ns(values, copy=False)
22542255

@@ -2272,6 +2273,7 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock):
22722273
_can_hold_element = DatetimeBlock._can_hold_element
22732274
to_native_types = DatetimeBlock.to_native_types
22742275
fill_value = np.datetime64("NaT", "ns")
2276+
should_store = DatetimeBlock.should_store
22752277

22762278
@property
22772279
def _holder(self):
@@ -2481,9 +2483,6 @@ def fillna(self, value, **kwargs):
24812483
)
24822484
return super().fillna(value, **kwargs)
24832485

2484-
def should_store(self, value) -> bool:
2485-
return is_timedelta64_dtype(value.dtype)
2486-
24872486
def to_native_types(self, slicer=None, na_rep=None, quoting=None, **kwargs):
24882487
""" convert to our native types format, slicing if desired """
24892488
values = self.values
@@ -2525,7 +2524,7 @@ def _can_hold_element(self, element: Any) -> bool:
25252524
return issubclass(tipo.type, np.bool_)
25262525
return isinstance(element, (bool, np.bool_))
25272526

2528-
def should_store(self, value) -> bool:
2527+
def should_store(self, value: ArrayLike) -> bool:
25292528
return issubclass(value.dtype.type, np.bool_) and not is_extension_array_dtype(
25302529
value
25312530
)
@@ -2617,7 +2616,7 @@ def _maybe_downcast(self, blocks: List["Block"], downcast=None) -> List["Block"]
26172616
def _can_hold_element(self, element: Any) -> bool:
26182617
return True
26192618

2620-
def should_store(self, value) -> bool:
2619+
def should_store(self, value: ArrayLike) -> bool:
26212620
return not (
26222621
issubclass(
26232622
value.dtype.type,
@@ -2866,6 +2865,9 @@ def __init__(self, values, placement, ndim=None):
28662865
def _holder(self):
28672866
return Categorical
28682867

2868+
def should_store(self, arr: ArrayLike):
2869+
return isinstance(arr, self._holder) and is_dtype_equal(self.dtype, arr.dtype)
2870+
28692871
def to_native_types(self, slicer=None, na_rep="", quoting=None, **kwargs):
28702872
""" convert to our native types format, slicing if desired """
28712873
values = self.values

pandas/tests/indexing/test_iloc.py

+11
Original file line numberDiff line numberDiff line change
@@ -694,6 +694,17 @@ def test_series_indexing_zerodim_np_array(self):
694694
result = s.iloc[np.array(0)]
695695
assert result == 1
696696

697+
def test_iloc_setitem_categorical_updates_inplace(self):
698+
# Mixed dtype ensures we go through take_split_path in setitem_with_indexer
699+
cat = pd.Categorical(["A", "B", "C"])
700+
df = pd.DataFrame({1: cat, 2: [1, 2, 3]})
701+
702+
# This should modify our original values in-place
703+
df.iloc[:, 0] = cat[::-1]
704+
705+
expected = pd.Categorical(["C", "B", "A"])
706+
tm.assert_categorical_equal(cat, expected)
707+
697708

698709
class TestILocSetItemDuplicateColumns:
699710
def test_iloc_setitem_scalar_duplicate_columns(self):

pandas/tests/internals/test_internals.py

+17
Original file line numberDiff line numberDiff line change
@@ -1189,6 +1189,23 @@ def test_binop_other(self, op, value, dtype):
11891189
tm.assert_series_equal(result, expected)
11901190

11911191

1192+
class TestShouldStore:
1193+
def test_should_store_categorical(self):
1194+
cat = pd.Categorical(["A", "B", "C"])
1195+
df = pd.DataFrame(cat)
1196+
blk = df._data.blocks[0]
1197+
1198+
# matching dtype
1199+
assert blk.should_store(cat)
1200+
assert blk.should_store(cat[:-1])
1201+
1202+
# different dtype
1203+
assert not blk.should_store(cat.as_ordered())
1204+
1205+
# ndarray instead of Categorical
1206+
assert not blk.should_store(np.asarray(cat))
1207+
1208+
11921209
@pytest.mark.parametrize(
11931210
"typestr, holder",
11941211
[

0 commit comments

Comments
 (0)