Skip to content

Commit d88d27c

Browse files
authored
BUG: failure to cast all-int floating dtypes when setting into int dtypes (#44316)
* BUG: failure to cast all-int floating dtypes when setting into int dtypes
* whatsnew
* extra inplace-ness check
1 parent df28239 commit d88d27c

File tree

8 files changed

+120
-13
lines changed

8 files changed

+120
-13
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -535,6 +535,7 @@ Indexing
535535
- Bug in :meth:`DataFrame.nlargest` and :meth:`Series.nlargest` where sorted result did not count indexes containing ``np.nan`` (:issue:`28984`)
536536
- Bug in indexing on a non-unique object-dtype :class:`Index` with an NA scalar (e.g. ``np.nan``) (:issue:`43711`)
537537
- Bug in :meth:`DataFrame.__setitem__` incorrectly writing into an existing column's array rather than setting a new array when the new dtype and the old dtype match (:issue:`43406`)
538+
- Bug in setting floating-dtype values into a :class:`Series` with integer dtype failing to set inplace when those values can be losslessly converted to integers (:issue:`44316`)
538539
- Bug in :meth:`Series.__setitem__` with object dtype when setting an array with matching size and dtype='datetime64[ns]' or dtype='timedelta64[ns]' incorrectly converting the datetime/timedeltas to integers (:issue:`43868`)
539540
- Bug in :meth:`DataFrame.sort_index` where ``ignore_index=True`` was not being respected when the index was already sorted (:issue:`43591`)
540541
- Bug in :meth:`Index.get_indexer_non_unique` when index contains multiple ``np.datetime64("NaT")`` and ``np.timedelta64("NaT")`` (:issue:`43869`)

pandas/core/dtypes/cast.py

+8
Original file line numberDiff line numberDiff line change
@@ -2205,6 +2205,14 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool:
22052205
if tipo.kind not in ["i", "u"]:
22062206
if is_float(element) and element.is_integer():
22072207
return True
2208+
2209+
if isinstance(element, np.ndarray) and element.dtype.kind == "f":
2210+
# If all can be losslessly cast to integers, then we can hold them
2211+
# We do something similar in putmask_smart
2212+
casted = element.astype(dtype)
2213+
comp = casted == element
2214+
return comp.all()
2215+
22082216
# Anything other than integer we cannot hold
22092217
return False
22102218
elif dtype.itemsize < tipo.itemsize:

pandas/core/indexes/numeric.py

+12
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,18 @@ def asi8(self) -> npt.NDArray[np.int64]:
425425
)
426426
return self._values.view(self._default_dtype)
427427

428+
def _validate_fill_value(self, value):
429+
# e.g. given np.array([1.0]), we want to return np.array([1], dtype=self.dtype)
430+
# see TestSetitemFloatNDarrayIntoIntegerSeries
431+
super()._validate_fill_value(value)
432+
if hasattr(value, "dtype") and is_float_dtype(value.dtype):
433+
converted = value.astype(self.dtype)
434+
if (converted == value).all():
435+
# See also: can_hold_element
436+
return converted
437+
raise TypeError
438+
return value
439+
428440

429441
class Int64Index(IntegerIndex):
430442
_index_descr_args = {

pandas/core/internals/blocks.py

+8
Original file line numberDiff line numberDiff line change
@@ -1193,6 +1193,14 @@ def where(self, other, cond) -> list[Block]:
11931193
values, icond.sum(), other # type: ignore[arg-type]
11941194
)
11951195
if alt is not other:
1196+
if is_list_like(other) and len(other) < len(values):
1197+
# call np.where with other to get the appropriate ValueError
1198+
np.where(~icond, values, other)
1199+
raise NotImplementedError(
1200+
"This should not be reached; call to np.where above is "
1201+
"expected to raise ValueError. Please report a bug at "
1202+
"github.com/pandas-dev/pandas"
1203+
)
11961204
result = values.copy()
11971205
np.putmask(result, icond, alt)
11981206
else:

pandas/tests/dtypes/cast/test_can_hold_element.py

+13
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,16 @@ def test_can_hold_element_range(any_int_numpy_dtype):
4040
rng = range(10 ** 10, 10 ** 10)
4141
assert len(rng) == 0
4242
assert can_hold_element(arr, rng)
43+
44+
45+
def test_can_hold_element_int_values_float_ndarray():
46+
arr = np.array([], dtype=np.int64)
47+
48+
element = np.array([1.0, 2.0])
49+
assert can_hold_element(arr, element)
50+
51+
assert not can_hold_element(arr, element + 0.5)
52+
53+
# integer but not losslessly castable to int64
54+
element = np.array([3, 2 ** 65], dtype=np.float64)
55+
assert not can_hold_element(arr, element)

pandas/tests/indexing/multiindex/test_setitem.py

+21-2
Original file line numberDiff line numberDiff line change
@@ -196,16 +196,35 @@ def test_multiindex_assignment(self):
196196
df.loc[4, "d"] = arr
197197
tm.assert_series_equal(df.loc[4, "d"], Series(arr, index=[8, 10], name="d"))
198198

199+
def test_multiindex_assignment_single_dtype(self, using_array_manager):
200+
# GH3777 part 2b
199201
# single dtype
202+
arr = np.array([0.0, 1.0])
203+
200204
df = DataFrame(
201205
np.random.randint(5, 10, size=9).reshape(3, 3),
202206
columns=list("abc"),
203207
index=[[4, 4, 8], [8, 10, 12]],
208+
dtype=np.int64,
204209
)
210+
view = df["c"].iloc[:2].values
205211

212+
# arr can be losslessly cast to int, so this setitem is inplace
206213
df.loc[4, "c"] = arr
207-
exp = Series(arr, index=[8, 10], name="c", dtype="float64")
208-
tm.assert_series_equal(df.loc[4, "c"], exp)
214+
exp = Series(arr, index=[8, 10], name="c", dtype="int64")
215+
result = df.loc[4, "c"]
216+
tm.assert_series_equal(result, exp)
217+
if not using_array_manager:
218+
# FIXME(ArrayManager): this correctly preserves dtype,
219+
# but incorrectly is not inplace.
220+
# extra check for inplace-ness
221+
tm.assert_numpy_array_equal(view, exp.values)
222+
223+
# arr + 0.5 cannot be cast losslessly to int, so we upcast
224+
df.loc[4, "c"] = arr + 0.5
225+
result = df.loc[4, "c"]
226+
exp = exp + 0.5
227+
tm.assert_series_equal(result, exp)
209228

210229
# scalar ok
211230
df.loc[4, "c"] = 10

pandas/tests/indexing/test_iloc.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -515,9 +515,18 @@ def test_iloc_setitem_frame_duplicate_columns_multiple_blocks(
515515
# but on a DataFrame with multiple blocks
516516
df = DataFrame([[0, 1], [2, 3]], columns=["B", "B"])
517517

518+
# setting float values that can be held by existing integer arrays
519+
# is inplace
518520
df.iloc[:, 0] = df.iloc[:, 0].astype("f8")
521+
if not using_array_manager:
522+
assert len(df._mgr.blocks) == 1
523+
524+
# if the assigned values cannot be held by existing integer arrays,
525+
# we cast
526+
df.iloc[:, 0] = df.iloc[:, 0] + 0.5
519527
if not using_array_manager:
520528
assert len(df._mgr.blocks) == 2
529+
521530
expected = df.copy()
522531

523532
# assign back to self
@@ -892,7 +901,7 @@ def test_iloc_with_boolean_operation(self):
892901
tm.assert_frame_equal(result, expected)
893902

894903
result.iloc[[False, False, True, True]] /= 2
895-
expected = DataFrame([[0.0, 4.0], [8.0, 12.0], [4.0, 5.0], [6.0, np.nan]])
904+
expected = DataFrame([[0, 4.0], [8, 12.0], [4, 5.0], [6, np.nan]])
896905
tm.assert_frame_equal(result, expected)
897906

898907
def test_iloc_getitem_singlerow_slice_categoricaldtype_gives_series(self):

pandas/tests/series/indexing/test_setitem.py

+47-10
Original file line numberDiff line numberDiff line change
@@ -620,23 +620,27 @@ def test_mask_key(self, obj, key, expected, val, indexer_sli):
620620
mask[key] = True
621621

622622
obj = obj.copy()
623+
624+
if is_list_like(val) and len(val) < mask.sum():
625+
msg = "boolean index did not match indexed array along dimension"
626+
with pytest.raises(IndexError, match=msg):
627+
indexer_sli(obj)[mask] = val
628+
return
629+
623630
indexer_sli(obj)[mask] = val
624631
tm.assert_series_equal(obj, expected)
625632

626633
def test_series_where(self, obj, key, expected, val, is_inplace):
627-
if is_list_like(val) and len(val) < len(obj):
628-
# Series.where is not valid here
629-
if isinstance(val, range):
630-
return
631-
632-
# FIXME: The remaining TestSetitemDT64IntoInt that go through here
633-
# are relying on technically-incorrect behavior because Block.where
634-
# uses np.putmask instead of expressions.where in those cases,
635-
# which has different length-checking semantics.
636-
637634
mask = np.zeros(obj.shape, dtype=bool)
638635
mask[key] = True
639636

637+
if is_list_like(val) and len(val) < len(obj):
638+
# Series.where is not valid here
639+
msg = "operands could not be broadcast together with shapes"
640+
with pytest.raises(ValueError, match=msg):
641+
obj.where(~mask, val)
642+
return
643+
640644
orig = obj
641645
obj = obj.copy()
642646
arr = obj._values
@@ -1014,6 +1018,39 @@ def inplace(self):
10141018
return True
10151019

10161020

1021+
@pytest.mark.parametrize(
1022+
"val",
1023+
[
1024+
np.array([2.0, 3.0]),
1025+
np.array([2.5, 3.5]),
1026+
np.array([2 ** 65, 2 ** 65 + 1], dtype=np.float64), # all ints, but can't cast
1027+
],
1028+
)
1029+
class TestSetitemFloatNDarrayIntoIntegerSeries(SetitemCastingEquivalents):
1030+
@pytest.fixture
1031+
def obj(self):
1032+
return Series(range(5), dtype=np.int64)
1033+
1034+
@pytest.fixture
1035+
def key(self):
1036+
return slice(0, 2)
1037+
1038+
@pytest.fixture
1039+
def inplace(self, val):
1040+
# NB: this condition is based on the currently-hardcoded "val" cases
1041+
return val[0] == 2
1042+
1043+
@pytest.fixture
1044+
def expected(self, val, inplace):
1045+
if inplace:
1046+
dtype = np.int64
1047+
else:
1048+
dtype = np.float64
1049+
res_values = np.array(range(5), dtype=dtype)
1050+
res_values[:2] = val
1051+
return Series(res_values)
1052+
1053+
10171054
def test_setitem_int_as_positional_fallback_deprecation():
10181055
# GH#42215 deprecated falling back to positional on __setitem__ with an
10191056
# int not contained in the index

0 commit comments

Comments
 (0)