Skip to content

Commit fb9f205

Browse files
authored
BUG: Series.__setitem__ failing to cast numeric values (#45121)
1 parent fa3d5f1 commit fb9f205

File tree

8 files changed

+39
-28
lines changed

8 files changed

+39
-28
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -837,6 +837,7 @@ Indexing
837837
- Bug in :meth:`IntervalIndex.get_indexer_non_unique` not handling targets of ``dtype`` 'object' with NaNs correctly (:issue:`44482`)
838838
- Fixed regression where a single column ``np.matrix`` was no longer coerced to a 1d ``np.ndarray`` when added to a :class:`DataFrame` (:issue:`42376`)
839839
- Bug in :meth:`Series.__getitem__` with a :class:`CategoricalIndex` of integers treating lists of integers as positional indexers, inconsistent with the behavior with a single scalar integer (:issue:`15470`, :issue:`14865`)
840+
- Bug in :meth:`Series.__setitem__` when setting floats or integers into integer-dtype series failing to upcast when necessary to retain precision (:issue:`45121`)
840841
-
841842

842843
Missing

pandas/core/dtypes/cast.py

+6
Original file line number | Diff line number | Diff line change
@@ -2209,6 +2209,12 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool:
22092209
# Anything other than integer we cannot hold
22102210
return False
22112211
elif dtype.itemsize < tipo.itemsize:
2212+
if is_integer(element):
2213+
# e.g. test_setitem_series_int8 if we have a python int 1
2214+
# tipo may be np.int32, despite the fact that it will fit
2215+
# in smaller int dtypes.
2216+
info = np.iinfo(dtype)
2217+
return info.min <= element <= info.max
22122218
return False
22132219
elif not isinstance(tipo, np.dtype):
22142220
# i.e. nullable IntegerDtype; we can put this into an ndarray

pandas/core/frame.py

+4 -2
Original file line number | Diff line number | Diff line change
@@ -92,14 +92,14 @@
9292
)
9393

9494
from pandas.core.dtypes.cast import (
95+
can_hold_element,
9596
construct_1d_arraylike_from_scalar,
9697
construct_2d_arraylike_from_scalar,
9798
find_common_type,
9899
infer_dtype_from_scalar,
99100
invalidate_string_dtypes,
100101
maybe_box_native,
101102
maybe_downcast_to_dtype,
102-
validate_numeric_casting,
103103
)
104104
from pandas.core.dtypes.common import (
105105
ensure_platform_int,
@@ -3865,7 +3865,9 @@ def _set_value(
38653865

38663866
series = self._get_item_cache(col)
38673867
loc = self.index.get_loc(index)
3868-
validate_numeric_casting(series.dtype, value)
3868+
if not can_hold_element(series._values, value):
3869+
# We'll go through loc and end up casting.
3870+
raise TypeError
38693871

38703872
series._mgr.setitem_inplace(loc, value)
38713873
# Note: trying to use series._set_value breaks tests in

pandas/core/indexes/base.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,6 @@
6969
find_common_type,
7070
infer_dtype_from,
7171
maybe_cast_pointwise_result,
72-
validate_numeric_casting,
7372
)
7473
from pandas.core.dtypes.common import (
7574
ensure_int64,
@@ -5643,7 +5642,8 @@ def set_value(self, arr, key, value):
56435642
stacklevel=find_stack_level(),
56445643
)
56455644
loc = self._engine.get_loc(key)
5646-
validate_numeric_casting(arr.dtype, value)
5645+
if not can_hold_element(arr, value):
5646+
raise ValueError
56475647
arr[loc] = value
56485648

56495649
_index_shared_docs[

pandas/core/series.py

+4 -4
Original file line number | Diff line number | Diff line change
@@ -62,10 +62,10 @@
6262
)
6363

6464
from pandas.core.dtypes.cast import (
65+
can_hold_element,
6566
convert_dtypes,
6667
maybe_box_native,
6768
maybe_cast_pointwise_result,
68-
validate_numeric_casting,
6969
)
7070
from pandas.core.dtypes.common import (
7171
ensure_platform_int,
@@ -1143,9 +1143,9 @@ def __setitem__(self, key, value) -> None:
11431143

11441144
def _set_with_engine(self, key, value) -> None:
11451145
loc = self.index.get_loc(key)
1146-
# error: Argument 1 to "validate_numeric_casting" has incompatible type
1147-
# "Union[dtype, ExtensionDtype]"; expected "dtype"
1148-
validate_numeric_casting(self.dtype, value) # type: ignore[arg-type]
1146+
if not can_hold_element(self._values, value):
1147+
raise ValueError
1148+
11491149
# this is equivalent to self._values[key] = value
11501150
self._mgr.setitem_inplace(loc, value)
11511151

pandas/tests/dtypes/cast/test_can_hold_element.py

+15
Original file line number | Diff line number | Diff line change
@@ -53,3 +53,18 @@ def test_can_hold_element_int_values_float_ndarray():
5353
# integer but not losslessly castable to int64
5454
element = np.array([3, 2 ** 65], dtype=np.float64)
5555
assert not can_hold_element(arr, element)
56+
57+
58+
def test_can_hold_element_int8_int():
59+
arr = np.array([], dtype=np.int8)
60+
61+
element = 2
62+
assert can_hold_element(arr, element)
63+
assert can_hold_element(arr, np.int8(element))
64+
assert can_hold_element(arr, np.uint8(element))
65+
assert can_hold_element(arr, np.int16(element))
66+
assert can_hold_element(arr, np.uint16(element))
67+
assert can_hold_element(arr, np.int32(element))
68+
assert can_hold_element(arr, np.uint32(element))
69+
assert can_hold_element(arr, np.int64(element))
70+
assert can_hold_element(arr, np.uint64(element))

pandas/tests/frame/indexing/test_set_value.py

+3 -4
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
import numpy as np
2-
import pytest
32

43
from pandas.core.dtypes.common import is_float_dtype
54

@@ -38,9 +37,9 @@ def test_set_value_resize(self, float_frame):
3837
res._set_value("foobar", "baz", 5)
3938
assert is_float_dtype(res["baz"])
4039
assert isna(res["baz"].drop(["foobar"])).all()
41-
msg = "could not convert string to float: 'sam'"
42-
with pytest.raises(ValueError, match=msg):
43-
res._set_value("foobar", "baz", "sam")
40+
41+
res._set_value("foobar", "baz", "sam")
42+
assert res.loc["foobar", "baz"] == "sam"
4443

4544
def test_set_value_with_index_dtype_change(self):
4645
df_orig = DataFrame(np.random.randn(3, 3), index=range(3), columns=list("ABC"))

pandas/tests/indexing/test_coercion.py

+4 -16
Original file line number | Diff line number | Diff line change
@@ -110,38 +110,26 @@ def test_setitem_series_object(self, val, exp_dtype):
110110
"val,exp_dtype",
111111
[(1, np.int64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)],
112112
)
113-
def test_setitem_series_int64(self, val, exp_dtype, request):
113+
def test_setitem_series_int64(self, val, exp_dtype):
114114
obj = pd.Series([1, 2, 3, 4])
115115
assert obj.dtype == np.int64
116116

117-
if exp_dtype is np.float64:
118-
exp = pd.Series([1, 1, 3, 4])
119-
self._assert_setitem_series_conversion(obj, 1.1, exp, np.int64)
120-
mark = pytest.mark.xfail(reason="GH12747 The result must be float")
121-
request.node.add_marker(mark)
122-
123117
exp = pd.Series([1, val, 3, 4])
124118
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
125119

126120
@pytest.mark.parametrize(
127121
"val,exp_dtype", [(np.int32(1), np.int8), (np.int16(2 ** 9), np.int16)]
128122
)
129-
def test_setitem_series_int8(self, val, exp_dtype, request):
123+
def test_setitem_series_int8(self, val, exp_dtype):
130124
obj = pd.Series([1, 2, 3, 4], dtype=np.int8)
131125
assert obj.dtype == np.int8
132126

133-
if exp_dtype is np.int16:
134-
exp = pd.Series([1, 0, 3, 4], dtype=np.int8)
135-
self._assert_setitem_series_conversion(obj, val, exp, np.int8)
136-
mark = pytest.mark.xfail(
137-
reason="BUG: it must be pd.Series([1, 1, 3, 4], dtype=np.int16"
138-
)
139-
request.node.add_marker(mark)
140-
141127
warn = None if exp_dtype is np.int8 else FutureWarning
142128
msg = "Values are too large to be losslessly cast to int8"
143129
with tm.assert_produces_warning(warn, match=msg):
144130
exp = pd.Series([1, val, 3, 4], dtype=np.int8)
131+
132+
exp = pd.Series([1, val, 3, 4], dtype=exp_dtype)
145133
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype)
146134

147135
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)