Skip to content

Commit 37c3343

Browse files
authored
BUG: can_hold_element size checks on ints/floats (#45273)
1 parent 5940f15 commit 37c3343

File tree

6 files changed

+48
-24
lines changed

6 files changed

+48
-24
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ Indexing
154154
^^^^^^^^
155155
- Bug in :meth:`DataFrame.iloc` where indexing a single row on a :class:`DataFrame` with a single ExtensionDtype column gave a copy instead of a view on the underlying data (:issue:`45241`)
156156
- Bug in :meth:`Series.__setitem__` with a non-integer :class:`Index` when using an integer key to set a value that cannot be set inplace where a ``ValueError`` was raised instead of casting to a common dtype (:issue:`45070`)
157+
- Bug where setting an integer too large for a :class:`Series` dtype failed to coerce to a common type (:issue:`26049`)
157158
-
158159

159160
Missing

pandas/core/dtypes/cast.py

+10-15
Original file line numberDiff line numberDiff line change
@@ -1852,16 +1852,23 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
18521852
tipo = maybe_infer_dtype_type(element)
18531853

18541854
if dtype.kind in ["i", "u"]:
1855+
info = np.iinfo(dtype)
1856+
18551857
if isinstance(element, range):
18561858
if _dtype_can_hold_range(element, dtype):
18571859
return element
18581860
raise ValueError
18591861

1862+
elif is_integer(element) or (is_float(element) and element.is_integer()):
1863+
# e.g. test_setitem_series_int8 if we have a python int 1
1864+
# tipo may be np.int32, despite the fact that it will fit
1865+
# in smaller int dtypes.
1866+
if info.min <= element <= info.max:
1867+
return element
1868+
raise ValueError
1869+
18601870
if tipo is not None:
18611871
if tipo.kind not in ["i", "u"]:
1862-
if is_float(element) and element.is_integer():
1863-
return element
1864-
18651872
if isinstance(element, np.ndarray) and element.dtype.kind == "f":
18661873
# If all can be losslessly cast to integers, then we can hold them
18671874
# We do something similar in putmask_smart
@@ -1889,14 +1896,6 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
18891896
return casted
18901897
raise ValueError
18911898
elif dtype.itemsize < tipo.itemsize:
1892-
if is_integer(element):
1893-
# e.g. test_setitem_series_int8 if we have a python int 1
1894-
# tipo may be np.int32, despite the fact that it will fit
1895-
# in smaller int dtypes.
1896-
info = np.iinfo(dtype)
1897-
if info.min <= element <= info.max:
1898-
return element
1899-
raise ValueError
19001899
raise ValueError
19011900
elif not isinstance(tipo, np.dtype):
19021901
# i.e. nullable IntegerDtype; we can put this into an ndarray
@@ -1909,10 +1908,6 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
19091908

19101909
return element
19111910

1912-
# We have not inferred an integer from the dtype
1913-
# check if we have a builtin int or a float equal to an int
1914-
if is_integer(element) or (is_float(element) and element.is_integer()):
1915-
return element
19161911
raise ValueError
19171912

19181913
elif dtype.kind == "f":

pandas/tests/dtypes/cast/test_can_hold_element.py

+9
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,12 @@ def test_can_hold_element_int8_int():
6868
assert can_hold_element(arr, np.uint32(element))
6969
assert can_hold_element(arr, np.int64(element))
7070
assert can_hold_element(arr, np.uint64(element))
71+
72+
element = 2 ** 9
73+
assert not can_hold_element(arr, element)
74+
assert not can_hold_element(arr, np.int16(element))
75+
assert not can_hold_element(arr, np.uint16(element))
76+
assert not can_hold_element(arr, np.int32(element))
77+
assert not can_hold_element(arr, np.uint32(element))
78+
assert not can_hold_element(arr, np.int64(element))
79+
assert not can_hold_element(arr, np.uint64(element))

pandas/tests/frame/indexing/test_where.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -753,7 +753,12 @@ def test_where_try_cast_deprecated(frame_or_series):
753753
obj.where(mask, -1, try_cast=False)
754754

755755

756-
def test_where_int_downcasting_deprecated(using_array_manager):
756+
@pytest.mark.xfail(
757+
reason="After fixing a bug in can_hold_element, we don't go through "
758+
"the deprecated path, and also up-cast to int64 instead of int32 "
759+
"(for now)."
760+
)
761+
def test_where_int_downcasting_deprecated(using_array_manager, request):
757762
# GH#44597
758763
arr = np.arange(6).astype(np.int16).reshape(3, 2)
759764
df = DataFrame(arr)

pandas/tests/indexing/test_loc.py

+11
Original file line numberDiff line numberDiff line change
@@ -2734,6 +2734,17 @@ def test_loc_getitem_nullable_index_with_duplicates():
27342734
tm.assert_series_equal(res, expected)
27352735

27362736

2737+
def test_loc_setitem_uint8_upcast():
2738+
# GH#26049
2739+
2740+
df = DataFrame([1, 2, 3, 4], columns=["col1"], dtype="uint8")
2741+
df.loc[2, "col1"] = 300 # value that can't be held in uint8
2742+
2743+
# TODO: would be better to get uint16?
2744+
expected = DataFrame([1, 2, 300, 4], columns=["col1"], dtype="int64")
2745+
tm.assert_frame_equal(df, expected)
2746+
2747+
27372748
class TestLocSeries:
27382749
@pytest.mark.parametrize("val,expected", [(2 ** 63 - 1, 3), (2 ** 63, 4)])
27392750
def test_loc_uint64(self, val, expected):

pandas/tests/series/indexing/test_setitem.py

+11-8
Original file line numberDiff line numberDiff line change
@@ -1079,16 +1079,25 @@ def expected(self):
10791079

10801080
def test_int_key(self, obj, key, expected, val, indexer_sli, is_inplace, request):
10811081
if not isinstance(val, np.int16):
1082+
# with python int we end up with int64
10821083
mark = pytest.mark.xfail
10831084
request.node.add_marker(mark)
10841085
super().test_int_key(obj, key, expected, val, indexer_sli, is_inplace)
10851086

10861087
def test_mask_key(self, obj, key, expected, val, indexer_sli, request):
10871088
if not isinstance(val, np.int16):
1089+
# with python int we end up with int64
10881090
mark = pytest.mark.xfail
10891091
request.node.add_marker(mark)
10901092
super().test_mask_key(obj, key, expected, val, indexer_sli)
10911093

1094+
def test_series_where(self, obj, key, expected, val, is_inplace, request):
1095+
if not isinstance(val, np.int16):
1096+
# with python int we end up with int64
1097+
mark = pytest.mark.xfail
1098+
request.node.add_marker(mark)
1099+
super().test_series_where(obj, key, expected, val, is_inplace)
1100+
10921101

10931102
@pytest.mark.parametrize("val", [2 ** 33 + 1.0, 2 ** 33 + 1.1, 2 ** 62])
10941103
class TestSmallIntegerSetitemUpcast(SetitemCastingEquivalents):
@@ -1109,20 +1118,14 @@ def expected(self, val):
11091118
dtype = "i8"
11101119
return Series([val, 2, 3], dtype=dtype)
11111120

1112-
def test_series_where(self, obj, key, expected, val, is_inplace, request):
1113-
if isinstance(val, float) and val % 1 == 0:
1114-
mark = pytest.mark.xfail
1115-
request.node.add_marker(mark)
1116-
super().test_series_where(obj, key, expected, val, is_inplace)
1117-
11181121
def test_int_key(self, obj, key, expected, val, indexer_sli, is_inplace, request):
1119-
if val % 1 == 0:
1122+
if val % 1 == 0 and isinstance(val, float):
11201123
mark = pytest.mark.xfail
11211124
request.node.add_marker(mark)
11221125
super().test_int_key(obj, key, expected, val, indexer_sli, is_inplace)
11231126

11241127
def test_mask_key(self, obj, key, expected, val, indexer_sli, request):
1125-
if val % 1 == 0:
1128+
if val % 1 == 0 and isinstance(val, float):
11261129
mark = pytest.mark.xfail
11271130
request.node.add_marker(mark)
11281131
super().test_mask_key(obj, key, expected, val, indexer_sli)

0 commit comments

Comments
 (0)