BUG: can_hold_element size checks on ints/floats (#45273)

jbrockmendel · web-flow · commit 37c33438837c · 2022-01-10T08:24:14.000-05:00
diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
@@ -154,6 +154,7 @@ Indexing
 ^^^^^^^^
 - Bug in :meth:`DataFrame.iloc` where indexing a single row on a :class:`DataFrame` with a single ExtensionDtype column gave a copy instead of a view on the underlying data (:issue:`45241`)
 - Bug in :meth:`Series.__setitem__` with a non-integer :class:`Index` when using an integer key to set a value that cannot be set inplace where a ``ValueError`` was raised instead of casting to a common dtype (:issue:`45070`)
+- Bug when setting an integer too large for a :class:`Series` dtype failing to coerce to a common type (:issue:`26049`)
 -
 
 Missing
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -1852,16 +1852,23 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
     tipo = maybe_infer_dtype_type(element)
 
     if dtype.kind in ["i", "u"]:
+        info = np.iinfo(dtype)
+
         if isinstance(element, range):
             if _dtype_can_hold_range(element, dtype):
                 return element
             raise ValueError
 
+        elif is_integer(element) or (is_float(element) and element.is_integer()):
+            # e.g. test_setitem_series_int8 if we have a python int 1
+            #  tipo may be np.int32, despite the fact that it will fit
+            #  in smaller int dtypes.
+            if info.min <= element <= info.max:
+                return element
+            raise ValueError
+
         if tipo is not None:
             if tipo.kind not in ["i", "u"]:
-                if is_float(element) and element.is_integer():
-                    return element
-
                 if isinstance(element, np.ndarray) and element.dtype.kind == "f":
                     # If all can be losslessly cast to integers, then we can hold them
                     #  We do something similar in putmask_smart
@@ -1889,14 +1896,6 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
                     return casted
                 raise ValueError
             elif dtype.itemsize < tipo.itemsize:
-                if is_integer(element):
-                    # e.g. test_setitem_series_int8 if we have a python int 1
-                    #  tipo may be np.int32, despite the fact that it will fit
-                    #  in smaller int dtypes.
-                    info = np.iinfo(dtype)
-                    if info.min <= element <= info.max:
-                        return element
-                    raise ValueError
                 raise ValueError
             elif not isinstance(tipo, np.dtype):
                 # i.e. nullable IntegerDtype; we can put this into an ndarray
@@ -1909,10 +1908,6 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
 
             return element
 
-        # We have not inferred an integer from the dtype
-        # check if we have a builtin int or a float equal to an int
-        if is_integer(element) or (is_float(element) and element.is_integer()):
-            return element
         raise ValueError
 
     elif dtype.kind == "f":
diff --git a/pandas/tests/dtypes/cast/test_can_hold_element.py b/pandas/tests/dtypes/cast/test_can_hold_element.py
@@ -68,3 +68,12 @@ def test_can_hold_element_int8_int():
     assert can_hold_element(arr, np.uint32(element))
     assert can_hold_element(arr, np.int64(element))
     assert can_hold_element(arr, np.uint64(element))
+
+    element = 2 ** 9
+    assert not can_hold_element(arr, element)
+    assert not can_hold_element(arr, np.int16(element))
+    assert not can_hold_element(arr, np.uint16(element))
+    assert not can_hold_element(arr, np.int32(element))
+    assert not can_hold_element(arr, np.uint32(element))
+    assert not can_hold_element(arr, np.int64(element))
+    assert not can_hold_element(arr, np.uint64(element))
diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py
@@ -753,7 +753,12 @@ def test_where_try_cast_deprecated(frame_or_series):
         obj.where(mask, -1, try_cast=False)
 
 
-def test_where_int_downcasting_deprecated(using_array_manager):
+@pytest.mark.xfail(
+    reason="After fixing a bug in can_hold_element, we don't go through "
+    "the deprecated path, and also up-cast to int64 instead of int32 "
+    "(for now)."
+)
+def test_where_int_downcasting_deprecated(using_array_manager, request):
     # GH#44597
     arr = np.arange(6).astype(np.int16).reshape(3, 2)
     df = DataFrame(arr)
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
@@ -2734,6 +2734,17 @@ def test_loc_getitem_nullable_index_with_duplicates():
     tm.assert_series_equal(res, expected)
 
 
+def test_loc_setitem_uint8_upcast():
+    # GH#26049
+
+    df = DataFrame([1, 2, 3, 4], columns=["col1"], dtype="uint8")
+    df.loc[2, "col1"] = 300  # value that can't be held in uint8
+
+    # TODO: would be better to get uint16?
+    expected = DataFrame([1, 2, 300, 4], columns=["col1"], dtype="int64")
+    tm.assert_frame_equal(df, expected)
+
+
 class TestLocSeries:
     @pytest.mark.parametrize("val,expected", [(2 ** 63 - 1, 3), (2 ** 63, 4)])
     def test_loc_uint64(self, val, expected):
diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py
@@ -1079,16 +1079,25 @@ def expected(self):
 
     def test_int_key(self, obj, key, expected, val, indexer_sli, is_inplace, request):
         if not isinstance(val, np.int16):
+            # with python int we end up with int64
             mark = pytest.mark.xfail
             request.node.add_marker(mark)
         super().test_int_key(obj, key, expected, val, indexer_sli, is_inplace)
 
     def test_mask_key(self, obj, key, expected, val, indexer_sli, request):
         if not isinstance(val, np.int16):
+            # with python int we end up with int64
             mark = pytest.mark.xfail
             request.node.add_marker(mark)
         super().test_mask_key(obj, key, expected, val, indexer_sli)
 
+    def test_series_where(self, obj, key, expected, val, is_inplace, request):
+        if not isinstance(val, np.int16):
+            # with python int we end up with int64
+            mark = pytest.mark.xfail
+            request.node.add_marker(mark)
+        super().test_series_where(obj, key, expected, val, is_inplace)
+
 
 @pytest.mark.parametrize("val", [2 ** 33 + 1.0, 2 ** 33 + 1.1, 2 ** 62])
 class TestSmallIntegerSetitemUpcast(SetitemCastingEquivalents):
@@ -1109,20 +1118,14 @@ def expected(self, val):
             dtype = "i8"
         return Series([val, 2, 3], dtype=dtype)
 
-    def test_series_where(self, obj, key, expected, val, is_inplace, request):
-        if isinstance(val, float) and val % 1 == 0:
-            mark = pytest.mark.xfail
-            request.node.add_marker(mark)
-        super().test_series_where(obj, key, expected, val, is_inplace)
-
     def test_int_key(self, obj, key, expected, val, indexer_sli, is_inplace, request):
-        if val % 1 == 0:
+        if val % 1 == 0 and isinstance(val, float):
             mark = pytest.mark.xfail
             request.node.add_marker(mark)
         super().test_int_key(obj, key, expected, val, indexer_sli, is_inplace)
 
     def test_mask_key(self, obj, key, expected, val, indexer_sli, request):
-        if val % 1 == 0:
+        if val % 1 == 0 and isinstance(val, float):
             mark = pytest.mark.xfail
             request.node.add_marker(mark)
         super().test_mask_key(obj, key, expected, val, indexer_sli)

Original file line number	Diff line number	Diff line change
`@@ -154,6 +154,7 @@ Indexing`
`154`	`154`	`^^^^^^^^`
`155`	`155`	- Bug in :meth:`DataFrame.iloc` where indexing a single row on a :class:`DataFrame` with a single ExtensionDtype column gave a copy instead of a view on the underlying data (:issue:`45241`)
`156`	`156`	- Bug in :meth:`Series.__setitem__` with a non-integer :class:`Index` when using an integer key to set a value that cannot be set inplace where a ``ValueError`` was raised instead of casting to a common dtype (:issue:`45070`)
	`157`	+- Bug when setting an integer too large for a :class:`Series` dtype failing to coerce to a common type (:issue:`26049`)
`157`	`158`	`-`
`158`	`159`
`159`	`160`	`Missing`