BUG: Series[float32].__setitem__(int_cant_hold_in_int32) not coercing (#45844)

jbrockmendel · web-flow · commit 17b15e97d639 · 2022-02-08T19:58:14.000-05:00
diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
@@ -319,6 +319,7 @@ Indexing
 - Bug in getting a column from a DataFrame with an object-dtype row index with datetime-like values: the resulting Series now preserves the exact object-dtype Index from the parent DataFrame (:issue:`42950`)
 - Bug in indexing on a :class:`DatetimeIndex` with a ``np.str_`` key incorrectly raising (:issue:`45580`)
 - Bug in :meth:`CategoricalIndex.get_indexer` when index contains ``NaN`` values, resulting in elements that are in target but not present in the index to be mapped to the index of the NaN element, instead of -1 (:issue:`45361`)
+- Bug in setting large integer values into :class:`Series` with ``float32`` or ``float16`` dtype incorrectly altering these values instead of coercing to ``float64`` dtype (:issue:`45844`)
 -
 
 Missing
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -2018,6 +2018,13 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
         raise LossySetitemError
 
     elif dtype.kind == "f":
+        if lib.is_integer(element) or lib.is_float(element):
+            casted = dtype.type(element)
+            if np.isnan(casted) or casted == element:
+                return casted
+            # otherwise e.g. overflow see TestCoercionFloat32
+            raise LossySetitemError
+
         if tipo is not None:
             # TODO: itemsize check?
             if tipo.kind not in ["f", "i", "u"]:
@@ -2029,7 +2036,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
                 if element._hasna:
                     raise LossySetitemError
                 return element
-            elif tipo.itemsize > dtype.itemsize:
+            elif tipo.itemsize > dtype.itemsize or tipo.kind != dtype.kind:
                 if isinstance(element, np.ndarray):
                     # e.g. TestDataFrameIndexingWhere::test_where_alignment
                     casted = element.astype(dtype)
@@ -2040,8 +2047,6 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
 
             return element
 
-        if lib.is_integer(element) or lib.is_float(element):
-            return element
         raise LossySetitemError
 
     elif dtype.kind == "c":
diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py
@@ -753,9 +753,30 @@ def test_fillna_index_bool(self):
     def test_fillna_series_timedelta64(self):
         raise NotImplementedError
 
-    @pytest.mark.xfail(reason="Test not implemented")
-    def test_fillna_series_period(self):
-        raise NotImplementedError
+    @pytest.mark.parametrize(
+        "fill_val",
+        [
+            1,
+            1.1,
+            1 + 1j,
+            True,
+            pd.Interval(1, 2, closed="left"),
+            pd.Timestamp("2012-01-01", tz="US/Eastern"),
+            pd.Timestamp("2012-01-01"),
+            pd.Timedelta(days=1),
+            pd.Period("2016-01-01", "W"),
+        ],
+    )
+    def test_fillna_series_period(self, index_or_series, fill_val):
+
+        pi = pd.period_range("2016-01-01", periods=4, freq="D").insert(1, pd.NaT)
+        assert isinstance(pi.dtype, pd.PeriodDtype)
+        obj = index_or_series(pi)
+
+        exp = index_or_series([pi[0], fill_val, pi[2], pi[3], pi[4]], dtype=object)
+
+        fill_dtype = object
+        self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
 
     @pytest.mark.xfail(reason="Test not implemented")
     def test_fillna_index_timedelta64(self):
diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py
@@ -1260,6 +1260,43 @@ def obj(self):
         return Series([1.1, 2.2, 3.3, 4.4])
 
 
+@pytest.mark.parametrize(
+    "val,exp_dtype",
+    [
+        (1, np.float32),
+        pytest.param(
+            1.1,
+            np.float32,
+            marks=pytest.mark.xfail(
+                reason="np.float32(1.1) ends up as 1.100000023841858, so "
+                "np_can_hold_element raises and we cast to float64",
+            ),
+        ),
+        (1 + 1j, np.complex128),
+        (True, object),
+        (np.uint8(2), np.float32),
+        (np.uint32(2), np.float32),
+        # float32 cannot hold np.iinfo(np.uint32).max exactly
+        # (closest it can hold is 4294967300.0 which off by 5.0), so
+        # we cast to float64
+        (np.uint32(np.iinfo(np.uint32).max), np.float64),
+        (np.uint64(2), np.float32),
+        (np.int64(2), np.float32),
+    ],
+)
+class TestCoercionFloat32(CoercionTest):
+    @pytest.fixture
+    def obj(self):
+        return Series([1.1, 2.2, 3.3, 4.4], dtype=np.float32)
+
+    def test_slice_key(self, obj, key, expected, val, indexer_sli, is_inplace):
+        super().test_slice_key(obj, key, expected, val, indexer_sli, is_inplace)
+
+        if type(val) is float:
+            # the xfail would xpass bc test_slice_key short-circuits
+            raise AssertionError("xfail not relevant for this test.")
+
+
 @pytest.mark.parametrize(
     "val,exp_dtype",
     [(Timestamp("2012-01-01"), "datetime64[ns]"), (1, object), ("x", object)],
diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py
@@ -749,14 +749,31 @@ def test_fillna_categorical_raises(self):
 
     @pytest.mark.parametrize("dtype", [float, "float32", "float64"])
     @pytest.mark.parametrize("fill_type", tm.ALL_REAL_NUMPY_DTYPES)
-    def test_fillna_float_casting(self, dtype, fill_type):
+    @pytest.mark.parametrize("scalar", [True, False])
+    def test_fillna_float_casting(self, dtype, fill_type, scalar):
         # GH-43424
         ser = Series([np.nan, 1.2], dtype=dtype)
         fill_values = Series([2, 2], dtype=fill_type)
+        if scalar:
+            fill_values = fill_values.dtype.type(2)
+
         result = ser.fillna(fill_values)
         expected = Series([2.0, 1.2], dtype=dtype)
         tm.assert_series_equal(result, expected)
 
+        ser = Series([np.nan, 1.2], dtype=dtype)
+        mask = ser.isna().to_numpy()
+        ser[mask] = fill_values
+        tm.assert_series_equal(ser, expected)
+
+        ser = Series([np.nan, 1.2], dtype=dtype)
+        ser.mask(mask, fill_values, inplace=True)
+        tm.assert_series_equal(ser, expected)
+
+        ser = Series([np.nan, 1.2], dtype=dtype)
+        res = ser.where(~mask, fill_values)
+        tm.assert_series_equal(res, expected)
+
     def test_fillna_f32_upcast_with_dict(self):
         # GH-43424
         ser = Series([np.nan, 1.2], dtype=np.float32)

Original file line number	Diff line number	Diff line change
`@@ -319,6 +319,7 @@ Indexing`
`319`	`319`	- Bug in getting a column from a DataFrame with an object-dtype row index with datetime-like values: the resulting Series now preserves the exact object-dtype Index from the parent DataFrame (:issue:`42950`)
`320`	`320`	- Bug in indexing on a :class:`DatetimeIndex` with a ``np.str_`` key incorrectly raising (:issue:`45580`)
`321`	`321`	- Bug in :meth:`CategoricalIndex.get_indexer` when index contains ``NaN`` values, resulting in elements that are in target but not present in the index to be mapped to the index of the NaN element, instead of -1 (:issue:`45361`)
	`322`	+- Bug in setting large integer values into :class:`Series` with ``float32`` or ``float16`` dtype incorrectly altering these values instead of coercing to ``float64`` dtype (:issue:`45844`)
`322`	`323`	`-`
`323`	`324`
`324`	`325`	`Missing`