diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index b559d8eb463a1..ae1d2244663e3 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -318,6 +318,7 @@ Indexing - Bug in getting a column from a DataFrame with an object-dtype row index with datetime-like values: the resulting Series now preserves the exact object-dtype Index from the parent DataFrame (:issue:`42950`) - Bug in indexing on a :class:`DatetimeIndex` with a ``np.str_`` key incorrectly raising (:issue:`45580`) - Bug in :meth:`CategoricalIndex.get_indexer` when index contains ``NaN`` values, resulting in elements that are in target but not present in the index to be mapped to the index of the NaN element, instead of -1 (:issue:`45361`) +- Bug in setting large integer values into :class:`Series` with ``float32`` or ``float16`` dtype incorrectly altering these values instead of coercing to ``float64`` dtype (:issue:`45844`) - Missing diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index b1d7de0515998..96e944ec3d7f4 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -2017,6 +2017,13 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: raise LossySetitemError elif dtype.kind == "f": + if lib.is_integer(element) or lib.is_float(element): + casted = dtype.type(element) + if np.isnan(casted) or casted == element: + return casted + # otherwise e.g. overflow see TestCoercionFloat32 + raise LossySetitemError + if tipo is not None: # TODO: itemsize check? if tipo.kind not in ["f", "i", "u"]: @@ -2028,7 +2035,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: if element._hasna: raise LossySetitemError return element - elif tipo.itemsize > dtype.itemsize: + elif tipo.itemsize > dtype.itemsize or tipo.kind != dtype.kind: if isinstance(element, np.ndarray): # e.g. 
TestDataFrameIndexingWhere::test_where_alignment casted = element.astype(dtype) @@ -2039,8 +2046,6 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: return element - if lib.is_integer(element) or lib.is_float(element): - return element raise LossySetitemError elif dtype.kind == "c": diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index ab9eef218c0da..2c2aaf87aa62d 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -753,9 +753,30 @@ def test_fillna_index_bool(self): def test_fillna_series_timedelta64(self): raise NotImplementedError - @pytest.mark.xfail(reason="Test not implemented") - def test_fillna_series_period(self): - raise NotImplementedError + @pytest.mark.parametrize( + "fill_val", + [ + 1, + 1.1, + 1 + 1j, + True, + pd.Interval(1, 2, closed="left"), + pd.Timestamp("2012-01-01", tz="US/Eastern"), + pd.Timestamp("2012-01-01"), + pd.Timedelta(days=1), + pd.Period("2016-01-01", "W"), + ], + ) + def test_fillna_series_period(self, index_or_series, fill_val): + + pi = pd.period_range("2016-01-01", periods=4, freq="D").insert(1, pd.NaT) + assert isinstance(pi.dtype, pd.PeriodDtype) + obj = index_or_series(pi) + + exp = index_or_series([pi[0], fill_val, pi[2], pi[3], pi[4]], dtype=object) + + fill_dtype = object + self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) @pytest.mark.xfail(reason="Test not implemented") def test_fillna_index_timedelta64(self): diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 2261dd18baa3e..c270584342491 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -1260,6 +1260,43 @@ def obj(self): return Series([1.1, 2.2, 3.3, 4.4]) +@pytest.mark.parametrize( + "val,exp_dtype", + [ + (1, np.float32), + pytest.param( + 1.1, + np.float32, + marks=pytest.mark.xfail( + reason="np.float32(1.1) ends up as 
1.100000023841858, so " + "np_can_hold_element raises and we cast to float64", + ), + ), + (1 + 1j, np.complex128), + (True, object), + (np.uint8(2), np.float32), + (np.uint32(2), np.float32), + # float32 cannot hold np.iinfo(np.uint32).max exactly + # (closest it can hold is 4294967296.0, which is off by 1.0), so + # we cast to float64 + (np.uint32(np.iinfo(np.uint32).max), np.float64), + (np.uint64(2), np.float32), + (np.int64(2), np.float32), + ], +) +class TestCoercionFloat32(CoercionTest): + @pytest.fixture + def obj(self): + return Series([1.1, 2.2, 3.3, 4.4], dtype=np.float32) + + def test_slice_key(self, obj, key, expected, val, indexer_sli, is_inplace): + super().test_slice_key(obj, key, expected, val, indexer_sli, is_inplace) + + if type(val) is float: + # the xfail would xpass because test_slice_key short-circuits + raise AssertionError("xfail not relevant for this test.") + + @pytest.mark.parametrize( "val,exp_dtype", [(Timestamp("2012-01-01"), "datetime64[ns]"), (1, object), ("x", object)], diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index f339497f10029..71a1b01eb9157 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -749,14 +749,31 @@ def test_fillna_categorical_raises(self): @pytest.mark.parametrize("dtype", [float, "float32", "float64"]) @pytest.mark.parametrize("fill_type", tm.ALL_REAL_NUMPY_DTYPES) - def test_fillna_float_casting(self, dtype, fill_type): + @pytest.mark.parametrize("scalar", [True, False]) + def test_fillna_float_casting(self, dtype, fill_type, scalar): # GH-43424 ser = Series([np.nan, 1.2], dtype=dtype) fill_values = Series([2, 2], dtype=fill_type) + if scalar: + fill_values = fill_values.dtype.type(2) + result = ser.fillna(fill_values) expected = Series([2.0, 1.2], dtype=dtype) tm.assert_series_equal(result, expected) + ser = Series([np.nan, 1.2], dtype=dtype) + mask = ser.isna().to_numpy() + ser[mask] = fill_values +
tm.assert_series_equal(ser, expected) + + ser = Series([np.nan, 1.2], dtype=dtype) + ser.mask(mask, fill_values, inplace=True) + tm.assert_series_equal(ser, expected) + + ser = Series([np.nan, 1.2], dtype=dtype) + res = ser.where(~mask, fill_values) + tm.assert_series_equal(res, expected) + def test_fillna_f32_upcast_with_dict(self): # GH-43424 ser = Series([np.nan, 1.2], dtype=np.float32)