Skip to content

BUG: Series[float32].__setitem__(int_cant_hold_in_int32) not coercing #45844

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Feb 9, 2022
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,7 @@ Indexing
- Bug in getting a column from a DataFrame with an object-dtype row index with datetime-like values: the resulting Series now preserves the exact object-dtype Index from the parent DataFrame (:issue:`42950`)
- Bug in indexing on a :class:`DatetimeIndex` with a ``np.str_`` key incorrectly raising (:issue:`45580`)
- Bug in :meth:`CategoricalIndex.get_indexer` when index contains ``NaN`` values, resulting in elements that are in target but not present in the index to be mapped to the index of the NaN element, instead of -1 (:issue:`45361`)
- Bug in setting large integer values into :class:`Series` with ``float32`` or ``float16`` dtype incorrectly altering these values instead of coercing to ``float64`` dtype (:issue:`45844`)
-

Missing
Expand Down
11 changes: 8 additions & 3 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -2017,6 +2017,13 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
raise LossySetitemError

elif dtype.kind == "f":
if lib.is_integer(element) or lib.is_float(element):
casted = dtype.type(element)
if np.isnan(casted) or casted == element:
return casted
# otherwise the cast is lossy (e.g. overflow); see TestCoercionFloat32
raise LossySetitemError

if tipo is not None:
# TODO: itemsize check?
if tipo.kind not in ["f", "i", "u"]:
Expand All @@ -2028,7 +2035,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
if element._hasna:
raise LossySetitemError
return element
elif tipo.itemsize > dtype.itemsize:
elif tipo.itemsize > dtype.itemsize or tipo.kind != dtype.kind:
if isinstance(element, np.ndarray):
# e.g. TestDataFrameIndexingWhere::test_where_alignment
casted = element.astype(dtype)
Expand All @@ -2039,8 +2046,6 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:

return element

if lib.is_integer(element) or lib.is_float(element):
return element
raise LossySetitemError

elif dtype.kind == "c":
Expand Down
27 changes: 24 additions & 3 deletions pandas/tests/indexing/test_coercion.py
Original file line number Diff line number Diff line change
Expand Up @@ -753,9 +753,30 @@ def test_fillna_index_bool(self):
def test_fillna_series_timedelta64(self):
raise NotImplementedError

@pytest.mark.xfail(reason="Test not implemented")
def test_fillna_series_period(self):
raise NotImplementedError
@pytest.mark.parametrize(
    "fill_val",
    [
        1,
        1.1,
        1 + 1j,
        True,
        pd.Interval(1, 2, closed="left"),
        pd.Timestamp("2012-01-01", tz="US/Eastern"),
        pd.Timestamp("2012-01-01"),
        pd.Timedelta(days=1),
        # weekly freq, unlike the daily freq of the period data built below
        pd.Period("2016-01-01", "W"),
    ],
)
def test_fillna_series_period(self, index_or_series, fill_val):
    """
    Check fillna on period-dtype data with each parametrized ``fill_val``.

    For every ``fill_val`` the expected result is object dtype, with
    ``fill_val`` in the slot that held ``NaT`` and the remaining periods
    unchanged.
    """

    # daily periods with a missing value inserted at position 1
    pi = pd.period_range("2016-01-01", periods=4, freq="D").insert(1, pd.NaT)
    assert isinstance(pi.dtype, pd.PeriodDtype)
    obj = index_or_series(pi)

    # same values as ``pi`` with the NaT slot replaced, held as object dtype
    exp = index_or_series([pi[0], fill_val, pi[2], pi[3], pi[4]], dtype=object)

    fill_dtype = object
    # the actual fill-and-compare is delegated to the shared helper
    self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)

@pytest.mark.xfail(reason="Test not implemented")
def test_fillna_index_timedelta64(self):
Expand Down
37 changes: 37 additions & 0 deletions pandas/tests/series/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -1260,6 +1260,43 @@ def obj(self):
return Series([1.1, 2.2, 3.3, 4.4])


@pytest.mark.parametrize(
    "val,exp_dtype",
    [
        (1, np.float32),
        pytest.param(
            1.1,
            np.float32,
            marks=pytest.mark.xfail(
                reason="np.float32(1.1) ends up as 1.100000023841858, so "
                "np_can_hold_element raises and we cast to float64",
            ),
        ),
        (1 + 1j, np.complex128),
        (True, object),
        (np.uint8(2), np.float32),
        (np.uint32(2), np.float32),
        # float32 cannot hold np.iinfo(np.uint32).max (2**32 - 1) exactly
        # (the closest float32 value is 2**32 == 4294967296.0, which is off
        # by 1.0), so we cast to float64
        (np.uint32(np.iinfo(np.uint32).max), np.float64),
        (np.uint64(2), np.float32),
        (np.int64(2), np.float32),
    ],
)
class TestCoercionFloat32(CoercionTest):
    """
    Coercion cases for setting ``val`` into a float32 Series.

    Each parametrized case pairs a value to set (``val``) with the dtype
    expected afterwards (``exp_dtype``).  The tests themselves are inherited
    from ``CoercionTest``; only ``test_slice_key`` is overridden here.
    """

    @pytest.fixture
    def obj(self):
        """Return the float32 Series that the inherited tests operate on."""
        return Series([1.1, 2.2, 3.3, 4.4], dtype=np.float32)

    def test_slice_key(self, obj, key, expected, val, indexer_sli, is_inplace):
        """
        Run the inherited slice-key test, then force a failure for the
        Python-float case so its class-level xfail mark does not xpass.
        """
        super().test_slice_key(obj, key, expected, val, indexer_sli, is_inplace)

        # exact type check: only the plain Python float case (1.1) carries
        # the xfail mark
        if type(val) is float:
            # the xfail would xpass bc test_slice_key short-circuits
            raise AssertionError("xfail not relevant for this test.")


@pytest.mark.parametrize(
"val,exp_dtype",
[(Timestamp("2012-01-01"), "datetime64[ns]"), (1, object), ("x", object)],
Expand Down
19 changes: 18 additions & 1 deletion pandas/tests/series/methods/test_fillna.py
Original file line number Diff line number Diff line change
Expand Up @@ -749,14 +749,31 @@ def test_fillna_categorical_raises(self):

@pytest.mark.parametrize("dtype", [float, "float32", "float64"])
@pytest.mark.parametrize("fill_type", tm.ALL_REAL_NUMPY_DTYPES)
def test_fillna_float_casting(self, dtype, fill_type):
@pytest.mark.parametrize("scalar", [True, False])
def test_fillna_float_casting(self, dtype, fill_type, scalar):
    """
    Filling the NaN in a float Series with the value 2 held in ``fill_type``
    keeps the Series' float ``dtype``.

    The same expectation is checked through four paths: ``fillna``,
    boolean-mask ``__setitem__``, in-place ``mask`` and ``where``.  With
    ``scalar=True`` the fill value is a single scalar instead of a Series.
    """
    # GH-43424
    ser = Series([np.nan, 1.2], dtype=dtype)
    fill_values = Series([2, 2], dtype=fill_type)
    if scalar:
        # scalar of the fill Series' dtype rather than a Python int
        fill_values = fill_values.dtype.type(2)

    # 1) fillna
    result = ser.fillna(fill_values)
    expected = Series([2.0, 1.2], dtype=dtype)
    tm.assert_series_equal(result, expected)

    # 2) setitem with a boolean mask selecting the NaN position
    ser = Series([np.nan, 1.2], dtype=dtype)
    mask = ser.isna().to_numpy()
    ser[mask] = fill_values
    tm.assert_series_equal(ser, expected)

    # 3) in-place Series.mask
    ser = Series([np.nan, 1.2], dtype=dtype)
    ser.mask(mask, fill_values, inplace=True)
    tm.assert_series_equal(ser, expected)

    # 4) Series.where with the inverted mask
    ser = Series([np.nan, 1.2], dtype=dtype)
    res = ser.where(~mask, fill_values)
    tm.assert_series_equal(res, expected)

def test_fillna_f32_upcast_with_dict(self):
# GH-43424
ser = Series([np.nan, 1.2], dtype=np.float32)
Expand Down