Skip to content

Commit 17b15e9

Browse files
authored
BUG: Series[float32].__setitem__(int_cant_hold_in_int32) not coercing (#45844)
1 parent 82b6175 commit 17b15e9

File tree

5 files changed

+88
-7
lines changed

5 files changed

+88
-7
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,7 @@ Indexing
319319
- Bug in getting a column from a DataFrame with an object-dtype row index with datetime-like values: the resulting Series now preserves the exact object-dtype Index from the parent DataFrame (:issue:`42950`)
320320
- Bug in indexing on a :class:`DatetimeIndex` with a ``np.str_`` key incorrectly raising (:issue:`45580`)
321321
- Bug in :meth:`CategoricalIndex.get_indexer` when index contains ``NaN`` values, causing elements that are in target but not present in the index to be mapped to the index of the NaN element, instead of -1 (:issue:`45361`)
322+
- Bug in setting large integer values into :class:`Series` with ``float32`` or ``float16`` dtype incorrectly altering these values instead of coercing to ``float64`` dtype (:issue:`45844`)
322323
-
323324

324325
Missing

pandas/core/dtypes/cast.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -2018,6 +2018,13 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
20182018
raise LossySetitemError
20192019

20202020
elif dtype.kind == "f":
2021+
if lib.is_integer(element) or lib.is_float(element):
2022+
casted = dtype.type(element)
2023+
if np.isnan(casted) or casted == element:
2024+
return casted
2025+
# otherwise the cast was lossy, e.g. overflow; see TestCoercionFloat32
2026+
raise LossySetitemError
2027+
20212028
if tipo is not None:
20222029
# TODO: itemsize check?
20232030
if tipo.kind not in ["f", "i", "u"]:
@@ -2029,7 +2036,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
20292036
if element._hasna:
20302037
raise LossySetitemError
20312038
return element
2032-
elif tipo.itemsize > dtype.itemsize:
2039+
elif tipo.itemsize > dtype.itemsize or tipo.kind != dtype.kind:
20332040
if isinstance(element, np.ndarray):
20342041
# e.g. TestDataFrameIndexingWhere::test_where_alignment
20352042
casted = element.astype(dtype)
@@ -2040,8 +2047,6 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
20402047

20412048
return element
20422049

2043-
if lib.is_integer(element) or lib.is_float(element):
2044-
return element
20452050
raise LossySetitemError
20462051

20472052
elif dtype.kind == "c":

pandas/tests/indexing/test_coercion.py

+24-3
Original file line numberDiff line numberDiff line change
@@ -753,9 +753,30 @@ def test_fillna_index_bool(self):
753753
def test_fillna_series_timedelta64(self):
754754
raise NotImplementedError
755755

756-
@pytest.mark.xfail(reason="Test not implemented")
757-
def test_fillna_series_period(self):
758-
raise NotImplementedError
756+
@pytest.mark.parametrize(
757+
"fill_val",
758+
[
759+
1,
760+
1.1,
761+
1 + 1j,
762+
True,
763+
pd.Interval(1, 2, closed="left"),
764+
pd.Timestamp("2012-01-01", tz="US/Eastern"),
765+
pd.Timestamp("2012-01-01"),
766+
pd.Timedelta(days=1),
767+
pd.Period("2016-01-01", "W"),
768+
],
769+
)
770+
def test_fillna_series_period(self, index_or_series, fill_val):
771+
772+
pi = pd.period_range("2016-01-01", periods=4, freq="D").insert(1, pd.NaT)
773+
assert isinstance(pi.dtype, pd.PeriodDtype)
774+
obj = index_or_series(pi)
775+
776+
exp = index_or_series([pi[0], fill_val, pi[2], pi[3], pi[4]], dtype=object)
777+
778+
fill_dtype = object
779+
self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
759780

760781
@pytest.mark.xfail(reason="Test not implemented")
761782
def test_fillna_index_timedelta64(self):

pandas/tests/series/indexing/test_setitem.py

+37
Original file line numberDiff line numberDiff line change
@@ -1260,6 +1260,43 @@ def obj(self):
12601260
return Series([1.1, 2.2, 3.3, 4.4])
12611261

12621262

1263+
@pytest.mark.parametrize(
1264+
"val,exp_dtype",
1265+
[
1266+
(1, np.float32),
1267+
pytest.param(
1268+
1.1,
1269+
np.float32,
1270+
marks=pytest.mark.xfail(
1271+
reason="np.float32(1.1) ends up as 1.100000023841858, so "
1272+
"np_can_hold_element raises and we cast to float64",
1273+
),
1274+
),
1275+
(1 + 1j, np.complex128),
1276+
(True, object),
1277+
(np.uint8(2), np.float32),
1278+
(np.uint32(2), np.float32),
1279+
# float32 cannot hold np.iinfo(np.uint32).max exactly
1280+
# (closest it can hold is 4294967296.0, which is off by 1.0), so
1281+
# we cast to float64
1282+
(np.uint32(np.iinfo(np.uint32).max), np.float64),
1283+
(np.uint64(2), np.float32),
1284+
(np.int64(2), np.float32),
1285+
],
1286+
)
1287+
class TestCoercionFloat32(CoercionTest):
1288+
@pytest.fixture
1289+
def obj(self):
1290+
return Series([1.1, 2.2, 3.3, 4.4], dtype=np.float32)
1291+
1292+
def test_slice_key(self, obj, key, expected, val, indexer_sli, is_inplace):
1293+
super().test_slice_key(obj, key, expected, val, indexer_sli, is_inplace)
1294+
1295+
if type(val) is float:
1296+
# the xfail would xpass because test_slice_key short-circuits
1297+
raise AssertionError("xfail not relevant for this test.")
1298+
1299+
12631300
@pytest.mark.parametrize(
12641301
"val,exp_dtype",
12651302
[(Timestamp("2012-01-01"), "datetime64[ns]"), (1, object), ("x", object)],

pandas/tests/series/methods/test_fillna.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -749,14 +749,31 @@ def test_fillna_categorical_raises(self):
749749

750750
@pytest.mark.parametrize("dtype", [float, "float32", "float64"])
751751
@pytest.mark.parametrize("fill_type", tm.ALL_REAL_NUMPY_DTYPES)
752-
def test_fillna_float_casting(self, dtype, fill_type):
752+
@pytest.mark.parametrize("scalar", [True, False])
753+
def test_fillna_float_casting(self, dtype, fill_type, scalar):
753754
# GH-43424
754755
ser = Series([np.nan, 1.2], dtype=dtype)
755756
fill_values = Series([2, 2], dtype=fill_type)
757+
if scalar:
758+
fill_values = fill_values.dtype.type(2)
759+
756760
result = ser.fillna(fill_values)
757761
expected = Series([2.0, 1.2], dtype=dtype)
758762
tm.assert_series_equal(result, expected)
759763

764+
ser = Series([np.nan, 1.2], dtype=dtype)
765+
mask = ser.isna().to_numpy()
766+
ser[mask] = fill_values
767+
tm.assert_series_equal(ser, expected)
768+
769+
ser = Series([np.nan, 1.2], dtype=dtype)
770+
ser.mask(mask, fill_values, inplace=True)
771+
tm.assert_series_equal(ser, expected)
772+
773+
ser = Series([np.nan, 1.2], dtype=dtype)
774+
res = ser.where(~mask, fill_values)
775+
tm.assert_series_equal(res, expected)
776+
760777
def test_fillna_f32_upcast_with_dict(self):
761778
# GH-43424
762779
ser = Series([np.nan, 1.2], dtype=np.float32)

0 commit comments

Comments
 (0)