Skip to content

Commit fcb1101

Browse files
MarcoGorelli authored and meeseeksmachine committed
Backport PR pandas-dev#54527: BUG: Special-case setting nan into integer series
1 parent d27c467 commit fcb1101

File tree

8 files changed

+83
-65
lines changed

8 files changed

+83
-65
lines changed

pandas/_libs/lib.pyi

+1
Original file line number | Diff line number | Diff line change
@@ -195,6 +195,7 @@ def array_equivalent_object(
195195
right: npt.NDArray[np.object_],
196196
) -> bool: ...
197197
def has_infs(arr: np.ndarray) -> bool: ... # const floating[:]
198+
def has_only_ints_or_nan(arr: np.ndarray) -> bool: ... # const floating[:]
198199
def get_reverse_indexer(
199200
indexer: np.ndarray, # const intp_t[:]
200201
length: int,

pandas/_libs/lib.pyx

+16
Original file line number | Diff line number | Diff line change
@@ -530,6 +530,22 @@ def has_infs(floating[:] arr) -> bool:
530530
return ret
531531

532532

533+
@cython.boundscheck(False)
534+
@cython.wraparound(False)
535+
def has_only_ints_or_nan(floating[:] arr) -> bool:
536+
cdef:
537+
floating val
538+
intp_t i
539+
540+
for i in range(len(arr)):
541+
val = arr[i]
542+
if (val != val) or (val == <int64_t>val):
543+
continue
544+
else:
545+
return False
546+
return True
547+
548+
533549
def maybe_indices_to_slice(ndarray[intp_t, ndim=1] indices, int max_len):
534550
cdef:
535551
Py_ssize_t i, n = len(indices)

pandas/core/internals/blocks.py

+23
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
from pandas._config import using_copy_on_write
1919

2020
from pandas._libs import (
21+
NaT,
2122
internals as libinternals,
2223
lib,
2324
writers,
@@ -60,7 +61,10 @@
6061
from pandas.core.dtypes.common import (
6162
ensure_platform_int,
6263
is_1d_only_ea_dtype,
64+
is_float_dtype,
65+
is_integer_dtype,
6366
is_list_like,
67+
is_scalar,
6468
is_string_dtype,
6569
)
6670
from pandas.core.dtypes.dtypes import (
@@ -454,6 +458,25 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block:
454458
and will receive the same block
455459
"""
456460
new_dtype = find_result_type(self.values.dtype, other)
461+
462+
# In a future version of pandas, the default will be that
463+
# setting `nan` into an integer series won't raise.
464+
if (
465+
is_scalar(other)
466+
and is_integer_dtype(self.values.dtype)
467+
and isna(other)
468+
and other is not NaT
469+
):
470+
warn_on_upcast = False
471+
elif (
472+
isinstance(other, np.ndarray)
473+
and other.ndim == 1
474+
and is_integer_dtype(self.values.dtype)
475+
and is_float_dtype(other.dtype)
476+
and lib.has_only_ints_or_nan(other)
477+
):
478+
warn_on_upcast = False
479+
457480
if warn_on_upcast:
458481
warnings.warn(
459482
f"Setting an item of incompatible dtype is deprecated "

pandas/tests/frame/indexing/test_indexing.py

+13 -17
Original file line number | Diff line number | Diff line change
@@ -337,18 +337,12 @@ def test_setitem(self, float_frame, using_copy_on_write):
337337
def test_setitem2(self):
338338
# dtype changing GH4204
339339
df = DataFrame([[0, 0]])
340-
with tm.assert_produces_warning(
341-
FutureWarning, match="Setting an item of incompatible dtype"
342-
):
343-
df.iloc[0] = np.nan
340+
df.iloc[0] = np.nan
344341
expected = DataFrame([[np.nan, np.nan]])
345342
tm.assert_frame_equal(df, expected)
346343

347344
df = DataFrame([[0, 0]])
348-
with tm.assert_produces_warning(
349-
FutureWarning, match="Setting an item of incompatible dtype"
350-
):
351-
df.loc[0] = np.nan
345+
df.loc[0] = np.nan
352346
tm.assert_frame_equal(df, expected)
353347

354348
def test_setitem_boolean(self, float_frame):
@@ -1579,9 +1573,7 @@ def test_setitem(self, uint64_frame):
15791573
# With NaN: because uint64 has no NaN element,
15801574
# the column should be cast to object.
15811575
df2 = df.copy()
1582-
with tm.assert_produces_warning(
1583-
FutureWarning, match="Setting an item of incompatible dtype"
1584-
):
1576+
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
15851577
df2.iloc[1, 1] = pd.NaT
15861578
df2.iloc[1, 2] = pd.NaT
15871579
result = df2["B"]
@@ -1901,19 +1893,19 @@ def test_setitem_dict_and_set_disallowed_multiindex(self, key):
19011893
class TestSetitemValidation:
19021894
# This is adapted from pandas/tests/arrays/masked/test_indexing.py
19031895
# but checks for warnings instead of errors.
1904-
def _check_setitem_invalid(self, df, invalid, indexer):
1896+
def _check_setitem_invalid(self, df, invalid, indexer, warn):
19051897
msg = "Setting an item of incompatible dtype is deprecated"
19061898
msg = re.escape(msg)
19071899

19081900
orig_df = df.copy()
19091901

19101902
# iloc
1911-
with tm.assert_produces_warning(FutureWarning, match=msg):
1903+
with tm.assert_produces_warning(warn, match=msg):
19121904
df.iloc[indexer, 0] = invalid
19131905
df = orig_df.copy()
19141906

19151907
# loc
1916-
with tm.assert_produces_warning(FutureWarning, match=msg):
1908+
with tm.assert_produces_warning(warn, match=msg):
19171909
df.loc[indexer, "a"] = invalid
19181910
df = orig_df.copy()
19191911

@@ -1934,16 +1926,20 @@ def _check_setitem_invalid(self, df, invalid, indexer):
19341926
@pytest.mark.parametrize("indexer", _indexers)
19351927
def test_setitem_validation_scalar_bool(self, invalid, indexer):
19361928
df = DataFrame({"a": [True, False, False]}, dtype="bool")
1937-
self._check_setitem_invalid(df, invalid, indexer)
1929+
self._check_setitem_invalid(df, invalid, indexer, FutureWarning)
19381930

19391931
@pytest.mark.parametrize("invalid", _invalid_scalars + [True, 1.5, np.float64(1.5)])
19401932
@pytest.mark.parametrize("indexer", _indexers)
19411933
def test_setitem_validation_scalar_int(self, invalid, any_int_numpy_dtype, indexer):
19421934
df = DataFrame({"a": [1, 2, 3]}, dtype=any_int_numpy_dtype)
1943-
self._check_setitem_invalid(df, invalid, indexer)
1935+
if isna(invalid) and invalid is not pd.NaT:
1936+
warn = None
1937+
else:
1938+
warn = FutureWarning
1939+
self._check_setitem_invalid(df, invalid, indexer, warn)
19441940

19451941
@pytest.mark.parametrize("invalid", _invalid_scalars + [True])
19461942
@pytest.mark.parametrize("indexer", _indexers)
19471943
def test_setitem_validation_scalar_float(self, invalid, float_numpy_dtype, indexer):
19481944
df = DataFrame({"a": [1, 2, None]}, dtype=float_numpy_dtype)
1949-
self._check_setitem_invalid(df, invalid, indexer)
1945+
self._check_setitem_invalid(df, invalid, indexer, FutureWarning)

pandas/tests/indexing/test_indexing.py

+4 -12
Original file line number | Diff line number | Diff line change
@@ -830,8 +830,7 @@ def test_coercion_with_loc(self, expected):
830830
start_data, expected_result, warn = expected
831831

832832
start_dataframe = DataFrame({"foo": start_data})
833-
with tm.assert_produces_warning(warn, match="incompatible dtype"):
834-
start_dataframe.loc[0, ["foo"]] = None
833+
start_dataframe.loc[0, ["foo"]] = None
835834

836835
expected_dataframe = DataFrame({"foo": expected_result})
837836
tm.assert_frame_equal(start_dataframe, expected_dataframe)
@@ -841,8 +840,7 @@ def test_coercion_with_setitem_and_dataframe(self, expected):
841840
start_data, expected_result, warn = expected
842841

843842
start_dataframe = DataFrame({"foo": start_data})
844-
with tm.assert_produces_warning(warn, match="incompatible dtype"):
845-
start_dataframe[start_dataframe["foo"] == start_dataframe["foo"][0]] = None
843+
start_dataframe[start_dataframe["foo"] == start_dataframe["foo"][0]] = None
846844

847845
expected_dataframe = DataFrame({"foo": expected_result})
848846
tm.assert_frame_equal(start_dataframe, expected_dataframe)
@@ -852,10 +850,7 @@ def test_none_coercion_loc_and_dataframe(self, expected):
852850
start_data, expected_result, warn = expected
853851

854852
start_dataframe = DataFrame({"foo": start_data})
855-
with tm.assert_produces_warning(warn, match="incompatible dtype"):
856-
start_dataframe.loc[
857-
start_dataframe["foo"] == start_dataframe["foo"][0]
858-
] = None
853+
start_dataframe.loc[start_dataframe["foo"] == start_dataframe["foo"][0]] = None
859854

860855
expected_dataframe = DataFrame({"foo": expected_result})
861856
tm.assert_frame_equal(start_dataframe, expected_dataframe)
@@ -869,10 +864,7 @@ def test_none_coercion_mixed_dtypes(self):
869864
"d": ["a", "b", "c"],
870865
}
871866
)
872-
with tm.assert_produces_warning(
873-
FutureWarning, match="item of incompatible dtype"
874-
):
875-
start_dataframe.iloc[0] = None
867+
start_dataframe.iloc[0] = None
876868

877869
exp = DataFrame(
878870
{

pandas/tests/series/indexing/test_indexing.py

+13 -8
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
Timestamp,
2020
concat,
2121
date_range,
22+
isna,
2223
period_range,
2324
timedelta_range,
2425
)
@@ -456,25 +457,25 @@ def test_setitem_dict_and_set_disallowed_multiindex(self, key):
456457
class TestSetitemValidation:
457458
# This is adapted from pandas/tests/arrays/masked/test_indexing.py
458459
# but checks for warnings instead of errors.
459-
def _check_setitem_invalid(self, ser, invalid, indexer):
460+
def _check_setitem_invalid(self, ser, invalid, indexer, warn):
460461
msg = "Setting an item of incompatible dtype is deprecated"
461462
msg = re.escape(msg)
462463

463464
orig_ser = ser.copy()
464465

465-
with tm.assert_produces_warning(FutureWarning, match=msg):
466+
with tm.assert_produces_warning(warn, match=msg):
466467
ser[indexer] = invalid
467468
ser = orig_ser.copy()
468469

469-
with tm.assert_produces_warning(FutureWarning, match=msg):
470+
with tm.assert_produces_warning(warn, match=msg):
470471
ser.iloc[indexer] = invalid
471472
ser = orig_ser.copy()
472473

473-
with tm.assert_produces_warning(FutureWarning, match=msg):
474+
with tm.assert_produces_warning(warn, match=msg):
474475
ser.loc[indexer] = invalid
475476
ser = orig_ser.copy()
476477

477-
with tm.assert_produces_warning(FutureWarning, match=msg):
478+
with tm.assert_produces_warning(warn, match=msg):
478479
ser[:] = invalid
479480

480481
_invalid_scalars = [
@@ -494,16 +495,20 @@ def _check_setitem_invalid(self, ser, invalid, indexer):
494495
@pytest.mark.parametrize("indexer", _indexers)
495496
def test_setitem_validation_scalar_bool(self, invalid, indexer):
496497
ser = Series([True, False, False], dtype="bool")
497-
self._check_setitem_invalid(ser, invalid, indexer)
498+
self._check_setitem_invalid(ser, invalid, indexer, FutureWarning)
498499

499500
@pytest.mark.parametrize("invalid", _invalid_scalars + [True, 1.5, np.float64(1.5)])
500501
@pytest.mark.parametrize("indexer", _indexers)
501502
def test_setitem_validation_scalar_int(self, invalid, any_int_numpy_dtype, indexer):
502503
ser = Series([1, 2, 3], dtype=any_int_numpy_dtype)
503-
self._check_setitem_invalid(ser, invalid, indexer)
504+
if isna(invalid) and invalid is not NaT:
505+
warn = None
506+
else:
507+
warn = FutureWarning
508+
self._check_setitem_invalid(ser, invalid, indexer, warn)
504509

505510
@pytest.mark.parametrize("invalid", _invalid_scalars + [True])
506511
@pytest.mark.parametrize("indexer", _indexers)
507512
def test_setitem_validation_scalar_float(self, invalid, float_numpy_dtype, indexer):
508513
ser = Series([1, 2, None], dtype=float_numpy_dtype)
509-
self._check_setitem_invalid(ser, invalid, indexer)
514+
self._check_setitem_invalid(ser, invalid, indexer, FutureWarning)

pandas/tests/series/indexing/test_setitem.py

+12 -17
Original file line number | Diff line number | Diff line change
@@ -191,14 +191,11 @@ def test_setitem_series_object_dtype(self, indexer, ser_index):
191191
expected = Series([Series([42], index=[ser_index]), 0], dtype="object")
192192
tm.assert_series_equal(ser, expected)
193193

194-
@pytest.mark.parametrize(
195-
"index, exp_value, warn", [(0, 42, None), (1, np.nan, FutureWarning)]
196-
)
197-
def test_setitem_series(self, index, exp_value, warn):
194+
@pytest.mark.parametrize("index, exp_value", [(0, 42), (1, np.nan)])
195+
def test_setitem_series(self, index, exp_value):
198196
# GH#38303
199197
ser = Series([0, 0])
200-
with tm.assert_produces_warning(warn, match="item of incompatible dtype"):
201-
ser.loc[0] = Series([42], index=[index])
198+
ser.loc[0] = Series([42], index=[index])
202199
expected = Series([exp_value, 0])
203200
tm.assert_series_equal(ser, expected)
204201

@@ -575,15 +572,15 @@ def test_setitem_keep_precision(self, any_numeric_ea_dtype):
575572
[
576573
(NA, NA, "Int64", "Int64", 1, None),
577574
(NA, NA, "Int64", "Int64", 2, None),
578-
(NA, np.nan, "int64", "float64", 1, FutureWarning),
575+
(NA, np.nan, "int64", "float64", 1, None),
579576
(NA, np.nan, "int64", "float64", 2, None),
580577
(NaT, NaT, "int64", "object", 1, FutureWarning),
581578
(NaT, NaT, "int64", "object", 2, None),
582579
(np.nan, NA, "Int64", "Int64", 1, None),
583580
(np.nan, NA, "Int64", "Int64", 2, None),
584581
(np.nan, NA, "Float64", "Float64", 1, None),
585582
(np.nan, NA, "Float64", "Float64", 2, None),
586-
(np.nan, np.nan, "int64", "float64", 1, FutureWarning),
583+
(np.nan, np.nan, "int64", "float64", 1, None),
587584
(np.nan, np.nan, "int64", "float64", 2, None),
588585
],
589586
)
@@ -592,7 +589,7 @@ def test_setitem_enlarge_with_na(
592589
):
593590
# GH#32346
594591
ser = Series([1, 2], dtype=dtype)
595-
with tm.assert_produces_warning(warn, match="item of incompatible dtype"):
592+
with tm.assert_produces_warning(warn, match="incompatible dtype"):
596593
ser[indexer] = na
597594
expected_values = [1, target_na] if indexer == 1 else [1, 2, target_na]
598595
expected = Series(expected_values, dtype=target_dtype)
@@ -884,7 +881,7 @@ def test_index_putmask(self, obj, key, expected, warn, val):
884881
Series([2, 3, 4, 5, 6, 7, 8, 9, 10]),
885882
Series([np.nan, 3, np.nan, 5, np.nan, 7, np.nan, 9, np.nan]),
886883
slice(None, None, 2),
887-
FutureWarning,
884+
None,
888885
id="int_series_slice_key_step",
889886
),
890887
pytest.param(
@@ -899,15 +896,15 @@ def test_index_putmask(self, obj, key, expected, warn, val):
899896
Series(np.arange(10)),
900897
Series([np.nan, np.nan, np.nan, np.nan, np.nan, 5, 6, 7, 8, 9]),
901898
slice(None, 5),
902-
FutureWarning,
899+
None,
903900
id="int_series_slice_key",
904901
),
905902
pytest.param(
906903
# changes dtype GH#4463
907904
Series([1, 2, 3]),
908905
Series([np.nan, 2, 3]),
909906
0,
910-
FutureWarning,
907+
None,
911908
id="int_series_int_key",
912909
),
913910
pytest.param(
@@ -1134,7 +1131,7 @@ def warn(self):
11341131
"obj,expected,warn",
11351132
[
11361133
# For numeric series, we should coerce to NaN.
1137-
(Series([1, 2, 3]), Series([np.nan, 2, 3]), FutureWarning),
1134+
(Series([1, 2, 3]), Series([np.nan, 2, 3]), None),
11381135
(Series([1.0, 2.0, 3.0]), Series([np.nan, 2.0, 3.0]), None),
11391136
# For datetime series, we should coerce to NaT.
11401137
(
@@ -1584,13 +1581,11 @@ def test_20643_comment():
15841581
expected = Series([np.nan, 1, 2], index=["a", "b", "c"])
15851582

15861583
ser = orig.copy()
1587-
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
1588-
ser.iat[0] = None
1584+
ser.iat[0] = None
15891585
tm.assert_series_equal(ser, expected)
15901586

15911587
ser = orig.copy()
1592-
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
1593-
ser.iloc[0] = None
1588+
ser.iloc[0] = None
15941589
tm.assert_series_equal(ser, expected)
15951590

15961591

pandas/tests/series/methods/test_convert_dtypes.py

+1 -11
Original file line number | Diff line number | Diff line change
@@ -206,17 +206,7 @@ def test_convert_dtypes(
206206
# Test that it is a copy
207207
copy = series.copy(deep=True)
208208

209-
if result.notna().sum() > 0 and result.dtype in [
210-
"int8",
211-
"uint8",
212-
"int16",
213-
"uint16",
214-
"int32",
215-
"uint32",
216-
"int64",
217-
"uint64",
218-
"interval[int64, right]",
219-
]:
209+
if result.notna().sum() > 0 and result.dtype in ["interval[int64, right]"]:
220210
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
221211
result[result.notna()] = np.nan
222212
else:

0 commit comments

Comments
 (0)