Skip to content

Commit 3510b1f

Browse files
authored
BUG: setting pd.NA into Series casts to object (#45431)
1 parent f7a41b4 commit 3510b1f

File tree

5 files changed

+22
-18
lines changed

5 files changed

+22
-18
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,7 @@ Indexing
214214
- Bug in :meth:`DataFrame.iloc` where indexing a single row on a :class:`DataFrame` with a single ExtensionDtype column gave a copy instead of a view on the underlying data (:issue:`45241`)
215215
- Bug in :meth:`Series.__setitem__` with a non-integer :class:`Index` when using an integer key to set a value that cannot be set inplace where a ``ValueError`` was raised instead of casting to a common dtype (:issue:`45070`)
216216
- Bug when setting a value too large for a :class:`Series` dtype failing to coerce to a common type (:issue:`26049`, :issue:`32878`)
217+
- Bug in :meth:`Series.__setitem__` where setting :attr:`NA` into a numeric-dtype :class:`Series` would incorrectly upcast to object-dtype rather than treating the value as ``np.nan`` (:issue:`44199`)
217218
-
218219

219220
Missing

pandas/core/frame.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -8887,9 +8887,9 @@ def applymap(
88878887
>>> df_copy = df.copy()
88888888
>>> df_copy.iloc[0, 0] = pd.NA
88898889
>>> df_copy.applymap(lambda x: len(str(x)), na_action='ignore')
8890-
0 1
8891-
0 <NA> 4
8892-
1 5 5
8890+
0 1
8891+
0 NaN 4
8892+
1 5.0 5
88938893
88948894
Note that a vectorized version of `func` often exists, which will
88958895
be much faster. You could square each number elementwise.

pandas/core/indexes/base.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -5233,7 +5233,8 @@ def putmask(self, mask, value) -> Index:
52335233
if noop:
52345234
return self.copy()
52355235

5236-
if value is None and (self._is_numeric_dtype or self.dtype == object):
5236+
if self.dtype != object and is_valid_na_for_dtype(value, self.dtype):
5237+
# e.g. None -> np.nan, see also Block._standardize_fill_value
52375238
value = self._na_value
52385239
try:
52395240
converted = self._validate_fill_value(value)

pandas/core/internals/blocks.py

+10-11
Original file line numberDiff line numberDiff line change
@@ -878,6 +878,12 @@ def _replace_coerce(
878878

879879
# ---------------------------------------------------------------------
880880

881+
def _standardize_fill_value(self, value):
882+
# if we are passed a scalar None, convert it here
883+
if self.dtype != _dtype_obj and is_valid_na_for_dtype(value, self.dtype):
884+
value = self.fill_value
885+
return value
886+
881887
def _maybe_squeeze_arg(self, arg: np.ndarray) -> np.ndarray:
882888
"""
883889
For compatibility with 1D-only ExtensionArrays.
@@ -911,10 +917,7 @@ def setitem(self, indexer, value):
911917
be a compatible shape.
912918
"""
913919

914-
# coerce None values, if appropriate
915-
if value is None:
916-
if self.is_numeric:
917-
value = np.nan
920+
value = self._standardize_fill_value(value)
918921

919922
# coerce if block dtype can store value
920923
if not self._can_hold_element(value):
@@ -968,9 +971,7 @@ def putmask(self, mask, new) -> list[Block]:
968971
if new is lib.no_default:
969972
new = self.fill_value
970973

971-
# if we are passed a scalar None, convert it here
972-
if not self.is_object and is_valid_na_for_dtype(new, self.dtype):
973-
new = self.fill_value
974+
new = self._standardize_fill_value(new)
974975

975976
if self._can_hold_element(new):
976977
putmask_without_repeat(values.T, mask, new)
@@ -1152,8 +1153,7 @@ def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Blo
11521153
# see test_shift_object_non_scalar_fill
11531154
raise ValueError("fill_value must be a scalar")
11541155

1155-
if is_valid_na_for_dtype(fill_value, self.dtype) and self.dtype != _dtype_obj:
1156-
fill_value = self.fill_value
1156+
fill_value = self._standardize_fill_value(fill_value)
11571157

11581158
if not self._can_hold_element(fill_value):
11591159
nb = self.coerce_to_target_dtype(fill_value)
@@ -1196,8 +1196,7 @@ def where(self, other, cond) -> list[Block]:
11961196
if other is lib.no_default:
11971197
other = self.fill_value
11981198

1199-
if is_valid_na_for_dtype(other, self.dtype) and self.dtype != _dtype_obj:
1200-
other = self.fill_value
1199+
other = self._standardize_fill_value(other)
12011200

12021201
if not self._can_hold_element(other):
12031202
# we cannot coerce, return a compat dtype

pandas/tests/series/indexing/test_setitem.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from pandas.core.dtypes.common import is_list_like
1010

1111
from pandas import (
12+
NA,
1213
Categorical,
1314
DataFrame,
1415
DatetimeIndex,
@@ -770,11 +771,13 @@ def test_index_putmask(self, obj, key, expected, val):
770771
],
771772
)
772773
class TestSetitemCastingEquivalents(SetitemCastingEquivalents):
773-
@pytest.fixture(params=[np.nan, np.float64("NaN")])
774+
@pytest.fixture(params=[np.nan, np.float64("NaN"), None, NA])
774775
def val(self, request):
775776
"""
776-
One python float NaN, one np.float64. Only np.float64 has a `dtype`
777-
attribute.
777+
NA values that should generally be valid_na for *all* dtypes.
778+
779+
Include both python float NaN and np.float64; only np.float64 has a
780+
`dtype` attribute.
778781
"""
779782
return request.param
780783

0 commit comments

Comments
 (0)