diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 5891eeea98cbb..81af4fe117dc6 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -823,6 +823,7 @@ Indexing - Bug in :meth:`Series.__setitem__` where setting :attr:`NA` into a numeric-dtype :class:`Series` would incorrectly upcast to object-dtype rather than treating the value as ``np.nan`` (:issue:`44199`) - Bug in :meth:`Series.__setitem__` with ``datetime64[ns]`` dtype, an all-``False`` boolean mask, and an incompatible value incorrectly casting to ``object`` instead of retaining ``datetime64[ns]`` dtype (:issue:`45967`) - Bug in :meth:`Index.__getitem__` raising ``ValueError`` when indexer is from boolean dtype with ``NA`` (:issue:`45806`) +- Bug in :meth:`Series.__setitem__` losing precision when enlarging :class:`Series` with scalar (:issue:`32346`) - Bug in :meth:`Series.mask` with ``inplace=True`` or setting values with a boolean mask with small integer dtypes incorrectly raising (:issue:`45750`) - Bug in :meth:`DataFrame.mask` with ``inplace=True`` and ``ExtensionDtype`` columns incorrectly raising (:issue:`45577`) - Bug in getting a column from a DataFrame with an object-dtype row index with datetime-like values: the resulting Series now preserves the exact object-dtype Index from the parent DataFrame (:issue:`42950`) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 20ac0fedc28d1..67242aeeb49c6 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -21,7 +21,10 @@ from pandas.util._decorators import doc from pandas.util._exceptions import find_stack_level -from pandas.core.dtypes.cast import can_hold_element +from pandas.core.dtypes.cast import ( + can_hold_element, + maybe_promote, +) from pandas.core.dtypes.common import ( is_array_like, is_bool_dtype, @@ -41,7 +44,9 @@ ) from pandas.core.dtypes.missing import ( infer_fill_value, + is_valid_na_for_dtype, isna, + na_value_for_dtype, ) from pandas.core import 
algorithms as algos @@ -2083,8 +2088,23 @@ def _setitem_with_indexer_missing(self, indexer, value): # We get only here with loc, so can hard code return self._setitem_with_indexer(new_indexer, value, "loc") - # this preserves dtype of the value - new_values = Series([value])._values + # this preserves dtype of the value and of the object + if is_valid_na_for_dtype(value, self.obj.dtype): + value = na_value_for_dtype(self.obj.dtype, compat=False) + new_dtype = maybe_promote(self.obj.dtype, value)[0] + elif isna(value): + new_dtype = None + elif not self.obj.empty and not is_object_dtype(self.obj.dtype): + # We should not cast, if we have object dtype because we can + # set timedeltas into object series + curr_dtype = self.obj.dtype + curr_dtype = getattr(curr_dtype, "numpy_dtype", curr_dtype) + new_dtype = maybe_promote(curr_dtype, value)[0] + else: + new_dtype = None + + new_values = Series([value], dtype=new_dtype)._values + if len(self.obj._values): # GH#22717 handle casting compatibility that np.concatenate # does incorrectly diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index e2a5517066ad9..b73aacae18bc5 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -534,6 +534,33 @@ def test_setitem_not_contained(self, string_series): expected = concat([string_series, app]) tm.assert_series_equal(ser, expected) + def test_setitem_keep_precision(self, any_numeric_ea_dtype): + # GH#32346 + ser = Series([1, 2], dtype=any_numeric_ea_dtype) + ser[2] = 10 + expected = Series([1, 2, 10], dtype=any_numeric_ea_dtype) + tm.assert_series_equal(ser, expected) + + @pytest.mark.parametrize("indexer", [1, 2]) + @pytest.mark.parametrize( + "na, target_na, dtype, target_dtype", + [ + (NA, NA, "Int64", "Int64"), + (NA, np.nan, "int64", "float64"), + (NaT, NaT, "int64", "object"), + (np.nan, NA, "Int64", "Int64"), + (np.nan, NA, "Float64", "Float64"), + (np.nan, np.nan, 
"int64", "float64"), + ], + ) + def test_setitem_enlarge_with_na(self, na, target_na, dtype, target_dtype, indexer): + # GH#32346 + ser = Series([1, 2], dtype=dtype) + ser[indexer] = na + expected_values = [1, target_na] if indexer == 1 else [1, 2, target_na] + expected = Series(expected_values, dtype=target_dtype) + tm.assert_series_equal(ser, expected) + def test_setitem_scalar_into_readonly_backing_data(): # GH#14359: test that you cannot mutate a read only buffer