diff --git a/doc/source/whatsnew/v0.21.0.rst b/doc/source/whatsnew/v0.21.0.rst index 1dae2e8463c27..8e1cb32cb856d 100644 --- a/doc/source/whatsnew/v0.21.0.rst +++ b/doc/source/whatsnew/v0.21.0.rst @@ -784,10 +784,15 @@ non-datetime-like item being assigned (:issue:`14145`). These now coerce to ``object`` dtype. -.. ipython:: python +.. code-block:: python - s[1] = 1 - s + In [1]: s[1] = 1 + + In [2]: s + Out[2]: + 0 2011-01-01 00:00:00 + 1 1 + dtype: object - Inconsistent behavior in ``.where()`` with datetimelikes which would raise rather than coerce to ``object`` (:issue:`16402`) - Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 91efcfd590c01..6e9ff503b95f5 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -295,8 +295,99 @@ Other API changes .. --------------------------------------------------------------------------- .. _whatsnew_210.deprecations: -Deprecate parsing datetimes with mixed time zones -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Deprecations +~~~~~~~~~~~~ + +Deprecated silent upcasting in setitem-like Series operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Setitem-like operations on Series (or DataFrame columns) which silently upcast the dtype are +deprecated and show a warning. Examples of affected operations are: + + - ``ser.fillna('foo', inplace=True)`` + - ``ser.where(ser.isna(), 'foo', inplace=True)`` + - ``ser.iloc[indexer] = 'foo'`` + - ``ser.loc[indexer] = 'foo'`` + - ``df.iloc[indexer, 0] = 'foo'`` + - ``df.loc[indexer, 'a'] = 'foo'`` + - ``ser[indexer] = 'foo'`` + +where ``ser`` is a :class:`Series`, ``df`` is a :class:`DataFrame`, and ``indexer`` +could be a slice, a mask, a single value, a list or array of values, or any other +allowed indexer. 
+ +In a future version, these will raise an error and you should cast to a common dtype first. + +*Previous behavior*: + +.. code-block:: ipython + + In [1]: ser = pd.Series([1, 2, 3]) + + In [2]: ser + Out[2]: + 0 1 + 1 2 + 2 3 + dtype: int64 + + In [3]: ser[0] = 'not an int64' + + In [4]: ser + Out[4]: + 0 not an int64 + 1 2 + 2 3 + dtype: object + +*New behavior*: + +.. code-block:: ipython + + In [1]: ser = pd.Series([1, 2, 3]) + + In [2]: ser + Out[2]: + 0 1 + 1 2 + 2 3 + dtype: int64 + + In [3]: ser[0] = 'not an int64' + FutureWarning: + Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. + Value 'not an int64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first. + + In [4]: ser + Out[4]: + 0 not an int64 + 1 2 + 2 3 + dtype: object + +To retain the current behavior, in the case above you could cast ``ser`` to ``object`` dtype first: + +.. ipython:: python + + ser = pd.Series([1, 2, 3]) + ser = ser.astype('object') + ser[0] = 'not an int64' + ser + +Depending on the use-case, it might be more appropriate to cast to a different dtype. +In the following, for example, we cast to ``float64``: + +.. ipython:: python + + ser = pd.Series([1, 2, 3]) + ser = ser.astype('float64') + ser[0] = 1.1 + ser + +For further reading, please see https://pandas.pydata.org/pdeps/0006-ban-upcasting.html. 
+ +Deprecated parsing datetimes with mixed time zones +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Parsing datetimes with mixed time zones is deprecated and shows a warning unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`50887`) @@ -341,7 +432,7 @@ and ``datetime.datetime.strptime``: pd.Series(data).apply(lambda x: dt.datetime.strptime(x, '%Y-%m-%d %H:%M:%S%z')) Other Deprecations -~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^ - Deprecated 'broadcast_axis' keyword in :meth:`Series.align` and :meth:`DataFrame.align`, upcast before calling ``align`` with ``left = DataFrame({col: left for col in right.columns}, index=right.index)`` (:issue:`51856`) - Deprecated 'downcast' keyword in :meth:`Index.fillna` (:issue:`53956`) - Deprecated 'fill_method' and 'limit' keywords in :meth:`DataFrame.pct_change`, :meth:`Series.pct_change`, :meth:`DataFrameGroupBy.pct_change`, and :meth:`SeriesGroupBy.pct_change`, explicitly call ``ffill`` or ``bfill`` before calling ``pct_change`` instead (:issue:`53491`) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 3e988068dbc12..f9c05397076e4 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -461,7 +461,7 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block: FutureWarning, stacklevel=find_stack_level(), ) - if self.dtype == new_dtype: + if self.values.dtype == new_dtype: raise AssertionError( f"Did not expect new dtype {new_dtype} to equal self.dtype " f"{self.values.dtype}. 
Please report a bug at " @@ -1723,11 +1723,11 @@ def setitem(self, indexer, value, using_cow: bool = False): if isinstance(self.dtype, IntervalDtype): # see TestSetitemFloatIntervalWithIntIntervalValues - nb = self.coerce_to_target_dtype(orig_value) + nb = self.coerce_to_target_dtype(orig_value, warn_on_upcast=True) return nb.setitem(orig_indexer, orig_value) elif isinstance(self, NDArrayBackedExtensionBlock): - nb = self.coerce_to_target_dtype(orig_value) + nb = self.coerce_to_target_dtype(orig_value, warn_on_upcast=True) return nb.setitem(orig_indexer, orig_value) else: @@ -1841,13 +1841,13 @@ def putmask(self, mask, new, using_cow: bool = False) -> list[Block]: if isinstance(self.dtype, IntervalDtype): # Discussion about what we want to support in the general # case GH#39584 - blk = self.coerce_to_target_dtype(orig_new) + blk = self.coerce_to_target_dtype(orig_new, warn_on_upcast=True) return blk.putmask(orig_mask, orig_new) elif isinstance(self, NDArrayBackedExtensionBlock): # NB: not (yet) the same as # isinstance(values, NDArrayBackedExtensionArray) - blk = self.coerce_to_target_dtype(orig_new) + blk = self.coerce_to_target_dtype(orig_new, warn_on_upcast=True) return blk.putmask(orig_mask, orig_new) else: diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index e62d35ade149d..7e7eac1a90d66 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -831,7 +831,10 @@ def test_setitem_single_column_mixed_datetime(self): tm.assert_series_equal(result, expected) # GH#16674 iNaT is treated as an integer when given by the user - df.loc["b", "timestamp"] = iNaT + with tm.assert_produces_warning( + FutureWarning, match="Setting an item of incompatible dtype" + ): + df.loc["b", "timestamp"] = iNaT assert not isna(df.loc["b", "timestamp"]) assert df["timestamp"].dtype == np.object_ assert df.loc["b", "timestamp"] == iNaT @@ -862,7 +865,10 @@ def 
test_setitem_mixed_datetime(self): df = DataFrame(0, columns=list("ab"), index=range(6)) df["b"] = pd.NaT df.loc[0, "b"] = datetime(2012, 1, 1) - df.loc[1, "b"] = 1 + with tm.assert_produces_warning( + FutureWarning, match="Setting an item of incompatible dtype" + ): + df.loc[1, "b"] = 1 df.loc[[2, 3], "b"] = "x", "y" A = np.array( [ diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index 88015992f2893..40733a52bdd82 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -735,7 +735,10 @@ def test_where_interval_fullop_downcast(self, frame_or_series): tm.assert_equal(res, other.astype(np.int64)) # unlike where, Block.putmask does not downcast - obj.mask(obj.notna(), other, inplace=True) + with tm.assert_produces_warning( + FutureWarning, match="Setting an item of incompatible dtype" + ): + obj.mask(obj.notna(), other, inplace=True) tm.assert_equal(obj, other.astype(object)) @pytest.mark.parametrize( @@ -775,7 +778,10 @@ def test_where_datetimelike_noop(self, dtype): tm.assert_frame_equal(res5, expected) # unlike where, Block.putmask does not downcast - df.mask(~mask2, 4, inplace=True) + with tm.assert_produces_warning( + FutureWarning, match="Setting an item of incompatible dtype" + ): + df.mask(~mask2, 4, inplace=True) tm.assert_frame_equal(df, expected.astype(object)) @@ -930,7 +936,10 @@ def test_where_period_invalid_na(frame_or_series, as_cat, request): result = obj.mask(mask, tdnat) tm.assert_equal(result, expected) - obj.mask(mask, tdnat, inplace=True) + with tm.assert_produces_warning( + FutureWarning, match="Setting an item of incompatible dtype" + ): + obj.mask(mask, tdnat, inplace=True) tm.assert_equal(obj, expected) @@ -1006,7 +1015,10 @@ def test_where_dt64_2d(): # setting all of one column, none of the other expected = DataFrame({"A": other[:, 0], "B": dta[:, 1]}) - _check_where_equivalences(df, mask, other, expected) + with tm.assert_produces_warning( + 
FutureWarning, match="Setting an item of incompatible dtype" + ): + _check_where_equivalences(df, mask, other, expected) # setting part of one column, none of the other mask[1, 0] = True @@ -1016,7 +1028,10 @@ def test_where_dt64_2d(): "B": dta[:, 1], } ) - _check_where_equivalences(df, mask, other, expected) + with tm.assert_produces_warning( + FutureWarning, match="Setting an item of incompatible dtype" + ): + _check_where_equivalences(df, mask, other, expected) # setting nothing in either column mask[:] = True diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 31f404258a9bb..c87f04efffcf4 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2549,8 +2549,7 @@ def check_views(c_only: bool = False): check_views() # TODO: most of the rest of this test belongs in indexing tests - # TODO: 'm' and 'M' should warn - if lib.is_np_dtype(df.dtypes.iloc[0], "fciuOmM"): + if lib.is_np_dtype(df.dtypes.iloc[0], "fciuO"): warn = None else: warn = FutureWarning diff --git a/pandas/tests/indexing/test_at.py b/pandas/tests/indexing/test_at.py index 01315647c464b..ec3b36d386434 100644 --- a/pandas/tests/indexing/test_at.py +++ b/pandas/tests/indexing/test_at.py @@ -23,7 +23,8 @@ def test_at_timezone(): # https://github.com/pandas-dev/pandas/issues/33544 result = DataFrame({"foo": [datetime(2000, 1, 1)]}) - result.at[0, "foo"] = datetime(2000, 1, 2, tzinfo=timezone.utc) + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + result.at[0, "foo"] = datetime(2000, 1, 2, tzinfo=timezone.utc) expected = DataFrame( {"foo": [datetime(2000, 1, 2, tzinfo=timezone.utc)]}, dtype=object ) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 2bbeebcff8ebd..4e7d43ca22c27 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1449,7 +1449,8 @@ def test_loc_setitem_datetime_coercion(self): df.loc[0:1, "c"] 
= np.datetime64("2008-08-08") assert Timestamp("2008-08-08") == df.loc[0, "c"] assert Timestamp("2008-08-08") == df.loc[1, "c"] - df.loc[2, "c"] = date(2005, 5, 5) + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + df.loc[2, "c"] = date(2005, 5, 5) assert Timestamp("2005-05-05").date() == df.loc[2, "c"] @pytest.mark.parametrize("idxer", ["var", ["var"]]) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 8a4b5fd5f2e01..9aeae8d5133d3 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -1312,17 +1312,20 @@ def test_interval_can_hold_element(self, dtype, element): # `elem` to not have the same length as `arr` ii2 = IntervalIndex.from_breaks(arr[:-1], closed="neither") elem = element(ii2) - self.check_series_setitem(elem, ii, False) + with tm.assert_produces_warning(FutureWarning): + self.check_series_setitem(elem, ii, False) assert not blk._can_hold_element(elem) ii3 = IntervalIndex.from_breaks([Timestamp(1), Timestamp(3), Timestamp(4)]) elem = element(ii3) - self.check_series_setitem(elem, ii, False) + with tm.assert_produces_warning(FutureWarning): + self.check_series_setitem(elem, ii, False) assert not blk._can_hold_element(elem) ii4 = IntervalIndex.from_breaks([Timedelta(1), Timedelta(3), Timedelta(4)]) elem = element(ii4) - self.check_series_setitem(elem, ii, False) + with tm.assert_produces_warning(FutureWarning): + self.check_series_setitem(elem, ii, False) assert not blk._can_hold_element(elem) def test_period_can_hold_element_emptylist(self): @@ -1341,11 +1344,13 @@ def test_period_can_hold_element(self, element): # `elem` to not have the same length as `arr` pi2 = pi.asfreq("D")[:-1] elem = element(pi2) - self.check_series_setitem(elem, pi, False) + with tm.assert_produces_warning(FutureWarning): + self.check_series_setitem(elem, pi, False) dti = pi.to_timestamp("S")[:-1] elem = element(dti) - self.check_series_setitem(elem, 
pi, False) + with tm.assert_produces_warning(FutureWarning): + self.check_series_setitem(elem, pi, False) def check_can_hold_element(self, obj, elem, inplace: bool): blk = obj._mgr.blocks[0] diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 3020348ff2a07..d3400ad17c7d9 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -872,7 +872,7 @@ def test_index_putmask(self, obj, key, expected, warn, val): dtype="interval[float64]", ), 1, - None, + FutureWarning, id="interval_int_na_value", ), pytest.param( @@ -1090,8 +1090,8 @@ def key(self): return 0 @pytest.fixture - def warn(self): - return None + def warn(self, is_inplace): + return None if is_inplace else FutureWarning class TestSetitemMismatchedTZCastsToObject(SetitemCastingEquivalents): @@ -1161,7 +1161,10 @@ def test_setitem_example(self): obj = Series(idx) val = Interval(0.5, 1.5) - obj[0] = val + with tm.assert_produces_warning( + FutureWarning, match="Setting an item of incompatible dtype" + ): + obj[0] = val assert obj.dtype == "Interval[float64, right]" @pytest.fixture @@ -1185,7 +1188,7 @@ def expected(self, obj, val): @pytest.fixture def warn(self): - return None + return FutureWarning class TestSetitemRangeIntoIntegerSeries(SetitemCastingEquivalents): @@ -1435,8 +1438,12 @@ def test_slice_key(self, obj, key, expected, warn, val, indexer_sli, is_inplace) @pytest.mark.parametrize( - "val,exp_dtype", - [(Timestamp("2012-01-01"), "datetime64[ns]"), (1, object), ("x", object)], + "val,exp_dtype,warn", + [ + (Timestamp("2012-01-01"), "datetime64[ns]", None), + (1, object, FutureWarning), + ("x", object, FutureWarning), + ], ) class TestCoercionDatetime64(CoercionTest): # previously test_setitem_series_datetime64 in tests.indexing.test_coercion @@ -1451,13 +1458,13 @@ def warn(self): @pytest.mark.parametrize( - "val,exp_dtype", + "val,exp_dtype,warn", [ - (Timestamp("2012-01-01", 
tz="US/Eastern"), "datetime64[ns, US/Eastern]"), + (Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[ns, US/Eastern]", None), # pre-2.0, a mis-matched tz would end up casting to object - (Timestamp("2012-01-01", tz="US/Pacific"), "datetime64[ns, US/Eastern]"), - (Timestamp("2012-01-01"), object), - (1, object), + (Timestamp("2012-01-01", tz="US/Pacific"), "datetime64[ns, US/Eastern]", None), + (Timestamp("2012-01-01"), object, FutureWarning), + (1, object, FutureWarning), ], ) class TestCoercionDatetime64TZ(CoercionTest): @@ -1473,8 +1480,12 @@ def warn(self): @pytest.mark.parametrize( - "val,exp_dtype", - [(Timedelta("12 day"), "timedelta64[ns]"), (1, object), ("x", object)], + "val,exp_dtype,warn", + [ + (Timedelta("12 day"), "timedelta64[ns]", None), + (1, object, FutureWarning), + ("x", object, FutureWarning), + ], ) class TestCoercionTimedelta64(CoercionTest): # previously test_setitem_series_timedelta64 in tests.indexing.test_coercion @@ -1504,7 +1515,7 @@ def obj(self, request): @pytest.fixture def warn(self): - return None + return FutureWarning def test_20643(): diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index 408747ab19b24..9cd0ce250b5df 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -215,6 +215,7 @@ def test_convert_dtypes( "uint32", "int64", "uint64", + "interval[int64, right]", ]: with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): result[result.notna()] = np.nan diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index aaffd52b78f95..81d640dc7ab32 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -168,7 +168,8 @@ def test_fillna_consistency(self): # assignment ser2 = ser.copy() - ser2[1] = "foo" + with tm.assert_produces_warning(FutureWarning, match="incompatible 
dtype"): + ser2[1] = "foo" tm.assert_series_equal(ser2, expected) def test_fillna_downcast(self): diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 3f0078d3c1487..f1710c914c92a 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -49,7 +49,8 @@ def test_timedelta64_nan(self): assert not isna(td1[0]) # GH#16674 iNaT is treated as an integer when given by the user - td1[1] = iNaT + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + td1[1] = iNaT assert not isna(td1[1]) assert td1.dtype == np.object_ assert td1[1] == iNaT diff --git a/web/pandas/pdeps/0006-ban-upcasting.md b/web/pandas/pdeps/0006-ban-upcasting.md index 12e4c084cc826..a86455b70c71a 100644 --- a/web/pandas/pdeps/0006-ban-upcasting.md +++ b/web/pandas/pdeps/0006-ban-upcasting.md @@ -2,7 +2,7 @@ - Created: 23 December 2022 - Status: Accepted -- Discussion: [#50402](https://github.com/pandas-dev/pandas/pull/50402) +- Discussion: [#39584](https://github.com/pandas-dev/pandas/issues/39584) - Author: [Marco Gorelli](https://github.com/MarcoGorelli) ([original issue](https://github.com/pandas-dev/pandas/issues/39584) by [Joris Van den Bossche](https://github.com/jorisvandenbossche)) - Revision: 1