diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index ef59c86a21598..4e086d0901c3b 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -253,6 +253,7 @@ Other Deprecations - Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`53656`) - Deprecated including the groups in computations when using :meth:`DataFrameGroupBy.apply` and :meth:`DataFrameGroupBy.resample`; pass ``include_groups=False`` to exclude the groups (:issue:`7155`) - Deprecated not passing a tuple to :class:`DataFrameGroupBy.get_group` or :class:`SeriesGroupBy.get_group` when grouping by a length-1 list-like (:issue:`25971`) +- Deprecated setting an item of incompatible dtype via in-place arithmetic operations on a whole column (e.g. ``df.loc[:, 'a'] += 0.1``) (:issue:`39584`) - Deprecated string ``AS`` denoting frequency in :class:`YearBegin` and strings ``AS-DEC``, ``AS-JAN``, etc. denoting annual frequencies with various fiscal year starts (:issue:`54275`) - Deprecated string ``A`` denoting frequency in :class:`YearEnd` and strings ``A-DEC``, ``A-JAN``, etc. denoting annual frequencies with various fiscal year ends (:issue:`54275`) - Deprecated string ``BAS`` denoting frequency in :class:`BYearBegin` and strings ``BAS-DEC``, ``BAS-JAN``, etc. 
denoting annual frequencies with various fiscal year starts (:issue:`54275`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 09c43822e11e4..41fa10f673366 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7389,7 +7389,7 @@ def value_counts( if sort: counts = counts.sort_values(ascending=ascending) if normalize: - counts /= counts.sum() + counts = counts / counts.sum() # Force MultiIndex for a list_like subset with a single column if is_list_like(subset) and len(subset) == 1: # type: ignore[arg-type] diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f1ecc57335a51..f7280253b1abf 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -12379,6 +12379,15 @@ def _inplace_method(self, other, op) -> Self: Wrap arithmetic method to operate inplace. """ result = op(self, other) + if self._typ == "series" and result.dtype != self.dtype: + warnings.warn( + f"Setting an item of incompatible dtype is deprecated " + "and will raise in a future version of pandas. " + f"Value '{other}' has dtype incompatible with {self.values.dtype}, " + "please explicitly cast to a compatible dtype first.", + FutureWarning, + stacklevel=find_stack_level(), + ) if self.ndim == 1 and result._indexed_same(self) and result.dtype == self.dtype: # GH#36498 this inplace op can _actually_ be inplace. 
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index e33c4b3579c69..19cb869d73864 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2831,7 +2831,7 @@ def _value_counts( # GH#43999 - deprecation of observed=False observed=False, ).transform("sum") - result_series /= indexed_group_size + result_series = result_series / indexed_group_size # Handle groups of non-observed categories result_series = result_series.fillna(0.0) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index ccc1249088f9a..997e2e986425a 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -1341,3 +1341,12 @@ def test_frame_setitem_empty_dataframe(self): index=Index([], dtype="datetime64[ns]", name="date"), ) tm.assert_frame_equal(df, expected) + + +def test_iadd_incompatible_dtype() -> None: + # https://github.com/pandas-dev/pandas/issues/39584 + df = DataFrame({"a": [1, 1, 2], "b": [4, 5, 6]}) + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + df.loc[:, "a"] += 0.1 + expected = DataFrame({"a": [1.1, 1.1, 2.1], "b": [4, 5, 6]}) + tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py index f72c0594fa1f7..74c1892bb1bec 100644 --- a/pandas/tests/frame/methods/test_drop.py +++ b/pandas/tests/frame/methods/test_drop.py @@ -510,7 +510,7 @@ def test_drop_with_duplicate_columns2(self): def test_drop_inplace_no_leftover_column_reference(self): # GH 13934 - df = DataFrame({"a": [1, 2, 3]}) + df = DataFrame({"a": [1.0, 2.0, 3.0]}) a = df.a df.drop(["a"], axis=1, inplace=True) tm.assert_index_equal(df.columns, Index([], dtype="object")) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 1d1a4dbe83a9c..5ea33acd57ca1 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ 
b/pandas/tests/frame/test_arithmetic.py @@ -1742,7 +1742,8 @@ def test_inplace_ops_identity(self): # dtype change s = s_orig.copy() s2 = s - s += 1.5 + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + s += 1.5 tm.assert_series_equal(s, s2) tm.assert_series_equal(s_orig + 1.5, s) @@ -1767,7 +1768,8 @@ def test_inplace_ops_identity(self): df = df_orig.copy() df2 = df - df["A"] += 1.5 + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + df["A"] += 1.5 expected = DataFrame({"A": arr.copy() + 1.5, "B": "foo"}) tm.assert_frame_equal(df, expected) tm.assert_frame_equal(df2, expected) diff --git a/pandas/tests/groupby/methods/test_value_counts.py b/pandas/tests/groupby/methods/test_value_counts.py index c1ee107715b71..3969ab4c02967 100644 --- a/pandas/tests/groupby/methods/test_value_counts.py +++ b/pandas/tests/groupby/methods/test_value_counts.py @@ -397,7 +397,7 @@ def test_compound( expected[column] = [education_df[column][row] for row in expected_rows] if normalize: expected["proportion"] = expected_count - expected["proportion"] /= expected_group_size + expected["proportion"] = expected["proportion"] / expected_group_size else: expected["count"] = expected_count tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 5fcd3a19dcaa4..ce3589c4e8055 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -1809,3 +1809,12 @@ def test_setitem_empty_mask_dont_upcast_dt64(): ser.mask(mask, "foo", inplace=True) assert ser.dtype == dti.dtype # no-op -> dont upcast tm.assert_series_equal(ser, orig) + + +def test_iadd_incompatible_dtype() -> None: + # https://github.com/pandas-dev/pandas/issues/39584 + ser = Series([1, 2, 3]) + with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): + ser.loc[:] += 2.2 + expected = Series([3.2, 4.2, 5.2]) + 
tm.assert_series_equal(ser, expected)