From 4b76adb5d649b3f679bf9d45fea842aab16478df Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 18 May 2023 08:43:07 -0700 Subject: [PATCH 1/2] DEPR: allow DataFrame[mask]=value with mixed non-numeric dtypes --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/core/generic.py | 13 +++++++++++++ pandas/tests/frame/indexing/test_where.py | 20 +++++++++++++++++++- 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index e1ac9e3309de7..61fcf99253386 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -264,6 +264,7 @@ Deprecations - Deprecated allowing ``downcast`` keyword other than ``None``, ``False``, "infer", or a dict with these as values in :meth:`Series.fillna`, :meth:`DataFrame.fillna` (:issue:`40988`) - Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`) - Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`) +- Deprecated behavior of :meth:`DataFrame.__setitem__` with a boolean mask and :meth:`DataFrame.putmask` with mixed non-numeric dtypes and a value other than ``NaN``, in a future version this will be allowed instead of raising ``TypeError`` (:issue:`??`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 52bbafffe5340..3e1a0df0779c9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6209,6 +6209,19 @@ def _check_inplace_setting(self, value) -> bool_t: if (is_float(value) and np.isnan(value)) or value is lib.no_default: return True + # Deprecate raising introduced in GH#7657, as the original + # inconsistency no longer appears to exist. 
+ # TODO(3.0): once this is enforced, is_numeric_mixed_type can + # be removed. + warnings.warn( + "DataFrame.__setitem__ with a boolean mask and " + "DataFrame.putmask with mixed non-numeric " + "dtypes and a value other than NaN behavior is deprecated. In a " + "future version this will be allowed.", + FutureWarning, + stacklevel=find_stack_level(), + ) + raise TypeError( "Cannot do inplace boolean setting on " "mixed-types with a non np.nan value" diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index c5e1e3c02c26e..6e0c5268681bb 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -401,10 +401,28 @@ def test_where_none(self): {"A": np.nan, "B": "Test", "C": np.nan}, ] ) + warn_msg = ( + "DataFrame.__setitem__ with a boolean mask and " + "DataFrame.putmask with mixed non-numeric" + ) msg = "boolean setting on mixed-type" + mask = ~isna(df) + with pytest.raises(TypeError, match=msg): + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + df.where(mask, None, inplace=True) + + with pytest.raises(TypeError, match=msg): + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + df.where(mask, 3, inplace=True) + + with pytest.raises(TypeError, match=msg): + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + df[mask] = None + with pytest.raises(TypeError, match=msg): - df.where(~isna(df), None, inplace=True) + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + df[mask] = 3 def test_where_empty_df_and_empty_cond_having_non_bool_dtypes(self): # see gh-21947 From ac0337c1b2b144d7367e67c7ce9850fb7a67188f Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 19 May 2023 08:28:14 -0700 Subject: [PATCH 2/2] Change deprecation to bugfix --- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/core/frame.py | 1 - pandas/core/generic.py | 30 --------------------- pandas/core/internals/array_manager.py | 4 --- 
pandas/core/internals/managers.py | 4 --- pandas/tests/frame/indexing/test_where.py | 33 ++++++++++------------- 6 files changed, 15 insertions(+), 59 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 652be16902190..c511626f060cb 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -264,7 +264,6 @@ Deprecations - Deprecated allowing ``downcast`` keyword other than ``None``, ``False``, "infer", or a dict with these as values in :meth:`Series.fillna`, :meth:`DataFrame.fillna` (:issue:`40988`) - Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`) - Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`) -- Deprecated behavior of :meth:`DataFrame.__setitem__` with a boolean mask and :meth:`DataFrame.putmask` with mixed non-numeric dtypes and a value other than ``NaN``, in a future version this will be allowed instead of raising ``TypeError`` (:issue:`??`) - .. 
--------------------------------------------------------------------------- @@ -371,6 +370,7 @@ Interval Indexing ^^^^^^^^ - Bug in :meth:`DataFrame.__setitem__` losing dtype when setting a :class:`DataFrame` into duplicated columns (:issue:`53143`) +- Bug in :meth:`DataFrame.__setitem__` with a boolean mask and in :meth:`DataFrame.where` with ``inplace=True`` incorrectly raising ``TypeError`` when the frame has mixed non-numeric dtypes and the value is not ``NaN`` (:issue:`53291`) - Missing diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8f6698cab000c..e3c5f2facbe94 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4074,7 +4074,6 @@ def _setitem_frame(self, key, value): "Must pass DataFrame or 2-d ndarray with boolean values only" ) - self._check_inplace_setting(value) self._check_setitem_copy() self._where(-key, value, inplace=True) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 83827e6638ff4..e651ab45225c2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -114,7 +114,6 @@ is_bool_dtype, is_dict_like, is_extension_array_dtype, - is_float, is_list_like, is_number, is_numeric_dtype, @@ -6201,34 +6200,6 @@ def _is_mixed_type(self) -> bool_t: return self.dtypes.nunique() > 1 - @final - def _check_inplace_setting(self, value) -> bool_t: - """check whether we allow in-place setting with this type of value""" - if self._is_mixed_type and not self._mgr.is_numeric_mixed_type: - # allow an actual np.nan through - if (is_float(value) and np.isnan(value)) or value is lib.no_default: - return True - - # Deprecate raising introduced in GH#7657, as the original - # inconsistency no longer appears to exist. - # TODO(3.0): once this is enforced, is_numeric_mixed_type can - # be removed. - warnings.warn( - "DataFrame.__setitem__ with a boolean mask and " - "DataFrame.putmask with mixed non-numeric " - "dtypes and a value other than NaN behavior is deprecated. 
In a " - "future version this will be allowed.", - FutureWarning, - stacklevel=find_stack_level(), - ) - - raise TypeError( - "Cannot do inplace boolean setting on " - "mixed-types with a non np.nan value" - ) - - return True - @final def _get_numeric_data(self) -> Self: return self._constructor(self._mgr.get_numeric_data()).__finalize__(self) @@ -10049,7 +10020,6 @@ def _where( # we may have different type blocks come out of putmask, so # reconstruct the block manager - self._check_inplace_setting(other) new_data = self._mgr.putmask(mask=cond, new=other, align=align) result = self._constructor(new_data) return self._update_inplace(result) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index f04ac76998adc..75eaaa80a9961 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -443,10 +443,6 @@ def to_native_types(self, **kwargs) -> Self: def is_mixed_type(self) -> bool: return True - @property - def is_numeric_mixed_type(self) -> bool: - return all(is_numeric_dtype(t) for t in self.get_dtypes()) - @property def any_extension_types(self) -> bool: """Whether any of the blocks in this manager are extension blocks""" diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 8e60bf02fed75..2a7c0536c66a4 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -514,10 +514,6 @@ def to_native_types(self, **kwargs) -> Self: """ return self.apply("to_native_types", **kwargs) - @property - def is_numeric_mixed_type(self) -> bool: - return all(block.is_numeric for block in self.blocks) - @property def any_extension_types(self) -> bool: """Whether any of the blocks in this manager are extension blocks""" diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index 6e0c5268681bb..562f2fbe55c25 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ 
b/pandas/tests/frame/indexing/test_where.py @@ -401,28 +401,23 @@ def test_where_none(self): {"A": np.nan, "B": "Test", "C": np.nan}, ] ) - warn_msg = ( - "DataFrame.__setitem__ with a boolean mask and " - "DataFrame.putmask with mixed non-numeric" - ) - msg = "boolean setting on mixed-type" - mask = ~isna(df) - with pytest.raises(TypeError, match=msg): - with tm.assert_produces_warning(FutureWarning, match=warn_msg): - df.where(mask, None, inplace=True) + orig = df.copy() - with pytest.raises(TypeError, match=msg): - with tm.assert_produces_warning(FutureWarning, match=warn_msg): - df.where(mask, 3, inplace=True) - - with pytest.raises(TypeError, match=msg): - with tm.assert_produces_warning(FutureWarning, match=warn_msg): - df[mask] = None + mask = ~isna(df) + df.where(mask, None, inplace=True) + expected = DataFrame( + { + "A": [1.0, np.nan], + "B": [None, "Test"], + "C": ["Test", None], + } + ) + tm.assert_frame_equal(df, expected) - with pytest.raises(TypeError, match=msg): - with tm.assert_produces_warning(FutureWarning, match=warn_msg): - df[mask] = 3 + df = orig.copy() + df[~mask] = None + tm.assert_frame_equal(df, expected) def test_where_empty_df_and_empty_cond_having_non_bool_dtypes(self): # see gh-21947