From 89c0f3d7b0d0d185365e080c84fd8bfd90f8904c Mon Sep 17 00:00:00 2001 From: Yao Xiao Date: Sat, 6 May 2023 23:06:10 +0000 Subject: [PATCH 1/7] make NA propagate where and mask operations --- pandas/core/generic.py | 3 +++ pandas/tests/frame/indexing/test_mask.py | 10 ++++++++++ pandas/tests/frame/indexing/test_where.py | 10 ++++++++++ pandas/tests/series/indexing/test_mask.py | 15 ++++++++++++++- pandas/tests/series/indexing/test_where.py | 11 +++++++++++ 5 files changed, 48 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 79e24ad2d0e4c..af4a845736023 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9870,6 +9870,9 @@ def _where( # align the cond to same shape as myself cond = common.apply_if_callable(cond, self) if isinstance(cond, NDFrame): + # cond may contain NA, see GH #52955 + # let NA propagate in where and mask operations + cond = cond.fillna(True) # CoW: Make sure reference is not kept alive if cond.ndim == 1 and self.ndim == 2: cond = cond._constructor_expanddim( diff --git a/pandas/tests/frame/indexing/test_mask.py b/pandas/tests/frame/indexing/test_mask.py index ed0bf256d1ee7..023f7d36eda25 100644 --- a/pandas/tests/frame/indexing/test_mask.py +++ b/pandas/tests/frame/indexing/test_mask.py @@ -8,6 +8,7 @@ NA, DataFrame, Float64Dtype, + Int64Dtype, Series, StringDtype, Timedelta, @@ -150,3 +151,12 @@ def test_mask_inplace_no_other(): df.mask(cond, inplace=True) expected = DataFrame({"a": [np.nan, 2], "b": ["x", np.nan]}) tm.assert_frame_equal(df, expected) + + +def test_mask_with_na(): + # See GH #52955, NA should propagate in mask + df = DataFrame([[1, NA], [NA, 2]], dtype=Int64Dtype()) + result = df.mask(df % 2 == 1, 0) + + expected = DataFrame([[0, NA], [NA, 2]], dtype=Int64Dtype()) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index c5e1e3c02c26e..b90d9701c4697 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -11,6 +11,7 @@ DataFrame, DatetimeIndex, Index, + Int64Dtype, Series, StringDtype, Timestamp, @@ -1032,3 +1033,12 @@ def test_where_inplace_no_other(): df.where(cond, inplace=True) expected = DataFrame({"a": [1, np.nan], "b": [np.nan, "y"]}) tm.assert_frame_equal(df, expected) + + +def test_where_with_na(): + # See GH #52955, NA should propagate in where + df = DataFrame([[1, pd.NA], [pd.NA, 2]], dtype=Int64Dtype()) + result = df.where(df % 2 == 1, 0) + + expected = DataFrame([[1, pd.NA], [pd.NA, 0]], dtype=Int64Dtype()) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/indexing/test_mask.py b/pandas/tests/series/indexing/test_mask.py index 28235a8918e3f..d7f5141200895 100644 --- a/pandas/tests/series/indexing/test_mask.py +++ b/pandas/tests/series/indexing/test_mask.py @@ -1,7 +1,11 @@ import numpy as np import pytest -from pandas import Series +from pandas import ( + NA, + Int64Dtype, + Series, +) import pandas._testing as tm @@ -67,3 +71,12 @@ def test_mask_inplace(): rs = s.copy() rs.mask(cond, -s, inplace=True) tm.assert_series_equal(rs, s.mask(cond, -s)) + + +def test_mask_with_na(): + # See GH #52955, NA should propagate in mask + s = Series([1, 2, NA], dtype=Int64Dtype()) + res = s.mask(s % 2 == 1, 0) + + exp = Series([0, 2, NA], dtype=Int64Dtype()) + tm.assert_series_equal(res, exp) diff --git a/pandas/tests/series/indexing/test_where.py b/pandas/tests/series/indexing/test_where.py index eabaf23bd36f8..38c4d1a44e549 100644 --- a/pandas/tests/series/indexing/test_where.py +++ b/pandas/tests/series/indexing/test_where.py @@ -5,6 +5,8 @@ import pandas as pd from pandas import ( + NA, + Int64Dtype, Series, Timestamp, date_range, @@ -464,3 +466,12 @@ def test_where_datetimelike_categorical(tz_naive_fixture): res = pd.DataFrame(lvals).where(mask[:, None], pd.DataFrame(rvals)) tm.assert_frame_equal(res, pd.DataFrame(dr)) + + +def test_where_with_na(): + # See GH #52955, NA should propagate in where + s = Series([1, 2, NA], dtype=Int64Dtype()) + res = s.where(s % 2 == 1, 0) + + exp = Series([1, 0, NA], dtype=Int64Dtype()) + tm.assert_series_equal(res, exp) From 321147f47804ee5422c012ea134e7e1b62e0a686 Mon Sep 17 00:00:00 2001 From: Yao Xiao Date: Sat, 6 May 2023 23:14:30 +0000 Subject: [PATCH 2/7] changelog added --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 36b2aa3c28da5..eca71c24c5bc2 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -357,7 +357,7 @@ Interval Indexing ^^^^^^^^ -- +- :meth:`DataFrame.where`, :meth:`DataFrame.mask`, :meth:`Series.where`, and :meth:`Series.mask` now let ``pd.NA`` propagate through (:issue:`52955`) - Missing From 36bbe162866310a37e9c26354a2637b6cce0d0a0 Mon Sep 17 00:00:00 2001 From: Yao Xiao Date: Sun, 7 May 2023 13:49:26 +0000 Subject: [PATCH 3/7] fix when using boolean arrays --- pandas/core/generic.py | 3 +++ pandas/tests/series/indexing/test_mask.py | 6 ++++-- pandas/tests/series/indexing/test_where.py | 6 ++++-- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index af4a845736023..dd7ca2e281935 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9887,6 +9887,9 @@ def _where( if cond.shape != self.shape: raise ValueError("Array conditional must be same shape as self") cond = self._constructor(cond, **self._construct_axes_dict(), copy=False) + # cond may contain NA, see GH #52955 + # let NA propagate in where and mask operations + cond = cond.fillna(True) # make sure we are boolean fill_value = bool(inplace) diff --git a/pandas/tests/series/indexing/test_mask.py b/pandas/tests/series/indexing/test_mask.py index d7f5141200895..a5045d13b69ee 100644 --- a/pandas/tests/series/indexing/test_mask.py +++ b/pandas/tests/series/indexing/test_mask.py @@ -76,7 +76,9 @@ def test_mask_inplace(): def test_mask_with_na(): # See GH #52955, NA should propagate in mask s = Series([1, 2, NA], dtype=Int64Dtype()) - res = s.mask(s % 2 == 1, 0) + res1 = s.mask(s % 2 == 1, 0) + res2 = s.mask(s.array % 2 == 1, 0) exp = Series([0, 2, NA], dtype=Int64Dtype()) - tm.assert_series_equal(res, exp) + tm.assert_series_equal(res1, exp) + tm.assert_series_equal(res2, exp) diff --git a/pandas/tests/series/indexing/test_where.py b/pandas/tests/series/indexing/test_where.py index 38c4d1a44e549..cee940b5b6778 100644 --- a/pandas/tests/series/indexing/test_where.py +++ b/pandas/tests/series/indexing/test_where.py @@ -471,7 +471,9 @@ def test_where_datetimelike_categorical(tz_naive_fixture): def test_where_with_na(): # See GH #52955, NA should propagate in where s = Series([1, 2, NA], dtype=Int64Dtype()) - res = s.where(s % 2 == 1, 0) + res1 = s.where(s % 2 == 1, 0) + res2 = s.where(s.array % 2 == 1, 0) exp = Series([1, 0, NA], dtype=Int64Dtype()) - tm.assert_series_equal(res, exp) + tm.assert_series_equal(res1, exp) + tm.assert_series_equal(res2, exp) From e2216cb47cba4f914bccb6588de28adf8a763dc5 Mon Sep 17 00:00:00 2001 From: Yao Xiao Date: Mon, 8 May 2023 07:21:33 +0000 Subject: [PATCH 4/7] added tests, reword NA propagates -> if cond=NA then element propagates --- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/core/generic.py | 6 ++---- pandas/tests/frame/indexing/test_mask.py | 12 ++++++++---- pandas/tests/frame/indexing/test_where.py | 12 ++++++++---- pandas/tests/series/indexing/test_mask.py | 2 +- pandas/tests/series/indexing/test_where.py | 2 +- 6 files changed, 21 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index eca71c24c5bc2..f4e2b914bc895 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -357,7 +357,7 @@ Interval Indexing ^^^^^^^^ -- :meth:`DataFrame.where`, :meth:`DataFrame.mask`, :meth:`Series.where`, and :meth:`Series.mask` now let ``pd.NA`` propagate through (:issue:`52955`) +- Bug in :meth:`DataFrame.where`, :meth:`DataFrame.mask`, :meth:`Series.where`, and :meth:`Series.mask`, when ``cond`` for an element is ``pd.NA``; the corresponding element now propagates through (:issue:`52955`) - Missing diff --git a/pandas/core/generic.py b/pandas/core/generic.py index dd7ca2e281935..3a4efbf29136c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9870,8 +9870,7 @@ def _where( # align the cond to same shape as myself cond = common.apply_if_callable(cond, self) if isinstance(cond, NDFrame): - # cond may contain NA, see GH #52955 - # let NA propagate in where and mask operations + # GH #52955: if cond is NA, element propagates in mask and where cond = cond.fillna(True) # CoW: Make sure reference is not kept alive if cond.ndim == 1 and self.ndim == 2: @@ -9887,8 +9886,7 @@ def _where( if cond.shape != self.shape: raise ValueError("Array conditional must be same shape as self") cond = self._constructor(cond, **self._construct_axes_dict(), copy=False) - # cond may contain NA, see GH #52955 - # let NA propagate in where and mask operations + # GH #52955: if cond is NA, element propagates in mask and where cond = cond.fillna(True) # make sure we are boolean diff --git a/pandas/tests/frame/indexing/test_mask.py b/pandas/tests/frame/indexing/test_mask.py index 023f7d36eda25..b97183005e68c 100644 --- a/pandas/tests/frame/indexing/test_mask.py +++ b/pandas/tests/frame/indexing/test_mask.py @@ -154,9 +154,13 @@ def test_mask_inplace_no_other(): def test_mask_with_na(): - # See GH #52955, NA should propagate in mask + # See GH #52955, if cond is NA, propagate in mask df = DataFrame([[1, NA], [NA, 2]], dtype=Int64Dtype()) - result = df.mask(df % 2 == 1, 0) - expected = DataFrame([[0, NA], [NA, 2]], dtype=Int64Dtype()) - tm.assert_frame_equal(result, expected) + result1 = df.mask(df % 2 == 1, 0) + expected1 = DataFrame([[0, NA], [NA, 2]], dtype=Int64Dtype()) + tm.assert_frame_equal(result1, expected1) + + result2 = df.mask(df[0] % 2 == 1, 0) + expected2 = DataFrame([[0, 0], [NA, 2]], dtype=Int64Dtype()) + tm.assert_frame_equal(result2, expected2) diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index b90d9701c4697..df6d90038585f 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -1036,9 +1036,13 @@ def test_where_inplace_no_other(): def test_where_with_na(): - # See GH #52955, NA should propagate in where + # See GH #52955, if cond is NA, propagate in where df = DataFrame([[1, pd.NA], [pd.NA, 2]], dtype=Int64Dtype()) - result = df.where(df % 2 == 1, 0) - expected = DataFrame([[1, pd.NA], [pd.NA, 0]], dtype=Int64Dtype()) - tm.assert_frame_equal(result, expected) + result1 = df.where(df % 2 == 1, 0) + expected1 = DataFrame([[1, pd.NA], [pd.NA, 0]], dtype=Int64Dtype()) + tm.assert_frame_equal(result1, expected1) + + result2 = df.where(df[0] % 2 == 1, 0) + expected2 = DataFrame([[1, pd.NA], [pd.NA, 2]], dtype=Int64Dtype()) + tm.assert_frame_equal(result2, expected2) diff --git a/pandas/tests/series/indexing/test_mask.py b/pandas/tests/series/indexing/test_mask.py index a5045d13b69ee..fc46935f05100 100644 --- a/pandas/tests/series/indexing/test_mask.py +++ b/pandas/tests/series/indexing/test_mask.py @@ -74,7 +74,7 @@ def test_mask_inplace(): def test_mask_with_na(): - # See GH #52955, NA should propagate in mask + # See GH #52955, if cond is NA, propagate in mask s = Series([1, 2, NA], dtype=Int64Dtype()) res1 = s.mask(s % 2 == 1, 0) res2 = s.mask(s.array % 2 == 1, 0) diff --git a/pandas/tests/series/indexing/test_where.py b/pandas/tests/series/indexing/test_where.py index cee940b5b6778..2b5e6d0332764 100644 --- a/pandas/tests/series/indexing/test_where.py +++ b/pandas/tests/series/indexing/test_where.py @@ -469,7 +469,7 @@ def test_where_datetimelike_categorical(tz_naive_fixture): def test_where_with_na(): - # See GH #52955, NA should propagate in where + # See GH #52955, if cond is NA, propagate in where s = Series([1, 2, NA], dtype=Int64Dtype()) res1 = s.where(s % 2 == 1, 0) res2 = s.where(s.array % 2 == 1, 0) From 9875669f9aba7caccd527b0646f46dade3c31a73 Mon Sep 17 00:00:00 2001 From: Yao Xiao Date: Mon, 8 May 2023 17:05:41 +0000 Subject: [PATCH 5/7] avoid multiple fillna when unnecessary --- pandas/core/generic.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 01d9d709b9a8c..925fd06dbf8a8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9867,6 +9867,7 @@ def _where( axis = self._get_axis_number(axis) # align the cond to same shape as myself + # meanwhile make sure cond is boolean cond = common.apply_if_callable(cond, self) if isinstance(cond, NDFrame): # GH #52955: if cond is NA, element propagates in mask and where @@ -9878,7 +9879,8 @@ def _where( copy=False, ) cond.columns = self.columns - cond = cond.align(self, join="right", copy=False)[0] + # align can introduce na, make sure we are boolean + cond = cond.align(self, join="right", copy=False)[0].fillna(bool(inplace)) else: if not hasattr(cond, "shape"): cond = np.asanyarray(cond) @@ -9888,10 +9890,6 @@ def _where( # GH #52955: if cond is NA, element propagates in mask and where cond = cond.fillna(True) - # make sure we are boolean - fill_value = bool(inplace) - cond = cond.fillna(fill_value) - msg = "Boolean array expected for the condition, not {dtype}" if not cond.empty: @@ -9906,7 +9904,7 @@ def _where( if cond._mgr.any_extension_types: # GH51574: avoid object ndarray conversion later on cond = cond._constructor( - cond.to_numpy(dtype=bool, na_value=fill_value), + cond.to_numpy(dtype=bool), **cond._construct_axes_dict(), ) else: From 09f62bc2da776b5fd3343a73b8293a9587da87ef Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Wed, 30 Aug 2023 00:46:38 +0800 Subject: [PATCH 6/7] raise in where and mask if cond is nullable bool with NAs --- pandas/core/generic.py | 29 +++++++++++++--------- pandas/tests/frame/indexing/test_mask.py | 14 +++++------ pandas/tests/frame/indexing/test_where.py | 23 ++++------------- pandas/tests/series/indexing/test_mask.py | 15 ++++++----- pandas/tests/series/indexing/test_where.py | 15 ++++++----- 5 files changed, 42 insertions(+), 54 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 007c9769e3137..27ca2d9c0dec2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10292,40 +10292,45 @@ def _where( axis = self._get_axis_number(axis) # align the cond to same shape as myself - # meanwhile make sure cond is boolean + cond_hasna: bool_t cond = common.apply_if_callable(cond, self) if isinstance(cond, NDFrame): - # GH #52955: if cond is NA, element propagates in mask and where - cond = cond.fillna(True) # CoW: Make sure reference is not kept alive - if cond.ndim == 1 and self.ndim == 2: - cond = cond._constructor_expanddim( - {i: cond for i in range(len(self.columns))}, - copy=False, + cond_hasna = cond.isna().any(axis=None) + if not cond_hasna: + if cond.ndim == 1 and self.ndim == 2: + cond = cond._constructor_expanddim( + {i: cond for i in range(len(self.columns))}, + copy=False, + ) + cond.columns = self.columns + cond = cond.align(self, join="right", copy=False)[0].fillna( + bool(inplace) ) - cond.columns = self.columns - # align can introduce na, make sure we are boolean - cond = cond.align(self, join="right", copy=False)[0].fillna(bool(inplace)) else: if not hasattr(cond, "shape"): cond = np.asanyarray(cond) if cond.shape != self.shape: raise ValueError("Array conditional must be same shape as self") cond = self._constructor(cond, **self._construct_axes_dict(), copy=False) - # GH #52955: if cond is NA, element propagates in mask and where - cond = cond.fillna(True) + cond_hasna = cond.isna().any(axis=None) msg = "Boolean array expected for the condition, not {dtype}" + namsg = "The condition array cannot contain NA values" if not cond.empty: if not isinstance(cond, ABCDataFrame): # This is a single-dimensional object. if not is_bool_dtype(cond): raise ValueError(msg.format(dtype=cond.dtype)) + if cond_hasna: + raise ValueError(namsg) else: for _dt in cond.dtypes: if not is_bool_dtype(_dt): raise ValueError(msg.format(dtype=_dt)) + if cond_hasna: + raise ValueError(namsg) if cond._mgr.any_extension_types: # GH51574: avoid object ndarray conversion later on cond = cond._constructor( diff --git a/pandas/tests/frame/indexing/test_mask.py b/pandas/tests/frame/indexing/test_mask.py index b9a9bf42911cd..186d39993a8e9 100644 --- a/pandas/tests/frame/indexing/test_mask.py +++ b/pandas/tests/frame/indexing/test_mask.py @@ -3,6 +3,7 @@ """ import numpy as np +import pytest from pandas import ( NA, @@ -154,13 +155,10 @@ def test_mask_inplace_no_other(): def test_mask_with_na(): - # See GH #52955, if cond is NA, propagate in mask + # GH#52955 df = DataFrame([[1, NA], [NA, 2]], dtype=Int64Dtype()) + msg = "The condition array cannot contain NA values" - result1 = df.mask(df % 2 == 1, 0) - expected1 = DataFrame([[0, NA], [NA, 2]], dtype=Int64Dtype()) - tm.assert_frame_equal(result1, expected1) - - result2 = df.mask(df[0] % 2 == 1, 0) - expected2 = DataFrame([[0, 0], [NA, 2]], dtype=Int64Dtype()) - tm.assert_frame_equal(result2, expected2) + for cond_frame in [df, df[0]]: + with pytest.raises(ValueError, match=msg): + df.mask(cond_frame % 2 == 1, 0) diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index aa8b537bd3868..225b4531767e5 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -1047,16 +1047,6 @@ def test_where_dt64_2d(): _check_where_equivalences(df, mask, other, expected) -def test_where_producing_ea_cond_for_np_dtype(): - # GH#44014 - df = DataFrame({"a": Series([1, pd.NA, 2], dtype="Int64"), "b": [1, 2, 3]}) - result = df.where(lambda x: x.apply(lambda y: y > 1, axis=1)) - expected = DataFrame( - {"a": Series([pd.NA, pd.NA, 2], dtype="Int64"), "b": [np.nan, 2, 3]} - ) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( "replacement", [0.001, True, "snake", None, datetime(2022, 5, 4)] ) @@ -1079,13 +1069,10 @@ def test_where_inplace_no_other(): def test_where_with_na(): - # See GH #52955, if cond is NA, propagate in where + # GH#52955 df = DataFrame([[1, pd.NA], [pd.NA, 2]], dtype=Int64Dtype()) + msg = "The condition array cannot contain NA values" - result1 = df.where(df % 2 == 1, 0) - expected1 = DataFrame([[1, pd.NA], [pd.NA, 0]], dtype=Int64Dtype()) - tm.assert_frame_equal(result1, expected1) - - result2 = df.where(df[0] % 2 == 1, 0) - expected2 = DataFrame([[1, pd.NA], [pd.NA, 2]], dtype=Int64Dtype()) - tm.assert_frame_equal(result2, expected2) + for cond_frame in [df, df[0]]: + with pytest.raises(ValueError, match=msg): + df.where(cond_frame % 2 == 1, 0) diff --git a/pandas/tests/series/indexing/test_mask.py b/pandas/tests/series/indexing/test_mask.py index d9ab0da8f8fef..d78ee69a51d9c 100644 --- a/pandas/tests/series/indexing/test_mask.py +++ b/pandas/tests/series/indexing/test_mask.py @@ -74,11 +74,10 @@ def test_mask_inplace(): def test_mask_with_na(): - # See GH #52955, if cond is NA, propagate in mask - s = Series([1, 2, NA], dtype=Int64Dtype()) - res1 = s.mask(s % 2 == 1, 0) - res2 = s.mask(s.array % 2 == 1, 0) - - exp = Series([0, 2, NA], dtype=Int64Dtype()) - tm.assert_series_equal(res1, exp) - tm.assert_series_equal(res2, exp) + # GH#52955 + ser = Series([1, 2, NA], dtype=Int64Dtype()) + msg = "The condition array cannot contain NA values" + + for cond_arr in [ser, ser.array]: + with pytest.raises(ValueError, match=msg): + ser.mask(cond_arr % 2 == 1, 0) diff --git a/pandas/tests/series/indexing/test_where.py b/pandas/tests/series/indexing/test_where.py index 50b521c6e25a0..89c552bd7a221 100644 --- a/pandas/tests/series/indexing/test_where.py +++ b/pandas/tests/series/indexing/test_where.py @@ -476,11 +476,10 @@ def test_where_datetimelike_categorical(tz_naive_fixture): def test_where_with_na(): - # See GH #52955, if cond is NA, propagate in where - s = Series([1, 2, NA], dtype=Int64Dtype()) - res1 = s.where(s % 2 == 1, 0) - res2 = s.where(s.array % 2 == 1, 0) - - exp = Series([1, 0, NA], dtype=Int64Dtype()) - tm.assert_series_equal(res1, exp) - tm.assert_series_equal(res2, exp) + # GH#52955 + ser = Series([1, 2, NA], dtype=Int64Dtype()) + msg = "The condition array cannot contain NA values" + + for cond_arr in [ser, ser.array]: + with pytest.raises(ValueError, match=msg): + ser.where(cond_arr % 2 == 1, 0) From cbbd866c78ac8b3918139f033376f7bb578cc88a Mon Sep 17 00:00:00 2001 From: Charlie-XIAO Date: Wed, 30 Aug 2023 11:17:06 +0800 Subject: [PATCH 7/7] remove conflicting (?) test and improve message --- pandas/core/generic.py | 6 +++--- pandas/tests/frame/indexing/test_mask.py | 2 +- pandas/tests/frame/indexing/test_setitem.py | 13 ------------- pandas/tests/frame/indexing/test_where.py | 2 +- pandas/tests/series/indexing/test_mask.py | 2 +- pandas/tests/series/indexing/test_where.py | 2 +- 6 files changed, 7 insertions(+), 20 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3baf65e6c8818..fc0f6a8dabb7f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10316,7 +10316,7 @@ def _where( cond_hasna = cond.isna().any(axis=None) msg = "Boolean array expected for the condition, not {dtype}" - namsg = "The condition array cannot contain NA values" + na_msg = "Cannot mask with an array containing NA / NaN values" if not cond.empty: if not isinstance(cond, ABCDataFrame): @@ -10324,13 +10324,13 @@ def _where( if not is_bool_dtype(cond): raise ValueError(msg.format(dtype=cond.dtype)) if cond_hasna: - raise ValueError(namsg) + raise ValueError(na_msg) else: for _dt in cond.dtypes: if not is_bool_dtype(_dt): raise ValueError(msg.format(dtype=_dt)) if cond_hasna: - raise ValueError(namsg) + raise ValueError(na_msg) if cond._mgr.any_extension_types: # GH51574: avoid object ndarray conversion later on cond = cond._constructor( diff --git a/pandas/tests/frame/indexing/test_mask.py b/pandas/tests/frame/indexing/test_mask.py index 186d39993a8e9..5387c3ed2bd3d 100644 --- a/pandas/tests/frame/indexing/test_mask.py +++ b/pandas/tests/frame/indexing/test_mask.py @@ -157,7 +157,7 @@ def test_mask_inplace_no_other(): def test_mask_with_na(): # GH#52955 df = DataFrame([[1, NA], [NA, 2]], dtype=Int64Dtype()) - msg = "The condition array cannot contain NA values" + msg = "Cannot mask with an array containing NA / NaN values" for cond_frame in [df, df[0]]: with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index ccc1249088f9a..afb5cdcae9fa0 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -1144,19 +1144,6 @@ def test_loc_setitem_all_false_boolean_two_blocks(self): df.loc[indexer, ["b"]] = DataFrame({"b": [5, 6]}, index=[0, 1]) tm.assert_frame_equal(df, expected) - def test_setitem_ea_boolean_mask(self): - # GH#47125 - df = DataFrame([[-1, 2], [3, -4]]) - expected = DataFrame([[0, 2], [3, 0]]) - boolean_indexer = DataFrame( - { - 0: Series([True, False], dtype="boolean"), - 1: Series([pd.NA, True], dtype="boolean"), - } - ) - df[boolean_indexer] = 0 - tm.assert_frame_equal(df, expected) - class TestDataFrameSetitemCopyViewSemantics: def test_setitem_always_copy(self, float_frame): diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index 225b4531767e5..07f9d9cd3d492 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -1071,7 +1071,7 @@ def test_where_inplace_no_other(): def test_where_with_na(): # GH#52955 df = DataFrame([[1, pd.NA], [pd.NA, 2]], dtype=Int64Dtype()) - msg = "The condition array cannot contain NA values" + msg = "Cannot mask with an array containing NA / NaN values" for cond_frame in [df, df[0]]: with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/series/indexing/test_mask.py b/pandas/tests/series/indexing/test_mask.py index d78ee69a51d9c..a3648592e734b 100644 --- a/pandas/tests/series/indexing/test_mask.py +++ b/pandas/tests/series/indexing/test_mask.py @@ -76,7 +76,7 @@ def test_mask_inplace(): def test_mask_with_na(): # GH#52955 ser = Series([1, 2, NA], dtype=Int64Dtype()) - msg = "The condition array cannot contain NA values" + msg = "Cannot mask with an array containing NA / NaN values" for cond_arr in [ser, ser.array]: with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/series/indexing/test_where.py b/pandas/tests/series/indexing/test_where.py index 89c552bd7a221..241f9e7230080 100644 --- a/pandas/tests/series/indexing/test_where.py +++ b/pandas/tests/series/indexing/test_where.py @@ -478,7 +478,7 @@ def test_where_datetimelike_categorical(tz_naive_fixture): def test_where_with_na(): # GH#52955 ser = Series([1, 2, NA], dtype=Int64Dtype()) - msg = "The condition array cannot contain NA values" + msg = "Cannot mask with an array containing NA / NaN values" for cond_arr in [ser, ser.array]: with pytest.raises(ValueError, match=msg):