Skip to content

Warn on boolean frame indexer #39373

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 10 commits into from
16 changes: 15 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3041,6 +3041,12 @@ def __getitem__(self, key):

# Do we have a (boolean) DataFrame?
if isinstance(key, DataFrame):
warnings.warn(
"Using a DataFrame as an indexer is deprecated "
"and will be disallowed in future. Use where instead.",
FutureWarning,
stacklevel=2,
)
return self.where(key)

# Do we have a (boolean) 1d indexer?
Expand Down Expand Up @@ -3187,7 +3193,15 @@ def __setitem__(self, key, value):
# to a slice for partial-string date indexing
return self._setitem_slice(indexer, value)

if isinstance(key, DataFrame) or getattr(key, "ndim", None) == 2:
if isinstance(key, DataFrame):
warnings.warn(
"Using a DataFrame as an indexer is deprecated "
"and will be disallowed in future. Use mask instead.",
FutureWarning,
stacklevel=2,
)
self._setitem_frame(key, value)
elif getattr(key, "ndim", None) == 2:
self._setitem_frame(key, value)
elif isinstance(key, (Series, np.ndarray, list, Index)):
self._setitem_array(key, value)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/test_boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def _check_op(self, obj, op, other, op_name, exc=NotImplementedError):
expected = expected.astype("Float64")
if op_name == "__rpow__":
# for rpow, combine does not propagate NaN
expected[result.isna()] = np.nan
expected.mask(result.isna(), np.nan, True)
self.assert_equal(result, expected)
else:
with pytest.raises(exc):
Expand Down
61 changes: 26 additions & 35 deletions pandas/tests/frame/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,8 @@ def inc(x):
return x + 1

df = DataFrame([[-1, 1], [1, -1]])
df[df > 0] = inc
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
df[df > 0] = inc

expected = DataFrame([[-1, inc], [inc, -1]])
tm.assert_frame_equal(df, expected)
Expand All @@ -259,7 +260,7 @@ def test_setitem_same_column(self, cols, values, expected):
result = df["a"].values[0]
assert result == expected

def test_getitem_boolean(
def test_boolean_indexing(
self, float_string_frame, mixed_float_frame, mixed_int_frame, datetime_frame
):
# boolean indexing
Expand All @@ -278,7 +279,7 @@ def test_getitem_boolean(
tm.assert_frame_equal(subframe_obj, subframe)

with pytest.raises(ValueError, match="Boolean array expected"):
datetime_frame[datetime_frame]
datetime_frame.where(datetime_frame)

# test that Series work
indexer_obj = Series(indexer_obj, datetime_frame.index)
Expand Down Expand Up @@ -306,7 +307,7 @@ def test_getitem_boolean(
continue

data = df._get_numeric_data()
bif = df[df > 0]
bif = df.where(df > 0)
bifw = DataFrame(
{c: np.where(data[c] > 0, data[c], np.nan) for c in data.columns},
index=data.index,
Expand All @@ -324,7 +325,7 @@ def test_getitem_boolean(
if bif[c].dtype != bifw[c].dtype:
assert bif[c].dtype == df[c].dtype

def test_getitem_boolean_casting(self, datetime_frame):
def test_boolean_casting(self, datetime_frame):

# don't upcast if we don't need to
df = datetime_frame.copy()
Expand All @@ -335,7 +336,7 @@ def test_getitem_boolean_casting(self, datetime_frame):
df["F"] = df["F"].astype("int64")
df["F1"] = df["F"].copy()

casted = df[df > 0]
casted = df.where(df > 0)
result = casted.dtypes
expected = Series(
[np.dtype("float64")] * 4
Expand All @@ -347,7 +348,7 @@ def test_getitem_boolean_casting(self, datetime_frame):

# int block splitting
df.loc[df.index[1:3], ["E1", "F1"]] = 0
casted = df[df > 0]
casted = df.where(df > 0)
result = casted.dtypes
expected = Series(
[np.dtype("float64")] * 4
Expand All @@ -371,16 +372,6 @@ def _checkit(lst):
_checkit([True, True, True])
_checkit([False, False, False])

def test_getitem_boolean_iadd(self):
arr = np.random.randn(5, 5)

df = DataFrame(arr.copy(), columns=["A", "B", "C", "D", "E"])

df[df < 0] += 1
arr[arr < 0] += 1

tm.assert_almost_equal(df.values, arr)

def test_boolean_index_empty_corner(self):
# #2096
blah = DataFrame(np.empty([0, 1]), columns=["A"], index=DatetimeIndex([]))
Expand Down Expand Up @@ -500,7 +491,7 @@ def test_setitem_always_copy(self, float_frame):
float_frame["E"][5:10] = np.nan
assert notna(s[5:10]).all()

def test_setitem_boolean(self, float_frame):
def test_boolean_indexer_assignments(self, float_frame):
df = float_frame.copy()
values = float_frame.values

Expand All @@ -515,38 +506,38 @@ def test_setitem_boolean(self, float_frame):
values[values[:, 0] == 4] = 1
tm.assert_almost_equal(df.values, values)

df[df > 0] = 5
df.mask(df > 0, 5, True)
values[values > 0] = 5
tm.assert_almost_equal(df.values, values)

df[df == 5] = 0
df.mask(df == 5, 0, True)
values[values == 5] = 0
tm.assert_almost_equal(df.values, values)

# a df that needs alignment first
df[df[:-1] < 0] = 2
df.mask(df[:-1] < 0, 2, True)
np.putmask(values[:-1], values[:-1] < 0, 2)
tm.assert_almost_equal(df.values, values)

# indexed with same shape but rows-reversed df
df[df[::-1] == 2] = 3
df.mask(df[::-1] == 2, 3, True)
values[values == 2] = 3
tm.assert_almost_equal(df.values, values)

msg = "Must pass DataFrame or 2-d ndarray with boolean values only"
msg = "inputs could not be safely coerced"
with pytest.raises(TypeError, match=msg):
df[df * 0] = 2
df.mask(df * 0, 2, True)

# index with DataFrame
mask = df > np.abs(df)
expected = df.copy()
df[df > np.abs(df)] = np.nan
df.mask(df > np.abs(df), np.nan, True)
expected.values[mask.values] = np.nan
tm.assert_frame_equal(df, expected)

# set from DataFrame
expected = df.copy()
df[df > np.abs(df)] = df * 2
df.mask(df > np.abs(df), df * 2, True)
np.putmask(expected.values, mask.values, df.values * 2)
tm.assert_frame_equal(df, expected)

Expand Down Expand Up @@ -733,13 +724,13 @@ def test_setitem_empty(self):

@pytest.mark.parametrize("dtype", ["float", "int64"])
@pytest.mark.parametrize("kwargs", [{}, {"index": [1]}, {"columns": ["A"]}])
def test_setitem_empty_frame_with_boolean(self, dtype, kwargs):
def test_mask_empty_frame_with_boolean(self, dtype, kwargs):
# see gh-10126
kwargs["dtype"] = dtype
df = DataFrame(**kwargs)

df2 = df.copy()
df[df > df2] = 47
df.mask(df > df2, 47, True)
tm.assert_frame_equal(df, df2)

def test_setitem_with_empty_listlike(self):
Expand All @@ -757,11 +748,11 @@ def test_setitem_scalars_no_index(self):
expected = DataFrame(columns=["foo"]).astype(np.int64)
tm.assert_frame_equal(df, expected)

def test_getitem_empty_frame_with_boolean(self):
def test_where_empty_frame_with_boolean(self):
# Test for issue #11859

df = DataFrame()
df2 = df[df > 0]
df2 = df.where(df > 0)
tm.assert_frame_equal(df, df2)

def test_getitem_fancy_slice_integers_step(self):
Expand Down Expand Up @@ -1554,7 +1545,7 @@ def test_loc_getitem_index_single_double_tuples(self, tpl):
expected = DataFrame(index=idx)
tm.assert_frame_equal(result, expected)

def test_setitem_boolean_indexing(self):
def test_mask_boolean_indexing(self):
idx = list(range(3))
cols = ["A", "B", "C"]
df1 = DataFrame(
Expand All @@ -1572,12 +1563,12 @@ def test_setitem_boolean_indexing(self):
data=np.array([[0.0, 0.5, 1.0], [1.5, 2.0, -1], [-1, -1, -1]], dtype=float),
)

df1[df1 > 2.0 * df2] = -1
df1.mask(df1 > 2.0 * df2, -1, True)
tm.assert_frame_equal(df1, expected)
with pytest.raises(ValueError, match="Item wrong length"):
df1[df1.index[:-1] > 2] = -1

def test_getitem_boolean_indexing_mixed(self):
def test_mask_boolean_indexing_mixed(self):
df = DataFrame(
{
0: {35: np.nan, 40: np.nan, 43: np.nan, 49: np.nan, 50: np.nan},
Expand Down Expand Up @@ -1609,7 +1600,7 @@ def test_getitem_boolean_indexing_mixed(self):

# mixed int/float ok
df2 = df.copy()
df2[df2 > 0.3] = 1
df2.mask(df2 > 0.3, 1, True)
expected = df.copy()
expected.loc[40, 1] = 1
expected.loc[49, 1] = 1
Expand All @@ -1621,7 +1612,7 @@ def test_getitem_boolean_indexing_mixed(self):
msg = "not supported between instances|unorderable types"

with pytest.raises(TypeError, match=msg):
df[df > 0.3] = 1
df.mask(df > 0.3, 1, True)

def test_type_error_multiindex(self):
# See gh-12218
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,21 +451,21 @@ def test_setitem_callable(self):
tm.assert_frame_equal(df, exp)


class TestDataFrameSetItemBooleanMask:
class TestDataFrameMaskBooleanMask:
@pytest.mark.parametrize(
"mask_type",
[lambda df: df > np.abs(df) / 2, lambda df: (df > np.abs(df) / 2).values],
ids=["dataframe", "array"],
)
def test_setitem_boolean_mask(self, mask_type, float_frame):
def test_mask_boolean_mask(self, mask_type, float_frame):

# Test for issue #18582
df = float_frame.copy()
mask = mask_type(df)

# index with boolean mask
result = df.copy()
result[mask] = np.nan
result.mask(mask, np.nan, True)

expected = df.copy()
expected.values[np.array(mask)] = np.nan
Expand Down
10 changes: 5 additions & 5 deletions pandas/tests/frame/indexing/test_where.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ def test_where_bug_transposition(self):
do_not_replace = b.isna() | (a > b)

expected = a.copy()
expected[~do_not_replace] = b
expected.mask(~do_not_replace, b, True)

result = a.where(do_not_replace, b)
tm.assert_frame_equal(result, expected)
Expand All @@ -347,7 +347,7 @@ def test_where_bug_transposition(self):
do_not_replace = b.isna() | (a > b)

expected = a.copy()
expected[~do_not_replace] = b
expected.mask(~do_not_replace, b, True)

result = a.where(do_not_replace, b)
tm.assert_frame_equal(result, expected)
Expand All @@ -368,7 +368,7 @@ def test_where_datetime(self):
with pytest.raises(TypeError, match=msg):
df > stamp

result = df[df.iloc[:, :-1] > stamp]
result = df.where(df.iloc[:, :-1] > stamp)

expected = df.copy()
expected.loc[[0, 1], "A"] = np.nan
Expand All @@ -379,7 +379,7 @@ def test_where_none(self):
# GH 4667
# setting with None changes dtype
df = DataFrame({"series": Series(range(10))}).astype(float)
df[df > 7] = None
df.mask(df > 7, None, True)
expected = DataFrame(
{"series": Series([0, 1, 2, 3, 4, 5, 6, 7, np.nan, np.nan])}
)
Expand Down Expand Up @@ -443,7 +443,7 @@ def test_where_complex(self):
# GH 6345
expected = DataFrame([[1 + 1j, 2], [np.nan, 4 + 1j]], columns=["a", "b"])
df = DataFrame([[1 + 1j, 2], [5 + 1j, 4 + 1j]], columns=["a", "b"])
df[df.abs() >= 5] = np.nan
df.mask(df.abs() >= 5, np.nan, True)
tm.assert_frame_equal(df, expected)

def test_where_axis(self):
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/frame/methods/test_clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,9 @@ def test_clip_against_frame(self, axis):
ub_mask = df >= ub
mask = ~lb_mask & ~ub_mask

tm.assert_frame_equal(clipped_df[lb_mask], lb[lb_mask])
tm.assert_frame_equal(clipped_df[ub_mask], ub[ub_mask])
tm.assert_frame_equal(clipped_df[mask], df[mask])
tm.assert_frame_equal(clipped_df.where(lb_mask), lb.where(lb_mask))
tm.assert_frame_equal(clipped_df.where(ub_mask), ub.where(ub_mask))
tm.assert_frame_equal(clipped_df.where(mask), df.where(mask))

def test_clip_against_unordered_columns(self):
# GH#20911
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/frame/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -992,12 +992,12 @@ def test_operators_none_as_na(self, op):
filled = df.fillna(np.nan)
result = op(df, 3)
expected = op(filled, 3).astype(object)
expected[com.isna(expected)] = None
expected.mask(com.isna(expected), None, True)
tm.assert_frame_equal(result, expected)

result = op(df, df)
expected = op(filled, filled).astype(object)
expected[com.isna(expected)] = None
expected.mask(com.isna(expected), None, True)
tm.assert_frame_equal(result, expected)

result = op(df, df.fillna(7))
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/test_block_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def test_boolean_set_uncons(self, float_frame):
expected = float_frame.values.copy()
expected[expected > 1] = 2

float_frame[float_frame > 1] = 2
float_frame.mask(float_frame > 1, 2, True)
tm.assert_almost_equal(expected, float_frame.values)

def test_constructor_with_convert(self):
Expand Down
12 changes: 6 additions & 6 deletions pandas/tests/frame/test_nonunique_indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,29 +323,29 @@ def test_getitem_boolean_series_with_duplicate_columns(self):
result = df[df.C > 6]
check(result, expected)

def test_getitem_boolean_frame_with_duplicate_columns(self):
def test_boolean_frame_with_duplicate_columns(self):
dups = ["A", "A", "C", "D"]

# where
df = DataFrame(
np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64"
)
# `df > 6` is a DataFrame with the same shape+alignment as df
expected = df[df > 6]
expected = df.where(df > 6)
expected.columns = dups
df = DataFrame(np.arange(12).reshape(3, 4), columns=dups, dtype="float64")
result = df[df > 6]
result = df.where(df > 6)
check(result, expected)

def test_getitem_boolean_frame_unaligned_with_duplicate_columns(self):
def test_boolean_frame_unaligned_with_duplicate_columns(self):
# `df.A > 6` is a DataFrame with a different shape from df
dups = ["A", "A", "C", "D"]

# boolean with the duplicate raises
df = DataFrame(np.arange(12).reshape(3, 4), columns=dups, dtype="float64")
msg = "cannot reindex from a duplicate axis"
with pytest.raises(ValueError, match=msg):
df[df.A > 6]
df.where(df.A > 6)

def test_column_dups_indexing(self):

Expand Down Expand Up @@ -514,7 +514,7 @@ def test_masking_duplicate_columns_mixed_dtypes(
df2 = DataFrame(np.array(data2))
df = pd.concat([df1, df2], axis=1)

result = df[df > 2]
result = df.where(df > 2)
expected = DataFrame(
{i: np.array(col) for i, col in enumerate(expected_data)}
).rename(columns={2: 0, 3: 1})
Expand Down
Loading