Skip to content

TST: expand tests for ExtensionArray setitem with nullable arrays #31741

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Feb 13, 2020
92 changes: 80 additions & 12 deletions pandas/tests/extension/base/setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import pytest

import pandas as pd
from pandas.core.arrays.numpy_ import PandasDtype

from .base import BaseExtensionTests

Expand Down Expand Up @@ -93,6 +92,86 @@ def test_setitem_iloc_scalar_multiple_homogoneous(self, data):
df.iloc[10, 1] = data[1]
assert df.loc[10, "B"] == data[1]

@pytest.mark.parametrize(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

think about adding a new class here; these are specifically for setitem_mask (could be a followon)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

think about adding a new class here; these are specifically for setitem_mask (could be a followon)

In general, changing names is annoying for downstream consumers of those tests (this are the base extension tests), so not worth here I think

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, I don't buy your argument, these are experimental features where we actually to expand test coverage. its going to happen (but ok for now)

"mask",
[
np.array([True, True, True, False, False]),
pd.array([True, True, True, False, False], dtype="boolean"),
],
ids=["numpy-array", "boolean-array"],
)
def test_setitem_mask(self, data, mask, box_in_series):
arr = data[:5].copy()
expected = arr.take([0, 0, 0, 3, 4])
if box_in_series:
arr = pd.Series(arr)
expected = pd.Series(expected)
arr[mask] = data[0]
self.assert_equal(expected, arr)

def test_setitem_mask_raises(self, data, box_in_series):
# wrong length
mask = np.array([True, False])

if box_in_series:
data = pd.Series(data)

with pytest.raises(IndexError):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure if an error message should be matched here and below?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added part of the message

data[mask] = data[0]

mask = pd.array(mask, dtype="boolean")
with pytest.raises(IndexError):
data[mask] = data[0]

def test_setitem_mask_boolean_array_raises(self, data, box_in_series):
# missing values in mask
mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean")
mask[:2] = pd.NA

if box_in_series:
data = pd.Series(data)

msg = (
"Cannot mask with a boolean indexer containing NA values|"
"cannot mask with array containing NA / NaN values"
)
with pytest.raises(ValueError, match=msg):
data[mask] = data[0]

@pytest.mark.parametrize(
"idx",
[[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])],
ids=["list", "integer-array", "numpy-array"],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

labels should be: extension-integer-array, numpy-integer-array to be more informative

)
def test_setitem_integer_array(self, data, idx, box_in_series):
arr = data[:5].copy()
expected = data.take([0, 0, 0, 3, 4])

if box_in_series:
arr = pd.Series(arr)
expected = pd.Series(expected)

arr[idx] = arr[0]
self.assert_equal(arr, expected)

@pytest.mark.parametrize(
"idx",
[[0, 1, 2, pd.NA], pd.array([0, 1, 2, pd.NA], dtype="Int64")],
ids=["list", "integer-array"],
)
def test_setitem_integer_with_missing_raises(self, data, idx):
arr = data.copy()

msg = "Cannot index with an integer indexer containing NA values"
with pytest.raises(ValueError, match=msg):
arr[idx] = arr[0]

# TODO this raises KeyError about labels not found (it tries label-based)
# import pandas._testing as tm
# s = pd.Series(data, index=[tm.rands(4) for _ in range(len(data))])
# with pytest.raises(ValueError, match=msg):
# s[idx] = s[0]

@pytest.mark.parametrize("as_callable", [True, False])
@pytest.mark.parametrize("setter", ["loc", None])
def test_setitem_mask_aligned(self, data, as_callable, setter):
Expand Down Expand Up @@ -196,14 +275,3 @@ def test_setitem_preserves_views(self, data):
data[0] = data[1]
assert view1[0] == data[1]
assert view2[0] == data[1]

def test_setitem_nullable_mask(self, data):
# GH 31446
# TODO: there is some issue with PandasArray, therefore,
# TODO: skip the setitem test for now, and fix it later
if data.dtype != PandasDtype("object"):
arr = data[:5]
expected = data.take([0, 0, 0, 3, 4])
mask = pd.array([True, True, True, False, False])
arr[mask] = data[0]
self.assert_extension_array_equal(expected, arr)
37 changes: 37 additions & 0 deletions pandas/tests/extension/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,43 @@ def test_setitem_scalar_key_sequence_raise(self, data):
# Failed: DID NOT RAISE <class 'ValueError'>
super().test_setitem_scalar_key_sequence_raise(data)

# TODO: there is some issue with PandasArray, therefore,
# skip the setitem test for now, and fix it later (GH 31446)

@skip_nested
def test_setitem_mask(self, data, box_in_series):
super().test_setitem_mask(data, box_in_series)

@skip_nested
def test_setitem_mask_boolean_array(self, data, box_in_series):
super().test_setitem_mask_boolean_array(data, box_in_series)

@skip_nested
def test_setitem_mask_raises(self, data, box_in_series):
super().test_setitem_mask_raises(data, box_in_series)

@skip_nested
def test_setitem_mask_boolean_array_raises(self, data, box_in_series):
super().test_setitem_mask_boolean_array_raises(data, box_in_series)

@skip_nested
@pytest.mark.parametrize(
"idx",
[[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])],
ids=["list", "integer-array", "numpy-array"],
)
def test_setitem_integer_array(self, data, idx, box_in_series):
super().test_setitem_integer_array(data, idx, box_in_series)

@skip_nested
@pytest.mark.parametrize(
"idx",
[[0, 1, 2, pd.NA], pd.array([0, 1, 2, pd.NA], dtype="Int64")],
ids=["list", "integer-array"],
)
def test_setitem_integer_with_missing_raises(self, data, idx):
super().test_setitem_integer_with_missing_raises(data, idx)


@skip_nested
class TestParsing(BaseNumPyTests, base.BaseParsingTests):
Expand Down