Skip to content

TST: tests using invalid_scalar fixture #44175

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Oct 30, 2021
9 changes: 9 additions & 0 deletions pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
is_datetime64tz_dtype,
is_dtype_equal,
is_integer,
is_list_like,
is_object_dtype,
is_scalar,
is_string_dtype,
Expand Down Expand Up @@ -927,6 +928,14 @@ def __getitem__(
indices = np.arange(len(self), dtype=np.int32)[key]
return self.take(indices)

elif not is_list_like(key):
# e.g. "foo" or 2.5
# exception message copied from numpy
raise IndexError(
r"only integers, slices (`:`), ellipsis (`...`), numpy.newaxis "
r"(`None`) and integer or boolean arrays are valid indices"
)

else:
# TODO: I think we can avoid densifying when masking a
# boolean SparseArray with another. Need to look at the
Expand Down
12 changes: 12 additions & 0 deletions pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,13 @@ def __getitem__(
elif item[1] is Ellipsis:
item = item[0]

if is_scalar(item) and not is_integer(item):
# e.g. "foo" or 2.5
# exception message copied from numpy
raise IndexError(
r"only integers, slices (`:`), ellipsis (`...`), numpy.newaxis "
r"(`None`) and integer or boolean arrays are valid indices"
)
# We are not an array indexer, so maybe e.g. a slice or integer
# indexer. We dispatch to pyarrow.
value = self._data[item]
Expand Down Expand Up @@ -392,6 +399,11 @@ def _cmp_method(self, other, op):
# TODO(ARROW-9429): Add a .to_numpy() to ChunkedArray
return BooleanArray._from_sequence(result.to_pandas().values)

def insert(self, loc: int, item):
    """Validate ``item`` and delegate to the parent class's ``insert``.

    Raises
    ------
    TypeError
        If ``item`` is neither a ``str`` nor ``libmissing.NA``.
    """
    # Only strings and the NA singleton are acceptable scalars here.
    is_valid_scalar = isinstance(item, str) or item is libmissing.NA
    if not is_valid_scalar:
        raise TypeError("Scalar must be NA or str")
    return super().insert(loc, item)

def __setitem__(self, key: int | slice | np.ndarray, value: Any) -> None:
"""Set one or more values inplace.

Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/extension/base/getitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,33 @@ def test_getitem_scalar(self, data):
result = pd.Series(data)[0]
assert isinstance(result, data.dtype.type)

def test_getitem_invalid(self, data):
# TODO: box over scalar, [scalar], (scalar,)?

msg = (
r"only integers, slices \(`:`\), ellipsis \(`...`\), numpy.newaxis "
r"\(`None`\) and integer or boolean arrays are valid indices"
)
with pytest.raises(IndexError, match=msg):
data["foo"]
with pytest.raises(IndexError, match=msg):
data[2.5]

ub = len(data)
msg = "|".join(
[
"list index out of range", # json
"index out of bounds", # pyarrow
"Out of bounds access", # Sparse
f"index {ub+1} is out of bounds for axis 0 with size {ub}",
f"index -{ub+1} is out of bounds for axis 0 with size {ub}",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  with pytest.raises(IndexError, match=msg):

E AssertionError: Regex pattern 'list index out of range|index out of bounds|Out of bounds access|index 101 is out of bounds for axis 0 with size 100|index -101 is out of bounds for axis 0 with size 100' does not match 'loc must be an integer between -100 and 100'.

I think these assertions are failing on the 3.10 builds.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should be addressed by just-pushed commit in #44242

]
)
with pytest.raises(IndexError, match=msg):
data[ub + 1]
with pytest.raises(IndexError, match=msg):
data[-ub - 1]

def test_getitem_scalar_na(self, data_missing, na_cmp, na_value):
result = data_missing[0]
assert na_cmp(result, na_value)
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/extension/base/setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,3 +367,11 @@ def test_delitem_series(self, data):
expected = ser[taker]
del ser[1]
self.assert_series_equal(ser, expected)

def test_setitem_invalid(self, data, invalid_scalar):
    """Assigning ``invalid_scalar`` by position or by full slice must raise."""
    # Messages vary by subclass, so only the exception type is checked;
    # the empty pattern is kept so that ``match`` is still passed explicitly.
    msg = ""
    expected_errors = (ValueError, TypeError)

    # ``slice(None)`` is the key produced by ``data[:]``.
    for key in (0, slice(None)):
        with pytest.raises(expected_errors, match=msg):
            data[key] = invalid_scalar
12 changes: 11 additions & 1 deletion pandas/tests/extension/json/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@
from pandas._typing import type_t

from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
from pandas.core.dtypes.common import pandas_dtype
from pandas.core.dtypes.common import (
is_list_like,
pandas_dtype,
)

import pandas as pd
from pandas.api.extensions import (
Expand Down Expand Up @@ -103,6 +106,13 @@ def __getitem__(self, item):
elif isinstance(item, slice):
# slice
return type(self)(self.data[item])
elif not is_list_like(item):
# e.g. "foo" or 2.5
# exception message copied from numpy
raise IndexError(
r"only integers, slices (`:`), ellipsis (`...`), numpy.newaxis "
r"(`None`) and integer or boolean arrays are valid indices"
)
else:
item = pd.api.indexers.check_array_indexer(self, item)
if is_bool_dtype(item.dtype):
Expand Down
5 changes: 5 additions & 0 deletions pandas/tests/extension/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,11 @@ def test_concat(self, data, in_frame):


class TestSetitem(BaseNumPyTests, base.BaseSetitemTests):
@skip_nested
def test_setitem_invalid(self, data, invalid_scalar):
    """Run the shared invalid-setitem check from the base test class."""
    # object dtype can hold anything, so doesn't raise
    # (presumably the reason for the skip_nested marker -- confirm)
    base_check = super().test_setitem_invalid
    base_check(data, invalid_scalar)

@skip_nested
def test_setitem_sequence_broadcasts(self, data, box_in_series):
# ValueError: cannot set using a list-like indexer with a different
Expand Down
7 changes: 0 additions & 7 deletions pandas/tests/extension/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,13 +160,6 @@ def test_value_counts(self, all_data, dropna):
def test_value_counts_with_normalize(self, data):
pass

def test_insert_invalid(self, data, invalid_scalar, request):
    """Run the base invalid-insert check, marked xfail for pyarrow storage."""
    uses_pyarrow = data.dtype.storage == "pyarrow"
    if uses_pyarrow:
        # Attach the expected-failure marker to this test node before
        # delegating to the shared check.
        request.node.add_marker(
            pytest.mark.xfail(reason="casts invalid_scalar to string")
        )

    super().test_insert_invalid(data, invalid_scalar)


class TestCasting(base.BaseCastingTests):
pass
Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/indexes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,33 @@ def test_insert_base(self, index):
# test 0th element
assert index[0:4].equals(result.insert(0, index[0]))

def test_insert_out_of_bounds(self, index):
    """Check errors from ``insert`` for a fractional and for out-of-range locations."""
    # TypeError/IndexError matches what np.insert raises in these cases
    size = len(index)

    if size == 0:
        expected_exc = IndexError
        # 0 vs 0.5 in error message varies with numpy version
        float_msg = "index (0|0.5) is out of bounds for axis 0 with size 0"
    else:
        expected_exc = TypeError
        float_msg = (
            "slice indices must be integers or None or have an __index__ method"
        )
    with pytest.raises(expected_exc, match=float_msg):
        index.insert(0.5, "foo")

    # Either message is accepted for a location outside the valid range.
    oob_msg = "|".join(
        (
            r"index -?\d+ is out of bounds for axis 0 with size \d+",
            "loc must be an integer between",
        )
    )
    # One position past the end, then one position before the start.
    for loc in (size + 1, -size - 1):
        with pytest.raises(IndexError, match=oob_msg):
            index.insert(loc, 1)

def test_delete_base(self, index):
if not len(index):
return
Expand Down