Skip to content

BUG: Fix for _validate_setitem_value fails to raise for PandasArray #59336

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 10 commits into from
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -674,6 +674,7 @@ Sparse

ExtensionArray
^^^^^^^^^^^^^^
- Bug in :class:`NumpyExtensionArray` where an error was not raised when setting a value that could not be inserted without changing the dtype (:issue:`51044`)
- Bug in :meth:`.arrays.ArrowExtensionArray.__setitem__` which caused wrong behavior when using an integer array with repeated values as a key (:issue:`58530`)
- Bug in :meth:`api.types.is_datetime64_any_dtype` where a custom :class:`ExtensionDtype` would return ``False`` for array-likes (:issue:`57055`)
- Bug in comparison between object with :class:`ArrowDtype` and incompatible-dtyped (e.g. string vs bool) incorrectly raising instead of returning all-``False`` (for ``==``) or all-``True`` (for ``!=``) (:issue:`59505`)
Expand Down
50 changes: 50 additions & 0 deletions pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from pandas.core.dtypes.dtypes import NumpyEADtype
from pandas.core.dtypes.missing import isna

import pandas as pd
from pandas.core import (
arraylike,
missing,
Expand Down Expand Up @@ -236,6 +237,55 @@ def _values_for_factorize(self) -> tuple[np.ndarray, float | None]:
fv = np.nan
return self._ndarray, fv

def _validate_setitem_value(self, value):
"""
Check if we have a scalar that we can cast losslessly.

Raises
------
TypeError
"""

if type(value) == self.dtype.type:
return value

if (
isinstance(value, NumpyExtensionArray)
or isinstance(value, np.ndarray)
or isinstance(value, pd.Series)
) and value.dtype == self.dtype:
return value

if (
isinstance(value, list)
or isinstance(value, NumpyExtensionArray)
or isinstance(value, np.ndarray)
or isinstance(value, pd.Series)
):
try:
_ = pd.array(value, dtype=self.dtype)
return value
except ValueError:
print("Caught the error")

if (
(
(lib.is_integer(value) or lib.is_float(value))
and self.dtype.kind in "iuf"
)
or (isinstance(value, str) and self.dtype.kind in "US")
or (self.dtype.kind == "O")
) and not isna(value):
if self.dtype.type(value) == value:
return value

if isna(value):
return value

raise TypeError(
f"Invalid value '{value!s}' with type {type(value)} for dtype {self.dtype}"
)

# Base EA class (and all other EA classes) don't have limit_area keyword
# This can be removed here as well when the interpolate ffill/bfill method
# deprecation is enforced
Expand Down
101 changes: 98 additions & 3 deletions pandas/tests/arrays/numpy_/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
the interface tests.
"""

from datetime import datetime

import numpy as np
import pytest

Expand Down Expand Up @@ -195,6 +197,93 @@ def test_validate_reduction_keyword_args():
arr.all(keepdims=True)


@pytest.mark.parametrize(
    "value, expectedError",
    [
        (True, True),
        (5, False),
        (5.0, False),
        (5.5, True),
        (1 + 2j, True),
        ("t", True),
        (datetime.now(), True),
    ],
)
def test_int_arr_validate_setitem_value(value, expectedError):
    # Per the cases above, an int-backed array accepts 5 and 5.0 and rejects
    # bool, fractional float, complex, str and datetime values.
    int_array = pd.Series(range(5), dtype="int").array

    if not expectedError:
        # Accepted values must be readable back after assignment.
        int_array[0] = value
        assert int_array[0] == value
        return

    with pytest.raises(TypeError):
        int_array._validate_setitem_value(value)


@pytest.mark.parametrize(
    "value, expectedError",
    [
        (True, True),
        (5, False),
        (5.0, False),
        (5.5, True),
        (1 + 2j, True),
        ("t", True),
        (datetime.now(), True),
    ],
)
def test_uint_arr_validate_setitem_value(value, expectedError):
    # Same acceptance table as the signed-int test, against a "uint" array.
    uint_array = pd.Series(range(5), dtype="uint").array

    if not expectedError:
        # Accepted values must be readable back after assignment.
        uint_array[0] = value
        assert uint_array[0] == value
        return

    with pytest.raises(TypeError):
        uint_array._validate_setitem_value(value)


@pytest.mark.parametrize(
    "value, expectedError",
    [
        (True, True),
        (5, False),
        (5.0, False),
        (5.5, False),
        (1 + 2j, True),
        ("t", True),
        (datetime.now(), True),
    ],
)
def test_float_arr_validate_setitem_value(value, expectedError):
    # Per the cases above, a float-backed array accepts ints and floats
    # (including 5.5) and rejects bool, complex, str and datetime values.
    float_array = pd.Series(range(5), dtype="float").array

    if not expectedError:
        # Accepted values must be readable back after assignment.
        float_array[0] = value
        assert float_array[0] == value
        return

    with pytest.raises(TypeError):
        float_array._validate_setitem_value(value)


@pytest.mark.parametrize(
    "value, expectedError",
    [
        (True, True),
        (5, True),
        (5.0, True),
        (5.5, True),
        ("t", False),
        (datetime.now(), True),
    ],
)
def test_str_arr_validate_setitem_value(value, expectedError):
    # Per the cases above, a str-backed array accepts only str values.
    str_array = NumpyExtensionArray(np.array(["foo", "bar", "test"], dtype="str"))

    if not expectedError:
        # Accepted values must be readable back (as str) after assignment.
        str_array[0] = value
        assert str_array[0] == str(value)
        return

    with pytest.raises(TypeError):
        str_array._validate_setitem_value(value)


def test_np_max_nested_tuples():
# case where checking in ufunc.nout works while checking for tuples
# does not
Expand Down Expand Up @@ -275,12 +364,15 @@ def test_setitem_object_typecode(dtype):
def test_setitem_no_coercion():
    # https://github.com/pandas-dev/pandas/issues/28150
    arr = NumpyExtensionArray(np.array([1, 2, 3]))

    # A value of an incompatible type is rejected outright.
    with pytest.raises(TypeError):
        arr[0] = "a"

    # A value that would need a lossy cast (2.5 into an int array) is
    # rejected too, rather than being silently coerced.
    with pytest.raises(TypeError):
        arr[0] = 2.5

    # A lossless value is stored and read back as an integer.
    arr[0] = 9
    assert isinstance(arr[0], (int, np.integer)), type(arr[0])


Expand All @@ -296,7 +388,10 @@ def test_setitem_preserves_views():
assert view2[0] == 9
assert view3[0] == 9

arr[-1] = 2.5
with pytest.raises(TypeError):
arr[-1] = 2.5

arr[-1] = 4
view1[-1] = 5
assert arr[-1] == 5

Expand Down
Loading