diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index da0d85b7bb529..fc8b73921763a 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -674,6 +674,7 @@ Sparse
 
 ExtensionArray
 ^^^^^^^^^^^^^^
+- Bug in :class:`NumpyExtensionArray` where an error was not raised when setting a value that cannot be inserted without changing the dtype (:issue:`51044`)
 - Bug in :meth:`.arrays.ArrowExtensionArray.__setitem__` which caused wrong behavior when using an integer array with repeated values as a key (:issue:`58530`)
 - Bug in :meth:`api.types.is_datetime64_any_dtype` where a custom :class:`ExtensionDtype` would return ``False`` for array-likes (:issue:`57055`)
 - Bug in comparison between object with :class:`ArrowDtype` and incompatible-dtyped (e.g. string vs bool) incorrectly raising instead of returning all-``False`` (for ``==``) or all-``True`` (for ``!=``) (:issue:`59505`)
diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py
index aafcd82114b97..8cbb29afc0d08 100644
--- a/pandas/core/arrays/numpy_.py
+++ b/pandas/core/arrays/numpy_.py
@@ -17,6 +17,7 @@
 from pandas.core.dtypes.dtypes import NumpyEADtype
 from pandas.core.dtypes.missing import isna
 
+import pandas as pd
 from pandas.core import (
     arraylike,
     missing,
@@ -236,6 +237,59 @@ def _values_for_factorize(self) -> tuple[np.ndarray, float | None]:
             fv = np.nan
         return self._ndarray, fv
 
+    def _validate_setitem_value(self, value):
+        """
+        Check that ``value`` can be set without changing the dtype.
+
+        Parameters
+        ----------
+        value : scalar or list-like
+            The value passed to ``__setitem__``.
+
+        Returns
+        -------
+        scalar or list-like
+            ``value``, unmodified.
+
+        Raises
+        ------
+        TypeError
+            If ``value`` cannot be held losslessly by ``self.dtype``.
+        """
+        if type(value) is self.dtype.type:
+            return value
+
+        if isinstance(value, (list, np.ndarray, NumpyExtensionArray, pd.Series)):
+            if not isinstance(value, list) and value.dtype == self.dtype:
+                return value
+            try:
+                pd.array(value, dtype=self.dtype)
+            except ValueError:
+                # Fall through to the TypeError below; the scalar checks are
+                # skipped because isna() is not a single bool for list-likes.
+                pass
+            else:
+                return value
+        elif isna(value):
+            return value
+        else:
+            kind = self.dtype.kind
+            if (
+                ((lib.is_integer(value) or lib.is_float(value)) and kind in "iuf")
+                or (isinstance(value, str) and kind in "US")
+                or kind == "O"
+            ):
+                try:
+                    if self.dtype.type(value) == value:
+                        return value
+                except (OverflowError, ValueError):
+                    # e.g. inf or an out-of-range integer for an integer dtype
+                    pass
+
+        raise TypeError(
+            f"Invalid value '{value!s}' with type {type(value)} for dtype {self.dtype}"
+        )
+
     # Base EA class (and all other EA classes) don't have limit_area keyword
     # This can be removed here as well when the interpolate ffill/bfill method
     # deprecation is enforced
diff --git a/pandas/tests/arrays/numpy_/test_numpy.py b/pandas/tests/arrays/numpy_/test_numpy.py
index e86eb014465e1..2fd80a93b757e 100644
--- a/pandas/tests/arrays/numpy_/test_numpy.py
+++ b/pandas/tests/arrays/numpy_/test_numpy.py
@@ -3,6 +3,8 @@
 the interface tests.
 """
 
+from datetime import datetime
+
 import numpy as np
 import pytest
 
@@ -195,6 +197,93 @@ def test_validate_reduction_keyword_args():
         arr.all(keepdims=True)
 
 
+@pytest.mark.parametrize(
+    "value, expected_error",
+    [
+        (True, True),
+        (5, False),
+        (5.0, False),
+        (5.5, True),
+        (1 + 2j, True),
+        ("t", True),
+        (datetime(2024, 1, 1), True),
+    ],
+)
+def test_int_arr_validate_setitem_value(value, expected_error):
+    arr = pd.Series(range(5), dtype="int").array
+    if expected_error:
+        with pytest.raises(TypeError, match="Invalid value"):
+            arr._validate_setitem_value(value)
+    else:
+        arr[0] = value
+        assert arr[0] == value
+
+
+@pytest.mark.parametrize(
+    "value, expected_error",
+    [
+        (True, True),
+        (5, False),
+        (5.0, False),
+        (5.5, True),
+        (1 + 2j, True),
+        ("t", True),
+        (datetime(2024, 1, 1), True),
+    ],
+)
+def test_uint_arr_validate_setitem_value(value, expected_error):
+    arr = pd.Series(range(5), dtype="uint").array
+    if expected_error:
+        with pytest.raises(TypeError, match="Invalid value"):
+            arr._validate_setitem_value(value)
+    else:
+        arr[0] = value
+        assert arr[0] == value
+
+
+@pytest.mark.parametrize(
+    "value, expected_error",
+    [
+        (True, True),
+        (5, False),
+        (5.0, False),
+        (5.5, False),
+        (1 + 2j, True),
+        ("t", True),
+        (datetime(2024, 1, 1), True),
+    ],
+)
+def test_float_arr_validate_setitem_value(value, expected_error):
+    arr = pd.Series(range(5), dtype="float").array
+    if expected_error:
+        with pytest.raises(TypeError, match="Invalid value"):
+            arr._validate_setitem_value(value)
+    else:
+        arr[0] = value
+        assert arr[0] == value
+
+
+@pytest.mark.parametrize(
+    "value, expected_error",
+    [
+        (True, True),
+        (5, True),
+        (5.0, True),
+        (5.5, True),
+        ("t", False),
+        (datetime(2024, 1, 1), True),
+    ],
+)
+def test_str_arr_validate_setitem_value(value, expected_error):
+    arr = NumpyExtensionArray(np.array(["foo", "bar", "test"], dtype="str"))
+    if expected_error:
+        with pytest.raises(TypeError, match="Invalid value"):
+            arr._validate_setitem_value(value)
+    else:
+        arr[0] = value
+        assert arr[0] == str(value)
+
+
 def test_np_max_nested_tuples():
     # case where checking in ufunc.nout works while checking for tuples
     # does not
@@ -275,12 +364,15 @@ def test_setitem_object_typecode(dtype):
 def test_setitem_no_coercion():
     # https://github.com/pandas-dev/pandas/issues/28150
     arr = NumpyExtensionArray(np.array([1, 2, 3]))
-    with pytest.raises(ValueError, match="int"):
+    with pytest.raises(TypeError):
         arr[0] = "a"
 
-    # With a value that we do coerce, check that we coerce the value
-    # and not the underlying array.
-    arr[0] = 2.5
+    # A value that cannot be held losslessly raises rather than being
+    # coerced; a valid value leaves the underlying dtype unchanged.
+    with pytest.raises(TypeError):
+        arr[0] = 2.5
+
+    arr[0] = 9
     assert isinstance(arr[0], (int, np.integer)), type(arr[0])
 
 
@@ -296,7 +388,10 @@ def test_setitem_preserves_views():
     assert view2[0] == 9
     assert view3[0] == 9
 
-    arr[-1] = 2.5
+    with pytest.raises(TypeError):
+        arr[-1] = 2.5
+
+    arr[-1] = 4
     view1[-1] = 5
     assert arr[-1] == 5
 