From 153086f7829b075aa86ab81d3e6a083001da3e8a Mon Sep 17 00:00:00 2001
From: maushumee
Date: Thu, 25 Jul 2024 22:02:48 -0400
Subject: [PATCH 1/2] Add _validate_setitem_value method to raise TypeError and fix tests

---
 pandas/core/arrays/numpy_.py             | 79 ++++++++++++++++++++++++
 pandas/tests/arrays/numpy_/test_numpy.py | 12 +++-
 2 files changed, 88 insertions(+), 3 deletions(-)

diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py
index 07eb91e0cb13b..8a0a7b7ef7c21 100644
--- a/pandas/core/arrays/numpy_.py
+++ b/pandas/core/arrays/numpy_.py
@@ -232,6 +232,85 @@ def _validate_scalar(self, fill_value):
             fill_value = self.dtype.na_value
         return fill_value
 
+    def _validate_setitem_value(self, value):
+        """
+        Check if we have a scalar that we can cast losslessly.
+
+        Raises
+        ------
+        TypeError
+        """
+
+        kind = self.dtype.kind
+
+        if kind == "b":
+            if lib.is_bool(value) or np.can_cast(type(value), self.dtype.type):
+                return value
+            if isinstance(value, NumpyExtensionArray) and (
+                lib.is_bool_array(value.to_numpy())
+                or lib.is_bool_list(value.to_numpy())
+            ):
+                return value
+
+        elif kind == "i":
+            if lib.is_integer(value) or np.can_cast(type(value), self.dtype.type):
+                return value
+            if isinstance(value, NumpyExtensionArray) and lib.is_integer_array(
+                value.to_numpy()
+            ):
+                return value
+
+        elif kind == "u":
+            if (lib.is_integer(value) and value > -1) or np.can_cast(
+                type(value), self.dtype.type
+            ):
+                return value
+
+        elif kind == "c":
+            if lib.is_complex(value) or np.can_cast(type(value), self.dtype.type):
+                return value
+
+        elif kind == "S":
+            if isinstance(value, str) or np.can_cast(type(value), self.dtype.type):
+                return value
+            if isinstance(value, NumpyExtensionArray) and lib.is_string_array(
+                value.to_numpy()
+            ):
+                return value
+
+        elif kind == "M":
+            if isinstance(value, np.datetime64):
+                return value
+            if isinstance(value, NumpyExtensionArray) and (
+                lib.is_date_array(value.to_numpy())
+                or lib.is_datetime_array(value.to_numpy())
+                or lib.is_datetime64_array(value.to_numpy())
+                or lib.is_datetime_with_singletz_array(value.to_numpy())
+            ):
+                return value
+
+        elif kind == "m":
+            if isinstance(value, np.timedelta64):
+                return value
+            if isinstance(value, NumpyExtensionArray) and (
+                lib.is_timedelta_or_timedelta64_array(value.to_numpy())
+                or lib.is_time_array(value.to_numpy())
+            ):
+                return value
+
+        elif kind == "f":
+            if lib.is_float(value) or np.can_cast(type(value), self.dtype.type):
+                return value
+            if isinstance(value, NumpyExtensionArray) and lib.is_float_array(
+                value.to_numpy()
+            ):
+                return value
+
+        elif np.can_cast(type(value), self.dtype.type):
+            return value
+
+        raise TypeError(f"Invalid value '{value!s}' for dtype {self.dtype}")
+
     def _values_for_factorize(self) -> tuple[np.ndarray, float | None]:
         if self.dtype.kind in "iub":
             fv = None
diff --git a/pandas/tests/arrays/numpy_/test_numpy.py b/pandas/tests/arrays/numpy_/test_numpy.py
index e86eb014465e1..ed5dd1e17318d 100644
--- a/pandas/tests/arrays/numpy_/test_numpy.py
+++ b/pandas/tests/arrays/numpy_/test_numpy.py
@@ -275,12 +275,15 @@ def test_setitem_object_typecode(dtype):
 def test_setitem_no_coercion():
     # https://github.com/pandas-dev/pandas/issues/28150
     arr = NumpyExtensionArray(np.array([1, 2, 3]))
-    with pytest.raises(ValueError, match="int"):
+    with pytest.raises(TypeError):
         arr[0] = "a"
 
     # With a value that we do coerce, check that we coerce the value
     # and not the underlying array.
-    arr[0] = 2.5
+    with pytest.raises(TypeError):
+        arr[0] = 2.5
+
+    arr[0] = 9
     assert isinstance(arr[0], (int, np.integer)), type(arr[0])
 
 
@@ -296,7 +299,10 @@ def test_setitem_preserves_views():
     assert view2[0] == 9
     assert view3[0] == 9
 
-    arr[-1] = 2.5
+    with pytest.raises(TypeError):
+        arr[-1] = 2.5
+
+    arr[-1] = 4
     view1[-1] = 5
     assert arr[-1] == 5
 

From b29e151e1eecc0014bcf84ea665e83d6f06dd8d0 Mon Sep 17 00:00:00 2001
From: maushumee
Date: Fri, 26 Jul 2024 09:35:00 -0400
Subject: [PATCH 2/2] Add code annotations and update documentation

---
Review note: the return annotation is ``object`` rather than
``type(value) | None``. ``value`` is a parameter, so ``type(value)`` is not a
valid type expression in the signature, and the method never returns None
(it returns ``value`` unchanged or raises TypeError). The post-image blob id
on the numpy_.py index line below predates this correction.

 doc/source/whatsnew/v3.0.0.rst | 1 +
 pandas/core/arrays/numpy_.py   | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index e71220102cbb4..f2d0af4122214 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -627,6 +627,7 @@ ExtensionArray
 - Bug in :meth:`.arrays.ArrowExtensionArray.__setitem__` which caused wrong behavior when using an integer array with repeated values as a key (:issue:`58530`)
 - Bug in :meth:`api.types.is_datetime64_any_dtype` where a custom :class:`ExtensionDtype` would return ``False`` for array-likes (:issue:`57055`)
 - Bug in various :class:`DataFrame` reductions for pyarrow temporal dtypes returning incorrect dtype when result was null (:issue:`59234`)
+- Bug in :class:`NumpyExtensionArray` where setting a value that cannot be cast losslessly to the array's dtype did not raise a ``TypeError`` (:issue:`51044`)
 
 Styler
 ^^^^^^
diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py
index 8a0a7b7ef7c21..40555e8413429 100644
--- a/pandas/core/arrays/numpy_.py
+++ b/pandas/core/arrays/numpy_.py
@@ -232,7 +232,7 @@ def _validate_scalar(self, fill_value):
             fill_value = self.dtype.na_value
         return fill_value
 
-    def _validate_setitem_value(self, value):
+    def _validate_setitem_value(self, value) -> object:
         """
         Check if we have a scalar that we can cast losslessly.
 