API: Special case how numpy scalars are coerced to signed integer

seberg · seberg · commit a2e76ff3dc1e · 2020-10-01T10:57:06.000-05:00
This removes one of the larger changes to array-coercion, which meant that NumPy scalars were always coerced like a 0-D array would be (i.e. using normal casting). When the assignment is explicitly to an integer, `scalar.__int__()` will now be used instead (as was the case previously). Since previously this was handled differently, a *single* scalar is still converted using casting: np.array(np.float64(np.nan), dtype=np.int64) succeeds, but anything else fails, such as: np.array([np.float64(np.nan)], dtype=np.int64); arr1d_int64[()] = np.float64(np.nan); np.array(np.array(np.nan), dtype=np.int64). This does not affect Python scalars, which always raise, because they are always converted using `scalar.__int__()`. Unsigned integers always supported casting from their signed equivalent, so the difference is much less visible for them and this chooses to always use the casting behaviour. The main reason for this change is to help pandas: pandas-dev/pandas#35481
diff --git a/doc/release/upcoming_changes/16200.compatibility.rst b/doc/release/upcoming_changes/16200.compatibility.rst
@@ -8,14 +8,26 @@ error::
 
     np.array([np.float64(np.nan)], dtype=np.int64)
 
-will succeed at this time (this may change) and return an undefined result
-(usually the smallest possible integer).  This also affects assignments::
+will succeed and return an undefined result (usually the smallest possible
+integer).  This also affects assignments::
 
     arr[0] = np.float64(np.nan)
 
-Note, this already happened for ``np.array(np.float64(np.nan), dtype=np.int64)``
-and that the behaviour is unchanged for ``np.nan`` itself which is a Python
-float.
+At this time, NumPy retains the behaviour for::
+
+    np.array(np.float64(np.nan), dtype=np.int64)
+
+The above changes do not affect Python scalars::
+
+    np.array([float("NaN")], dtype=np.int64)
+
+remains unaffected (``np.nan`` is a Python ``float``, not a NumPy one).
+Unlike signed integers, unsigned integers do not retain this special case,
+since they always behaved more like casting.
+The following code stops raising an error::
+
+    np.array([np.float64(np.nan)], dtype=np.uint64)
+
 To avoid backward compatibility issues, at this time assignment from
 ``datetime64`` scalar to strings of too short length remains supported.
 This means that ``np.asarray(np.datetime64("2020-10-10"), dtype="S5")``
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
@@ -1460,6 +1460,31 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
                 ((PyVoidScalarObject *)op)->flags,
                 NULL, op);
     }
+    else if (cache == 0 && newtype != NULL &&
+            PyDataType_ISSIGNED(newtype) && PyArray_IsScalar(op, Generic)) {
+        assert(ndim == 0);
+        /*
+         * This is a (possible) inconsistency where:
+         *
+         *     np.array(np.float64(np.nan), dtype=np.int64)
+         *
+         * behaves differently from:
+         *
+         *     np.array([np.float64(np.nan)], dtype=np.int64)
+         *     arr1d_int64[0] = np.float64(np.nan)
+         *     np.array(np.array(np.nan), dtype=np.int64)
+         *
+         * by using typical casting instead of raising an error.
+         * The error is desirable, but to always error seems like a
+         * larger change to be considered at some other time and it is
+         * undesirable that 0-D arrays behave differently from scalars.
+         * This retains the behaviour, largely due to issues in pandas
+         * which relied on a try/except (although hopefully that will
+         * have a better solution at some point):
+         * https://github.com/pandas-dev/pandas/issues/35481
+         */
+        return PyArray_FromScalar(op, dtype);
+    }
 
     /* There was no array (or array-like) passed in directly. */
     if ((flags & NPY_ARRAY_WRITEBACKIFCOPY) ||
@@ -1480,7 +1505,8 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
     if (cache == NULL) {
         /* This is a single item. Set it directly. */
         assert(ndim == 0);
-        if (PyArray_Pack(PyArray_DESCR(ret), PyArray_DATA(ret), op) < 0) {
+
+        if (PyArray_Pack(PyArray_DESCR(ret), PyArray_BYTES(ret), op) < 0) {
             Py_DECREF(ret);
             return NULL;
         }
diff --git a/numpy/core/src/multiarray/dtypemeta.c b/numpy/core/src/multiarray/dtypemeta.c
@@ -304,6 +304,18 @@ python_builtins_are_known_scalar_types(
 }
 
 
+static int
+signed_integers_is_known_scalar_types(
+        PyArray_DTypeMeta *cls, PyTypeObject *pytype)
+{
+    if (python_builtins_are_known_scalar_types(cls, pytype)) {
+        return 1;
+    }
+    /* Convert our scalars (raise on too large unsigned and NaN, etc.) */
+    return PyType_IsSubtype(pytype, &PyGenericArrType_Type);
+}
+
+
 static int
 datetime_known_scalar_types(
         PyArray_DTypeMeta *cls, PyTypeObject *pytype)
@@ -549,6 +561,11 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr)
     dtype_class->common_dtype = default_builtin_common_dtype;
     dtype_class->common_instance = NULL;
 
+    if (PyTypeNum_ISSIGNED(dtype_class->type_num)) {
+        /* Convert our scalars (raise on too large unsigned and NaN, etc.) */
+        dtype_class->is_known_scalar_type = signed_integers_is_known_scalar_types;
+    }
+
     if (PyTypeNum_ISUSERDEF(descr->type_num)) {
         dtype_class->common_dtype = legacy_userdtype_common_dtype_function;
     }
diff --git a/numpy/core/tests/test_array_coercion.py b/numpy/core/tests/test_array_coercion.py
@@ -309,6 +309,13 @@ def test_scalar_coercion_same_as_cast_and_assignment(self, cast_to):
                 # coercion should also raise (error type may change)
                 with pytest.raises(Exception):
                     np.array(scalar, dtype=dtype)
+
+                if (isinstance(scalar, rational) and
+                        np.issubdtype(dtype, np.signedinteger)):
+                    return
+
+                with pytest.raises(Exception):
+                    np.array([scalar], dtype=dtype)
                 # assignment should also raise
                 res = np.zeros((), dtype=dtype)
                 with pytest.raises(Exception):
@@ -340,6 +347,30 @@ def test_default_dtype_instance(self, dtype_char):
         assert discovered_dtype == dtype
         assert discovered_dtype.itemsize == dtype.itemsize
 
+    @pytest.mark.parametrize("dtype", np.typecodes["Integer"])
+    def test_scalar_to_int_coerce_does_not_cast(self, dtype):
+        """
+        Signed integers are currently different in that they do not cast other
+        NumPy scalars, but instead use scalar.__int__(). The hardcoded
+        exception to this rule is `np.array(scalar, dtype=integer)`.
+        """
+        dtype = np.dtype(dtype)
+        invalid_int = np.ulonglong(-1)
+
+        float_nan = np.float64(np.nan)
+
+        for scalar in [float_nan, invalid_int]:
+            # This is a special case using casting logic and thus not failing:
+            coerced = np.array(scalar, dtype=dtype)
+            cast = np.array(scalar).astype(dtype)
+            assert_array_equal(coerced, cast)
+
+            # However these fail:
+            with pytest.raises((ValueError, OverflowError)):
+                np.array([scalar], dtype=dtype)
+            with pytest.raises((ValueError, OverflowError)):
+                cast[()] = scalar
+
 
 class TestTimeScalars:
     @pytest.mark.parametrize("dtype", [np.int64, np.float32])
@@ -349,13 +380,21 @@ class TestTimeScalars:
              param(np.datetime64("NaT", "generic"), id="datetime64[generic](NaT)"),
              param(np.datetime64(1, "D"), id="datetime64[D]")],)
     def test_coercion_basic(self, dtype, scalar):
+        # Note: np.array(scalar, dtype=...) casts for backward compatibility,
+        # but assignment and `[scalar]` use stricter `scalar.__int__()` rules.
         arr = np.array(scalar, dtype=dtype)
         cast = np.array(scalar).astype(dtype)
-        ass = np.ones((), dtype=dtype)
-        ass[()] = scalar  # raises, as would np.array([scalar], dtype=dtype)
-
         assert_array_equal(arr, cast)
-        assert_array_equal(cast, cast)
+
+        ass = np.ones((), dtype=dtype)
+        if issubclass(dtype, np.integer):
+            with pytest.raises(TypeError):
+                # Raises, as would np.array([scalar], dtype=dtype); this is a
+                # conversion from times, but uses the behaviour of integers.
+                ass[()] = scalar
+        else:
+            ass[()] = scalar
+            assert_array_equal(ass, cast)
 
     @pytest.mark.parametrize("dtype", [np.int64, np.float32])
     @pytest.mark.parametrize("scalar",