Skip to content

Commit a2e76ff

Browse files
committed
API: Special case how numpy scalars are coerced to signed integer
This removes one of the larger changes to array-coercion, which meant that NumPy scalars were always coerced like a 0-D array would be (i.e. using normal casting). When the assignment is explicitly an integer, now `scalar.__int__()` will be used instead (as was the case previously). Since previously this was handled differently, a *single* scalar is still converted using casting: np.array(np.float64(np.nan), dtype=np.int64) succeeds, but any other thing fails, such as: np.array([np.float64(np.nan)], dtype=np.int64); arr1d_int64[()] = np.float64(np.nan); np.array(np.array(np.nan), dtype=np.int64). This does not affect Python scalars, which always raise, because they always are converted using `scalar.__int__()`. Unsigned integers always supported casting from their signed equivalent, so the difference is much less visible for them and this chooses to always use the casting behaviour. The main reason for this change is to help pandas: pandas-dev/pandas#35481
1 parent 6094508 commit a2e76ff

File tree

4 files changed

+104
-10
lines changed

4 files changed

+104
-10
lines changed

doc/release/upcoming_changes/16200.compatibility.rst

+17-5
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,26 @@ error::
88

99
np.array([np.float64(np.nan)], dtype=np.int64)
1010

11-
will succeed at this time (this may change) and return an undefined result
12-
(usually the smallest possible integer). This also affects assignments::
11+
will succeed and return an undefined result (usually the smallest possible
12+
integer). This also affects assignments::
1313

1414
arr[0] = np.float64(np.nan)
1515

16-
Note, this already happened for ``np.array(np.float64(np.nan), dtype=np.int64)``
17-
and that the behaviour is unchanged for ``np.nan`` itself which is a Python
18-
float.
16+
At this time, NumPy retains the behaviour for::
17+
18+
np.array(np.float64(np.nan), dtype=np.int64)
19+
20+
The above changes do not affect Python scalars::
21+
22+
np.array([float("NaN")], dtype=np.int64)
23+
24+
remains unaffected (``np.nan`` is a Python ``float``, not a NumPy one).
25+
Unlike signed integers, unsigned integers do not retain this special case,
26+
since they always behaved more like casting.
27+
The following code stops raising an error::
28+
29+
np.array([np.float64(np.nan)], dtype=np.uint64)
30+
1931
To avoid backward compatibility issues, at this time assignment from
2032
``datetime64`` scalar to strings of too short length remains supported.
2133
This means that ``np.asarray(np.datetime64("2020-10-10"), dtype="S5")``

numpy/core/src/multiarray/ctors.c

+27-1
Original file line numberDiff line numberDiff line change
@@ -1460,6 +1460,31 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
14601460
((PyVoidScalarObject *)op)->flags,
14611461
NULL, op);
14621462
}
1463+
else if (cache == 0 && newtype != NULL &&
1464+
PyDataType_ISSIGNED(newtype) && PyArray_IsScalar(op, Generic)) {
1465+
assert(ndim == 0);
1466+
/*
1467+
* This is a (possible) inconsistency where:
1468+
*
1469+
* np.array(np.float64(np.nan), dtype=np.int64)
1470+
*
1471+
* behaves differently from:
1472+
*
1473+
* np.array([np.float64(np.nan)], dtype=np.int64)
1474+
* arr1d_int64[0] = np.float64(np.nan)
1475+
* np.array(np.array(np.nan), dtype=np.int64)
1476+
*
1477+
* by not raising an error instead of using typical casting.
1478+
* The error is desirable, but to always error seems like a
1479+
* larger change to be considered at some other time and it is
1480+
* undesirable that 0-D arrays behave differently from scalars.
1481+
* This retains the behaviour, largely due to issues in pandas
1482+
* which relied on a try/except (although hopefully that will
1483+
* have a better solution at some point):
1484+
* https://github.com/pandas-dev/pandas/issues/35481
1485+
*/
1486+
return PyArray_FromScalar(op, dtype);
1487+
}
14631488

14641489
/* There was no array (or array-like) passed in directly. */
14651490
if ((flags & NPY_ARRAY_WRITEBACKIFCOPY) ||
@@ -1480,7 +1505,8 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
14801505
if (cache == NULL) {
14811506
/* This is a single item. Set it directly. */
14821507
assert(ndim == 0);
1483-
if (PyArray_Pack(PyArray_DESCR(ret), PyArray_DATA(ret), op) < 0) {
1508+
1509+
if (PyArray_Pack(PyArray_DESCR(ret), PyArray_BYTES(ret), op) < 0) {
14841510
Py_DECREF(ret);
14851511
return NULL;
14861512
}

numpy/core/src/multiarray/dtypemeta.c

+17
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,18 @@ python_builtins_are_known_scalar_types(
304304
}
305305

306306

307+
static int
308+
signed_integers_is_known_scalar_types(
309+
PyArray_DTypeMeta *cls, PyTypeObject *pytype)
310+
{
311+
if (python_builtins_are_known_scalar_types(cls, pytype)) {
312+
return 1;
313+
}
314+
/* Convert our scalars (raise on too large unsigned and NaN, etc.) */
315+
return PyType_IsSubtype(pytype, &PyGenericArrType_Type);
316+
}
317+
318+
307319
static int
308320
datetime_known_scalar_types(
309321
PyArray_DTypeMeta *cls, PyTypeObject *pytype)
@@ -549,6 +561,11 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr)
549561
dtype_class->common_dtype = default_builtin_common_dtype;
550562
dtype_class->common_instance = NULL;
551563

564+
if (PyTypeNum_ISSIGNED(dtype_class->type_num)) {
565+
/* Convert our scalars (raise on too large unsigned and NaN, etc.) */
566+
dtype_class->is_known_scalar_type = signed_integers_is_known_scalar_types;
567+
}
568+
552569
if (PyTypeNum_ISUSERDEF(descr->type_num)) {
553570
dtype_class->common_dtype = legacy_userdtype_common_dtype_function;
554571
}

numpy/core/tests/test_array_coercion.py

+43-4
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,13 @@ def test_scalar_coercion_same_as_cast_and_assignment(self, cast_to):
309309
# coercion should also raise (error type may change)
310310
with pytest.raises(Exception):
311311
np.array(scalar, dtype=dtype)
312+
313+
if (isinstance(scalar, rational) and
314+
np.issubdtype(dtype, np.signedinteger)):
315+
return
316+
317+
with pytest.raises(Exception):
318+
np.array([scalar], dtype=dtype)
312319
# assignment should also raise
313320
res = np.zeros((), dtype=dtype)
314321
with pytest.raises(Exception):
@@ -340,6 +347,30 @@ def test_default_dtype_instance(self, dtype_char):
340347
assert discovered_dtype == dtype
341348
assert discovered_dtype.itemsize == dtype.itemsize
342349

350+
@pytest.mark.parametrize("dtype", np.typecodes["Integer"])
351+
def test_scalar_to_int_coerce_does_not_cast(self, dtype):
352+
"""
353+
Signed integers are currently different in that they do not cast other
354+
NumPy scalars, but instead use scalar.__int__(). The hardcoded
355+
exception to this rule is `np.array(scalar, dtype=integer)`.
356+
"""
357+
dtype = np.dtype(dtype)
358+
invalid_int = np.ulonglong(-1)
359+
360+
float_nan = np.float64(np.nan)
361+
362+
for scalar in [float_nan, invalid_int]:
363+
# This is a special case using casting logic and thus not failing:
364+
coerced = np.array(scalar, dtype=dtype)
365+
cast = np.array(scalar).astype(dtype)
366+
assert_array_equal(coerced, cast)
367+
368+
# However these fail:
369+
with pytest.raises((ValueError, OverflowError)):
370+
np.array([scalar], dtype=dtype)
371+
with pytest.raises((ValueError, OverflowError)):
372+
cast[()] = scalar
373+
343374

344375
class TestTimeScalars:
345376
@pytest.mark.parametrize("dtype", [np.int64, np.float32])
@@ -349,13 +380,21 @@ class TestTimeScalars:
349380
param(np.datetime64("NaT", "generic"), id="datetime64[generic](NaT)"),
350381
param(np.datetime64(1, "D"), id="datetime64[D]")],)
351382
def test_coercion_basic(self, dtype, scalar):
383+
# Note: the lone `scalar` is cast here, while `[scalar]` and assignment to
384+
# signed integers use the stricter `scalar.__int__()` rules for backward compatibility right now.
352385
arr = np.array(scalar, dtype=dtype)
353386
cast = np.array(scalar).astype(dtype)
354-
ass = np.ones((), dtype=dtype)
355-
ass[()] = scalar # raises, as would np.array([scalar], dtype=dtype)
356-
357387
assert_array_equal(arr, cast)
358-
assert_array_equal(cast, cast)
388+
389+
ass = np.ones((), dtype=dtype)
390+
if issubclass(dtype, np.integer):
391+
with pytest.raises(TypeError):
392+
# raises, as would np.array([scalar], dtype=dtype), this is
393+
# conversion from times, but behaviour of integers.
394+
ass[()] = scalar
395+
else:
396+
ass[()] = scalar
397+
assert_array_equal(ass, cast)
359398

360399
@pytest.mark.parametrize("dtype", [np.int64, np.float32])
361400
@pytest.mark.parametrize("scalar",

0 commit comments

Comments
 (0)