Skip to content

Commit e9295bf

Browse files
authored
Merge pull request numpy#21507 from ganesh-k13/bug_div_overflow
BUG: Better report integer division overflow
2 parents c6aee22 + f918491 commit e9295bf

File tree

3 files changed

+282
-37
lines changed

3 files changed

+282
-37
lines changed

numpy/core/src/umath/loops_modulo.dispatch.c.src

+80-35
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,21 @@
1212
// Provides the various *_LOOP macros
1313
#include "fast_loop_macros.h"
1414

15+
16+
#define DIVIDEBYZERO_OVERFLOW_CHECK(x, y, min_val, signed) \
17+
(NPY_UNLIKELY( \
18+
(signed) ? \
19+
((y == 0) || ((x == min_val) && (y == -1))) : \
20+
(y == 0)) \
21+
)
22+
23+
#define FLAG_IF_DIVIDEBYZERO(x) do { \
24+
if (NPY_UNLIKELY(x == 0)) { \
25+
npy_set_floatstatus_divbyzero(); \
26+
} \
27+
} while (0)
28+
29+
1530
#if NPY_SIMD && defined(NPY_HAVE_VSX4)
1631
typedef struct {
1732
npyv_u32x2 hi;
@@ -166,7 +181,6 @@ vsx4_simd_@func@_contig_@sfx@(char **args, npy_intp len)
166181
const int vstep = npyv_nlanes_@sfx@;
167182
#if @id@ == 2 /* divmod */
168183
npyv_lanetype_@sfx@ *dst2 = (npyv_lanetype_@sfx@ *) args[3];
169-
const npyv_@sfx@ vneg_one = npyv_setall_@sfx@(-1);
170184
npyv_b@len@ warn = npyv_cvt_b@len@_@sfx@(npyv_zero_@sfx@());
171185

172186
for (; len >= vstep; len -= vstep, src1 += vstep, src2 += vstep,
@@ -176,11 +190,11 @@ vsx4_simd_@func@_contig_@sfx@(char **args, npy_intp len)
176190
npyv_@sfx@ quo = vsx4_div_@sfx@(a, b);
177191
npyv_@sfx@ rem = npyv_sub_@sfx@(a, vec_mul(b, quo));
178192
npyv_b@len@ bzero = npyv_cmpeq_@sfx@(b, vzero);
179-
// when b is 0, 'cvtozero' forces the modulo to be 0 too
180-
npyv_@sfx@ cvtozero = npyv_select_@sfx@(bzero, vzero, vneg_one);
193+
// when b is 0, forces the remainder to be 0 too
194+
rem = npyv_select_@sfx@(bzero, vzero, rem);
181195
warn = npyv_or_@sfx@(bzero, warn);
182196
npyv_store_@sfx@(dst1, quo);
183-
npyv_store_@sfx@(dst2, npyv_and_@sfx@(cvtozero, rem));
197+
npyv_store_@sfx@(dst2, rem);
184198
}
185199

186200
if (!vec_all_eq(warn, vzero)) {
@@ -290,7 +304,8 @@ vsx4_simd_@func@_contig_@sfx@(char **args, npy_intp len)
290304
npyv_lanetype_@sfx@ *dst2 = (npyv_lanetype_@sfx@ *) args[3];
291305
const npyv_@sfx@ vneg_one = npyv_setall_@sfx@(-1);
292306
const npyv_@sfx@ vmin = npyv_setall_@sfx@(NPY_MIN_INT@len@);
293-
npyv_b@len@ warn = npyv_cvt_b@len@_@sfx@(npyv_zero_@sfx@());
307+
npyv_b@len@ warn_zero = npyv_cvt_b@len@_@sfx@(npyv_zero_@sfx@());
308+
npyv_b@len@ warn_overflow = npyv_cvt_b@len@_@sfx@(npyv_zero_@sfx@());
294309

295310
for (; len >= vstep; len -= vstep, src1 += vstep, src2 += vstep,
296311
dst1 += vstep, dst2 += vstep) {
@@ -310,10 +325,8 @@ vsx4_simd_@func@_contig_@sfx@(char **args, npy_intp len)
310325
npyv_b@len@ amin = npyv_cmpeq_@sfx@(a, vmin);
311326
npyv_b@len@ bneg_one = npyv_cmpeq_@sfx@(b, vneg_one);
312327
npyv_b@len@ overflow = npyv_and_@sfx@(bneg_one, amin);
313-
npyv_b@len@ error = npyv_or_@sfx@(bzero, overflow);
314-
// in case of overflow or b = 0, 'cvtozero' forces quo/rem to be 0
315-
npyv_@sfx@ cvtozero = npyv_select_@sfx@(error, vzero, vneg_one);
316-
warn = npyv_or_@sfx@(error, warn);
328+
warn_zero = npyv_or_@sfx@(bzero, warn_zero);
329+
warn_overflow = npyv_or_@sfx@(overflow, warn_overflow);
317330
#endif
318331
#if @id@ >= 1 /* remainder and divmod */
319332
// handle mixed case the way Python does
@@ -329,8 +342,14 @@ vsx4_simd_@func@_contig_@sfx@(char **args, npy_intp len)
329342
#if @id@ == 2 /* divmod */
330343
npyv_@sfx@ to_sub = npyv_select_@sfx@(or, vzero, vneg_one);
331344
quo = npyv_add_@sfx@(quo, to_sub);
332-
npyv_store_@sfx@(dst1, npyv_and_@sfx@(cvtozero, quo));
333-
npyv_store_@sfx@(dst2, npyv_and_@sfx@(cvtozero, rem));
345+
// Divide by zero
346+
quo = npyv_select_@sfx@(bzero, vzero, quo);
347+
rem = npyv_select_@sfx@(bzero, vzero, rem);
348+
// Overflow
349+
quo = npyv_select_@sfx@(overflow, vmin, quo);
350+
rem = npyv_select_@sfx@(overflow, vzero, rem);
351+
npyv_store_@sfx@(dst1, quo);
352+
npyv_store_@sfx@(dst2, rem);
334353
#else /* fmod and remainder */
335354
npyv_store_@sfx@(dst1, rem);
336355
if (NPY_UNLIKELY(vec_any_eq(b, vzero))) {
@@ -340,17 +359,27 @@ vsx4_simd_@func@_contig_@sfx@(char **args, npy_intp len)
340359
}
341360

342361
#if @id@ == 2 /* divmod */
343-
if (!vec_all_eq(warn, vzero)) {
362+
if (!vec_all_eq(warn_zero, vzero)) {
344363
npy_set_floatstatus_divbyzero();
345364
}
365+
if (!vec_all_eq(warn_overflow, vzero)) {
366+
npy_set_floatstatus_overflow();
367+
}
346368

347369
for (; len > 0; --len, ++src1, ++src2, ++dst1, ++dst2) {
348370
const npyv_lanetype_@sfx@ a = *src1;
349371
const npyv_lanetype_@sfx@ b = *src2;
350-
if (b == 0 || (a == NPY_MIN_INT@len@ && b == -1)) {
351-
npy_set_floatstatus_divbyzero();
352-
*dst1 = 0;
353-
*dst2 = 0;
372+
if (DIVIDEBYZERO_OVERFLOW_CHECK(a, b, NPY_MIN_INT@len@, NPY_TRUE)) {
373+
if (b == 0) {
374+
npy_set_floatstatus_divbyzero();
375+
*dst1 = 0;
376+
*dst2 = 0;
377+
}
378+
else {
379+
npy_set_floatstatus_overflow();
380+
*dst1 = NPY_MIN_INT@len@;
381+
*dst2 = 0;
382+
}
354383
}
355384
else {
356385
*dst1 = a / b;
@@ -365,8 +394,8 @@ vsx4_simd_@func@_contig_@sfx@(char **args, npy_intp len)
365394
for (; len > 0; --len, ++src1, ++src2, ++dst1) {
366395
const npyv_lanetype_@sfx@ a = *src1;
367396
const npyv_lanetype_@sfx@ b = *src2;
368-
if (NPY_UNLIKELY(b == 0)) {
369-
npy_set_floatstatus_divbyzero();
397+
if (DIVIDEBYZERO_OVERFLOW_CHECK(a, b, NPY_MIN_INT@len@, NPY_TRUE)) {
398+
FLAG_IF_DIVIDEBYZERO(b);
370399
*dst1 = 0;
371400
} else{
372401
*dst1 = a % b;
@@ -415,8 +444,6 @@ vsx4_simd_@func@_by_scalar_contig_@sfx@(char **args, npy_intp len)
415444
// (a == NPY_MIN_INT@len@ && b == -1)
416445
npyv_b@len@ amin = npyv_cmpeq_@sfx@(a, vmin);
417446
npyv_b@len@ overflow = npyv_and_@sfx@(bneg_one, amin);
418-
// in case of overflow, 'cvtozero' forces quo/rem to be 0
419-
npyv_@sfx@ cvtozero = npyv_select_@sfx@(overflow, vzero, vneg_one);
420447
warn = npyv_or_@sfx@(overflow, warn);
421448
#endif
422449
#if @id@ >= 1 /* remainder and divmod */
@@ -432,23 +459,26 @@ vsx4_simd_@func@_by_scalar_contig_@sfx@(char **args, npy_intp len)
432459
#if @id@ == 2 /* divmod */
433460
npyv_@sfx@ to_sub = npyv_select_@sfx@(or, vzero, vneg_one);
434461
quo = npyv_add_@sfx@(quo, to_sub);
435-
npyv_store_@sfx@(dst1, npyv_and_@sfx@(cvtozero, quo));
436-
npyv_store_@sfx@(dst2, npyv_and_@sfx@(cvtozero, rem));
462+
// Overflow: set quo to minimum and rem to 0
463+
quo = npyv_select_@sfx@(overflow, vmin, quo);
464+
rem = npyv_select_@sfx@(overflow, vzero, rem);
465+
npyv_store_@sfx@(dst1, quo);
466+
npyv_store_@sfx@(dst2, rem);
437467
#else /* fmod and remainder */
438468
npyv_store_@sfx@(dst1, rem);
439469
#endif
440470
}
441471

442472
#if @id@ == 2 /* divmod */
443473
if (!vec_all_eq(warn, vzero)) {
444-
npy_set_floatstatus_divbyzero();
474+
npy_set_floatstatus_overflow();
445475
}
446476

447477
for (; len > 0; --len, ++src1, ++dst1, ++dst2) {
448478
const npyv_lanetype_@sfx@ a = *src1;
449-
if (a == NPY_MIN_INT@len@ && scalar == -1) {
450-
npy_set_floatstatus_divbyzero();
451-
*dst1 = 0;
479+
if (NPY_UNLIKELY(a == NPY_MIN_INT@len@ && scalar == -1)) {
480+
npy_set_floatstatus_overflow();
481+
*dst1 = NPY_MIN_INT@len@;
452482
*dst2 = 0;
453483
}
454484
else {
@@ -524,8 +554,12 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_fmod)
524554
BINARY_LOOP {
525555
const @type@ in1 = *(@type@ *)ip1;
526556
const @type@ in2 = *(@type@ *)ip2;
527-
if (NPY_UNLIKELY(in2 == 0)) {
528-
npy_set_floatstatus_divbyzero();
557+
#if @signed@
558+
if (DIVIDEBYZERO_OVERFLOW_CHECK(in1, in2, NPY_MIN_@TYPE@, NPY_TRUE)) {
559+
#else
560+
if (DIVIDEBYZERO_OVERFLOW_CHECK(in1, in2, 0, NPY_FALSE)) {
561+
#endif
562+
FLAG_IF_DIVIDEBYZERO(in2);
529563
*((@type@ *)op1) = 0;
530564
} else{
531565
*((@type@ *)op1)= in1 % in2;
@@ -552,8 +586,12 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_remainder)
552586
BINARY_LOOP {
553587
const @type@ in1 = *(@type@ *)ip1;
554588
const @type@ in2 = *(@type@ *)ip2;
555-
if (NPY_UNLIKELY(in2 == 0)) {
556-
npy_set_floatstatus_divbyzero();
589+
#if @signed@
590+
if (DIVIDEBYZERO_OVERFLOW_CHECK(in1, in2, NPY_MIN_@TYPE@, NPY_TRUE)) {
591+
#else
592+
if (DIVIDEBYZERO_OVERFLOW_CHECK(in1, in2, 0, NPY_FALSE)) {
593+
#endif
594+
FLAG_IF_DIVIDEBYZERO(in2);
557595
*((@type@ *)op1) = 0;
558596
} else{
559597
#if @signed@
@@ -593,10 +631,17 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_divmod)
593631
const @type@ in1 = *(@type@ *)ip1;
594632
const @type@ in2 = *(@type@ *)ip2;
595633
/* see FIXME note for divide above */
596-
if (NPY_UNLIKELY(in2 == 0 || (in1 == NPY_MIN_@TYPE@ && in2 == -1))) {
597-
npy_set_floatstatus_divbyzero();
598-
*((@type@ *)op1) = 0;
599-
*((@type@ *)op2) = 0;
634+
if (DIVIDEBYZERO_OVERFLOW_CHECK(in1, in2, NPY_MIN_@TYPE@, NPY_TRUE)) {
635+
if (in2 == 0) {
636+
npy_set_floatstatus_divbyzero();
637+
*((@type@ *)op1) = 0;
638+
*((@type@ *)op2) = 0;
639+
}
640+
else {
641+
npy_set_floatstatus_overflow();
642+
*((@type@ *)op1) = NPY_MIN_@TYPE@;
643+
*((@type@ *)op2) = 0;
644+
}
600645
}
601646
else {
602647
/* handle mixed case the way Python does */
@@ -616,7 +661,7 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_divmod)
616661
BINARY_LOOP_TWO_OUT {
617662
const @type@ in1 = *(@type@ *)ip1;
618663
const @type@ in2 = *(@type@ *)ip2;
619-
if (NPY_UNLIKELY(in2 == 0)) {
664+
if (DIVIDEBYZERO_OVERFLOW_CHECK(in1, in2, 0, NPY_FALSE)) {
620665
npy_set_floatstatus_divbyzero();
621666
*((@type@ *)op1) = 0;
622667
*((@type@ *)op2) = 0;

numpy/core/src/umath/scalarmath.c.src

+10-2
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,13 @@ static NPY_INLINE int
161161
* #NAME = BYTE, UBYTE, SHORT, USHORT, INT, UINT,
162162
* LONG, ULONG, LONGLONG, ULONGLONG#
163163
*/
164+
165+
#if @neg@
166+
#define DIVIDEBYZERO_CHECK (b == 0 || (a == NPY_MIN_@NAME@ && b == -1))
167+
#else
168+
#define DIVIDEBYZERO_CHECK (b == 0)
169+
#endif
170+
164171
static NPY_INLINE int
165172
@name@_ctype_divide(@type@ a, @type@ b, @type@ *out) {
166173
if (b == 0) {
@@ -169,7 +176,7 @@ static NPY_INLINE int
169176
}
170177
#if @neg@
171178
else if (b == -1 && a == NPY_MIN_@NAME@) {
172-
*out = a / b;
179+
*out = NPY_MIN_@NAME@;
173180
return NPY_FPE_OVERFLOW;
174181
}
175182
#endif
@@ -192,7 +199,7 @@ static NPY_INLINE int
192199

193200
static NPY_INLINE int
194201
@name@_ctype_remainder(@type@ a, @type@ b, @type@ *out) {
195-
if (a == 0 || b == 0) {
202+
if (DIVIDEBYZERO_CHECK) {
196203
*out = 0;
197204
if (b == 0) {
198205
return NPY_FPE_DIVIDEBYZERO;
@@ -213,6 +220,7 @@ static NPY_INLINE int
213220
#endif
214221
return 0;
215222
}
223+
#undef DIVIDEBYZERO_CHECK
216224
/**end repeat**/
217225

218226
/**begin repeat

0 commit comments

Comments
 (0)