Skip to content

Commit f00efd0

Browse files
authored
Assorted UBSAN cleanups (#55112)
* first round of fixes
* fix up includes
* updates
* dedup logic
* move comment
1 parent 81fb7e7 commit f00efd0

File tree

2 files changed

+41
-16
lines changed

2 files changed

+41
-16
lines changed

pandas/_libs/src/vendored/ujson/lib/ultrajsonenc.c

+7-1
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ Numeric decoder derived from TCL library
4444
#include <float.h>
4545
#include <locale.h>
4646
#include <math.h>
47+
#include <stdint.h>
4748
#include <stdio.h>
4849
#include <stdlib.h>
4950
#include <string.h>
@@ -763,7 +764,12 @@ void Buffer_AppendIntUnchecked(JSONObjectEncoder *enc, JSINT32 value) {
763764

764765
void Buffer_AppendLongUnchecked(JSONObjectEncoder *enc, JSINT64 value) {
765766
char *wstr;
766-
JSUINT64 uvalue = (value < 0) ? -value : value;
767+
JSUINT64 uvalue;
768+
if (value == INT64_MIN) {
769+
uvalue = INT64_MAX + UINT64_C(1);
770+
} else {
771+
uvalue = (value < 0) ? -value : value;
772+
}
767773

768774
wstr = enc->offset;
769775
// Conversion. Number is reversed.

pandas/_libs/tslibs/np_datetime.pyx

+34-15
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
cimport cython
21
from cpython.datetime cimport (
32
PyDateTime_CheckExact,
43
PyDateTime_DATE_GET_HOUR,
@@ -18,6 +17,7 @@ from cpython.object cimport (
1817
Py_LT,
1918
Py_NE,
2019
)
20+
from libc.stdint cimport INT64_MAX
2121

2222
import_datetime()
2323
PandasDateTime_IMPORT
@@ -545,14 +545,14 @@ cdef ndarray astype_round_check(
545545
return iresult
546546

547547

548-
@cython.overflowcheck(True)
549548
cdef int64_t get_conversion_factor(
550549
NPY_DATETIMEUNIT from_unit,
551550
NPY_DATETIMEUNIT to_unit
552551
) except? -1:
553552
"""
554553
Find the factor by which we need to multiply to convert from from_unit to to_unit.
555554
"""
555+
cdef int64_t value, overflow_limit, factor
556556
if (
557557
from_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC
558558
or to_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC
@@ -565,28 +565,44 @@ cdef int64_t get_conversion_factor(
565565
return 1
566566

567567
if from_unit == NPY_DATETIMEUNIT.NPY_FR_W:
568-
return 7 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_D, to_unit)
568+
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_D, to_unit)
569+
factor = 7
569570
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_D:
570-
return 24 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_h, to_unit)
571+
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_h, to_unit)
572+
factor = 24
571573
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_h:
572-
return 60 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_m, to_unit)
574+
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_m, to_unit)
575+
factor = 60
573576
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_m:
574-
return 60 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_s, to_unit)
577+
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_s, to_unit)
578+
factor = 60
575579
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_s:
576-
return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ms, to_unit)
580+
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ms, to_unit)
581+
factor = 1000
577582
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ms:
578-
return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_us, to_unit)
583+
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_us, to_unit)
584+
factor = 1000
579585
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_us:
580-
return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ns, to_unit)
586+
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ns, to_unit)
587+
factor = 1000
581588
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ns:
582-
return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ps, to_unit)
589+
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ps, to_unit)
590+
factor = 1000
583591
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ps:
584-
return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_fs, to_unit)
592+
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_fs, to_unit)
593+
factor = 1000
585594
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_fs:
586-
return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_as, to_unit)
595+
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_as, to_unit)
596+
factor = 1000
587597
else:
588598
raise ValueError("Converting from M or Y units is not supported.")
589599

600+
overflow_limit = INT64_MAX // factor
601+
if value > overflow_limit or value < -overflow_limit:
602+
raise OverflowError("result would overflow")
603+
604+
return factor * value
605+
590606

591607
cdef int64_t convert_reso(
592608
int64_t value,
@@ -595,7 +611,7 @@ cdef int64_t convert_reso(
595611
bint round_ok,
596612
) except? -1:
597613
cdef:
598-
int64_t res_value, mult, div, mod
614+
int64_t res_value, mult, div, mod, overflow_limit
599615

600616
if from_reso == to_reso:
601617
return value
@@ -624,9 +640,12 @@ cdef int64_t convert_reso(
624640
else:
625641
# e.g. us -> ns, risk of overflow, but no risk of lossy rounding
626642
mult = get_conversion_factor(from_reso, to_reso)
627-
with cython.overflowcheck(True):
643+
overflow_limit = INT64_MAX // mult
644+
if value > overflow_limit or value < -overflow_limit:
628645
# Note: caller is responsible for re-raising as OutOfBoundsTimedelta
629-
res_value = value * mult
646+
raise OverflowError("result would overflow")
647+
648+
res_value = value * mult
630649

631650
return res_value
632651

0 commit comments

Comments
 (0)