-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
Assorted UBSAN cleanups #55112
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Assorted UBSAN cleanups #55112
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,3 @@ | ||
cimport cython | ||
from cpython.datetime cimport ( | ||
PyDateTime_CheckExact, | ||
PyDateTime_DATE_GET_HOUR, | ||
|
@@ -18,6 +17,7 @@ from cpython.object cimport ( | |
Py_LT, | ||
Py_NE, | ||
) | ||
from libc.stdint cimport INT64_MAX | ||
|
||
import_datetime() | ||
PandasDateTime_IMPORT | ||
|
@@ -545,14 +545,14 @@ cdef ndarray astype_round_check( | |
return iresult | ||
|
||
|
||
@cython.overflowcheck(True) | ||
cdef int64_t get_conversion_factor( | ||
NPY_DATETIMEUNIT from_unit, | ||
NPY_DATETIMEUNIT to_unit | ||
) except? -1: | ||
""" | ||
Find the factor by which we need to multiply to convert from from_unit to to_unit. | ||
""" | ||
cdef int64_t value, overflow_limit | ||
if ( | ||
from_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC | ||
or to_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC | ||
|
@@ -565,25 +565,65 @@ cdef int64_t get_conversion_factor( | |
return 1 | ||
|
||
if from_unit == NPY_DATETIMEUNIT.NPY_FR_W: | ||
return 7 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_D, to_unit) | ||
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_D, to_unit) | ||
overflow_limit = INT64_MAX // 7 | ||
if value > overflow_limit or value < -overflow_limit: | ||
raise OverflowError("result would overflow") | ||
return 7 * value | ||
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_D: | ||
return 24 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_h, to_unit) | ||
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_h, to_unit) | ||
overflow_limit = INT64_MAX // 24 | ||
if value > overflow_limit or value < -overflow_limit: | ||
raise OverflowError("result would overflow") | ||
return 24 * value | ||
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_h: | ||
return 60 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_m, to_unit) | ||
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_m, to_unit) | ||
overflow_limit = INT64_MAX // 60 | ||
if value > overflow_limit or value < -overflow_limit: | ||
raise OverflowError("result would overflow") | ||
return 60 * value | ||
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_m: | ||
return 60 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_s, to_unit) | ||
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_s, to_unit) | ||
overflow_limit = INT64_MAX // 60 | ||
if value > overflow_limit or value < -overflow_limit: | ||
raise OverflowError("result would overflow") | ||
return 60 * value | ||
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_s: | ||
return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ms, to_unit) | ||
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ms, to_unit) | ||
overflow_limit = INT64_MAX // 1000 | ||
if value > overflow_limit or value < -overflow_limit: | ||
raise OverflowError("result would overflow") | ||
return 1000 * value | ||
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ms: | ||
return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_us, to_unit) | ||
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_us, to_unit) | ||
overflow_limit = INT64_MAX // 1000 | ||
if value > overflow_limit or value < -overflow_limit: | ||
raise OverflowError("result would overflow") | ||
return 1000 * value | ||
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_us: | ||
return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ns, to_unit) | ||
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ns, to_unit) | ||
overflow_limit = INT64_MAX // 1000 | ||
if value > overflow_limit or value < -overflow_limit: | ||
raise OverflowError("result would overflow") | ||
return 1000 * value | ||
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ns: | ||
return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ps, to_unit) | ||
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_ps, to_unit) | ||
overflow_limit = INT64_MAX // 1000 | ||
if value > overflow_limit or value < -overflow_limit: | ||
raise OverflowError("result would overflow") | ||
return 1000 * value | ||
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_ps: | ||
return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_fs, to_unit) | ||
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_fs, to_unit) | ||
overflow_limit = INT64_MAX // 1000 | ||
if value > overflow_limit or value < -overflow_limit: | ||
raise OverflowError("result would overflow") | ||
return 1000 * value | ||
elif from_unit == NPY_DATETIMEUNIT.NPY_FR_fs: | ||
return 1000 * get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_as, to_unit) | ||
value = get_conversion_factor(NPY_DATETIMEUNIT.NPY_FR_as, to_unit) | ||
overflow_limit = INT64_MAX // 1000 | ||
if value > overflow_limit or value < -overflow_limit: | ||
raise OverflowError("result would overflow") | ||
return 1000 * value | ||
else: | ||
raise ValueError("Converting from M or Y units is not supported.") | ||
|
||
|
@@ -595,7 +635,7 @@ cdef int64_t convert_reso( | |
bint round_ok, | ||
) except? -1: | ||
cdef: | ||
int64_t res_value, mult, div, mod | ||
int64_t res_value, mult, div, mod, overflow_limit | ||
|
||
if from_reso == to_reso: | ||
return value | ||
|
@@ -624,9 +664,12 @@ cdef int64_t convert_reso( | |
else: | ||
# e.g. ns -> us, risk of overflow, but no risk of lossy rounding | ||
mult = get_conversion_factor(from_reso, to_reso) | ||
with cython.overflowcheck(True): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is there a Cython bug? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think it's a bug per se. I think Cython lets the overflow happen but then adds checks after the fact to see if it overflowed. This by contrast prevents the overflow from happening in the first place. It generally gets you to the same place in the end. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Looks like Cython generates something like this: static CYTHON_INLINE int __Pyx_mul_const_int_checking_overflow(int a, int b, int *overflow) {
if (b > 1) {
*overflow |= a > __PYX_MAX(int) / b;
*overflow |= a < __PYX_MIN(int) / b;
} else if (b == -1) {
*overflow |= a == __PYX_MIN(int);
} else if (b < -1) {
*overflow |= a > __PYX_MIN(int) / b;
*overflow |= a < __PYX_MAX(int) / b;
}
return a * b;
} We aren't handling a negative denominator, but otherwise, yeah, the difference is that Cython still does the multiplication and just sets an overflow flag. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For my edification, is this pattern considered better practice than the one Cython uses? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think the only difference here is that this will make the sanitizer happy whereas the Cython approach will not. |
||
# Note: caller is responsible for re-raising as OutOfBoundsTimedelta | ||
res_value = value * mult | ||
overflow_limit = INT64_MAX // mult | ||
if value > overflow_limit or value < -overflow_limit: | ||
raise OverflowError("result would overflow") | ||
|
||
# Note: caller is responsible for re-raising as OutOfBoundsTimedelta | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this comment should go up a line with the OverflowError? |
||
res_value = value * mult | ||
|
||
return res_value | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can we de-dup some of this with e.g.
if ...
value = get_conversion_factor(...
factor = 7
elif ...
value = get_conversion_factor(...
factor = 24
...
overflow_limit = INT64_MAX // factor
if value > ...
raise OverflowError(...)
return factor * value
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, no problem - great idea