Skip to content

Remove undefined behavior from npy_datetimestruct_to_datetime #55151

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 46 commits into from
Nov 15, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
e27a0da
refactor npy_datetimestruct_to_datetime
WillAyd Sep 15, 2023
0dea606
Used builtin overflow directives
WillAyd Oct 17, 2023
d6a24f3
macro fixups
WillAyd Oct 17, 2023
21e919c
more macro cleanups
WillAyd Oct 17, 2023
6302f2f
more macro refactor
WillAyd Oct 17, 2023
e2646a6
musllinux c support
WillAyd Oct 18, 2023
5852d20
Merge remote-tracking branch 'upstream/main' into refactor-np-datetime
WillAyd Oct 19, 2023
f55a58a
macro cleanup
WillAyd Oct 19, 2023
057e74f
more refactor
WillAyd Oct 19, 2023
2d6d5fc
fix cython warning
WillAyd Oct 19, 2023
e65e229
windows fix
WillAyd Oct 19, 2023
af29e7c
Raise Outofboundsdatetime
WillAyd Oct 20, 2023
b69b489
Merge remote-tracking branch 'upstream/main' into refactor-np-datetime
WillAyd Oct 20, 2023
f9e5e35
cleanup GIL warnings
WillAyd Oct 20, 2023
1b89dfe
more error handling cleanup
WillAyd Oct 20, 2023
73a1507
updates
WillAyd Oct 20, 2023
ef24509
error message update
WillAyd Oct 20, 2023
2240b09
error fixups
WillAyd Oct 20, 2023
1640002
test fixup
WillAyd Oct 22, 2023
9cdb9c9
clang-format
WillAyd Oct 23, 2023
9e0cec0
Merge branch 'main' into refactor-np-datetime
WillAyd Oct 23, 2023
25f3edf
Merge branch 'main' into refactor-np-datetime
WillAyd Oct 24, 2023
5c09a13
Merge branch 'main' into refactor-np-datetime
WillAyd Oct 26, 2023
4d8696c
updates
WillAyd Oct 26, 2023
b3d5b7c
fixed error message
WillAyd Oct 26, 2023
2f60947
try nogil
WillAyd Oct 26, 2023
f26c924
Merge remote-tracking branch 'upstream/main' into refactor-np-datetime
WillAyd Nov 1, 2023
03315fc
revert offsets changes
WillAyd Nov 1, 2023
b3a80b4
simplified error handling
WillAyd Nov 1, 2023
038db5f
period fixup
WillAyd Nov 1, 2023
df2a4df
fixed test failure
WillAyd Nov 1, 2023
1afb12d
try speedup
WillAyd Nov 1, 2023
e293642
updated benchmark
WillAyd Nov 2, 2023
2203421
Merge remote-tracking branch 'upstream/main' into refactor-np-datetime
WillAyd Nov 2, 2023
7ee90dd
revert noexcepts
WillAyd Nov 3, 2023
81f32a9
Merge remote-tracking branch 'upstream/main' into refactor-np-datetime
WillAyd Nov 3, 2023
8d918dc
Merge branch 'main' into refactor-np-datetime
WillAyd Nov 6, 2023
7e8571f
Merge remote-tracking branch 'upstream/main' into refactor-np-datetime
WillAyd Nov 8, 2023
07e8106
Merge branch 'main' into refactor-np-datetime
WillAyd Nov 10, 2023
eab64ac
shared function for dts formatting
WillAyd Nov 10, 2023
7fbd191
import -> cimport
WillAyd Nov 10, 2023
4eba919
pass address
WillAyd Nov 10, 2023
9e1914d
typo
WillAyd Nov 10, 2023
188eb10
Merge branch 'main' into refactor-np-datetime
WillAyd Nov 13, 2023
162c858
remove comment
WillAyd Nov 14, 2023
de90a2f
cdef object -> str
WillAyd Nov 14, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ project(
meson_version: '>=1.0.1',
default_options: [
'buildtype=release',
'c_std=c99'
'c_std=c11'
]
)

Expand Down
291 changes: 199 additions & 92 deletions pandas/_libs/src/vendored/numpy/datetime/np_datetime.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,29 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt
#include <numpy/ndarraytypes.h>
#include "pandas/vendored/numpy/datetime/np_datetime.h"

/*
 * Checked signed 64-bit arithmetic.
 *
 * checked_int64_add/sub/mul(a, b, res) store the result of the operation in
 * *res and evaluate to zero on success, or to a non-zero value when the
 * mathematical result does not fit in the type pointed to by res.
 */
#if defined(_MSC_VER)
/* NOTE(review): some Windows SDK versions appear to expose the signed intsafe
 * helpers only when this macro is defined before including <intsafe.h> --
 * confirm against the SDK in use. Defining it is harmless otherwise. */
#ifndef ENABLE_INTSAFE_SIGNED_FUNCTIONS
#define ENABLE_INTSAFE_SIGNED_FUNCTIONS
#endif
#include <intsafe.h>
#define checked_int64_add(a, b, res) LongLongAdd(a, b, res)
#define checked_int64_sub(a, b, res) LongLongSub(a, b, res)
#define checked_int64_mul(a, b, res) LongLongMul(a, b, res)
#else
/* __has_builtin is itself a compiler extension (GCC only gained it in
 * version 10), so it may only be evaluated once we know it exists. */
#if defined(__has_builtin)
#if !__has_builtin(__builtin_add_overflow)
_Static_assert(0, "Overflow checking not detected; please try a newer compiler");
#endif
#endif
/* The type-generic builtins derive the result width from *res, so they are
 * correct whether int64_t is spelled `long` or `long long` on this platform. */
#define checked_int64_add(a, b, res) __builtin_add_overflow(a, b, res)
#define checked_int64_sub(a, b, res) __builtin_sub_overflow(a, b, res)
#define checked_int64_mul(a, b, res) __builtin_mul_overflow(a, b, res)
#endif

/*
 * CHECK_OVERFLOW(FUNC) evaluates FUNC once and, when the result is non-zero,
 * jumps to the OVERFLOW_OCCURRED label. It can therefore only be used inside
 * functions that define an OVERFLOW_OCCURRED goto label.
 */
#define CHECK_OVERFLOW(FUNC)                                                   \
  do {                                                                         \
    if ((FUNC) != 0)                                                           \
      goto OVERFLOW_OCCURRED;                                                  \
  } while (0)

const int days_per_month_table[2][12] = {
{31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31},
Expand Down Expand Up @@ -301,104 +324,188 @@ PyObject *extract_utc_offset(PyObject *obj) {
return tmp;
}


/*
 * Re-bases a calendar year onto 1970 by storing (year - 1970) in *result.
 * Returns the status of checked_int64_sub; non-zero means the subtraction
 * overflowed.
 */
static inline int scaleYearToEpoch(int64_t year, int64_t *result) {
  const int64_t epoch_year = 1970;
  return checked_int64_sub(year, epoch_year, result);
}

/*
 * Stores years * 12 in *result.
 * Returns the status of checked_int64_mul; non-zero means the product
 * overflowed.
 */
static inline int scaleYearsToMonths(int64_t years, int64_t *result) {
  const int64_t months_per_year = 12;
  return checked_int64_mul(years, months_per_year, result);
}

/*
 * Stores floor(days / 7) in *result, i.e. the week index rounded toward
 * negative infinity so that days -7..-1 all map to week -1.
 *
 * Always returns 0: the floored quotient of an int64_t by 7 is always
 * representable, so no overflow can occur. The int status return is kept so
 * the helper can be wrapped in CHECK_OVERFLOW like its siblings.
 */
static inline int scaleDaysToWeeks(int64_t days, int64_t *result) {
  /* C division truncates toward zero; a negative remainder means the
   * truncated quotient sits one above the floor. */
  int64_t weeks = days / 7;
  if (days % 7 < 0) {
    weeks -= 1;
  }

  *result = weeks;
  return 0;
}

/*
 * Stores days * 24 in *result.
 * Returns the status of checked_int64_mul; non-zero means the product
 * overflowed.
 */
static inline int scaleDaysToHours(int64_t days, int64_t *result) {
  const int64_t hours_per_day = 24;
  return checked_int64_mul(days, hours_per_day, result);
}

/*
 * Stores hours * 60 in *result.
 * Returns the status of checked_int64_mul; non-zero means the product
 * overflowed.
 */
static inline int scaleHoursToMinutes(int64_t hours, int64_t *result) {
  const int64_t minutes_per_hour = 60;
  return checked_int64_mul(hours, minutes_per_hour, result);
}

/*
 * Stores minutes * 60 in *result.
 * Returns the status of checked_int64_mul; non-zero means the product
 * overflowed.
 */
static inline int scaleMinutesToSeconds(int64_t minutes, int64_t *result) {
  const int64_t seconds_per_minute = 60;
  return checked_int64_mul(minutes, seconds_per_minute, result);
}

/*
 * Stores seconds * 1000 in *result.
 * Returns the status of checked_int64_mul; non-zero means the product
 * overflowed.
 */
static inline int scaleSecondsToMilliseconds(int64_t seconds, int64_t *result) {
  const int64_t millis_per_second = 1000;
  return checked_int64_mul(seconds, millis_per_second, result);
}

/*
 * Stores seconds * 1000000 in *result.
 * Returns the status of checked_int64_mul; non-zero means the product
 * overflowed.
 */
static inline int scaleSecondsToMicroseconds(int64_t seconds, int64_t *result) {
  const int64_t micros_per_second = 1000000;
  return checked_int64_mul(seconds, micros_per_second, result);
}

/*
 * Stores microseconds * 1000 in *result.
 * Returns the status of checked_int64_mul; non-zero means the product
 * overflowed.
 */
static inline int scaleMicrosecondsToNanoseconds(int64_t microseconds,
                                                 int64_t *result) {
  const int64_t nanos_per_micro = 1000;
  return checked_int64_mul(microseconds, nanos_per_micro, result);
}

/*
 * Stores microseconds * 1000000 in *result.
 * Returns the status of checked_int64_mul; non-zero means the product
 * overflowed.
 */
static inline int scaleMicrosecondsToPicoseconds(int64_t microseconds,
                                                 int64_t *result) {
  const int64_t picos_per_micro = 1000000;
  return checked_int64_mul(microseconds, picos_per_micro, result);
}

/*
 * Stores picoseconds * 1000 in *result.
 * Returns the status of checked_int64_mul; non-zero means the product
 * overflowed. The return type is int, matching the other scale* helpers:
 * the value is a status code, not a time quantity.
 */
static inline int scalePicosecondsToFemtoseconds(int64_t picoseconds,
                                                 int64_t *result) {
  return checked_int64_mul(picoseconds, 1000, result);
}

/*
 * Stores picoseconds * 1000000 in *result.
 * Returns the status of checked_int64_mul; non-zero means the product
 * overflowed. The return type is int, matching the other scale* helpers:
 * the value is a status code, not a time quantity.
 */
static inline int scalePicosecondsToAttoseconds(int64_t picoseconds,
                                                int64_t *result) {
  return checked_int64_mul(picoseconds, 1000000, result);
}

/*
* Converts a datetime from a datetimestruct to a datetime based
* on a metadata unit. The date is assumed to be valid.
* on a metadata unit. Returns -1 on and sets PyErr on error.
*/
npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base,
const npy_datetimestruct *dts) {
npy_datetime ret;

if (base == NPY_FR_Y) {
/* Truncate to the year */
ret = dts->year - 1970;
} else if (base == NPY_FR_M) {
/* Truncate to the month */
ret = 12 * (dts->year - 1970) + (dts->month - 1);
} else {
/* Otherwise calculate the number of days to start */
npy_int64 days = get_datetimestruct_days(dts);

switch (base) {
case NPY_FR_W:
/* Truncate to weeks */
if (days >= 0) {
ret = days / 7;
} else {
ret = (days - 6) / 7;
}
break;
case NPY_FR_D:
ret = days;
break;
case NPY_FR_h:
ret = days * 24 + dts->hour;
break;
case NPY_FR_m:
ret = (days * 24 + dts->hour) * 60 + dts->min;
break;
case NPY_FR_s:
ret = ((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec;
break;
case NPY_FR_ms:
ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 +
dts->sec) *
1000 +
dts->us / 1000;
break;
case NPY_FR_us:
ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 +
dts->sec) *
1000000 +
dts->us;
break;
case NPY_FR_ns:
ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 +
dts->sec) *
1000000 +
dts->us) *
1000 +
dts->ps / 1000;
break;
case NPY_FR_ps:
ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 +
dts->sec) *
1000000 +
dts->us) *
1000000 +
dts->ps;
break;
case NPY_FR_fs:
/* only 2.6 hours */
ret = (((((days * 24 + dts->hour) * 60 + dts->min) * 60 +
dts->sec) *
1000000 +
dts->us) *
1000000 +
dts->ps) *
1000 +
dts->as / 1000;
break;
case NPY_FR_as:
/* only 9.2 secs */
ret = (((((days * 24 + dts->hour) * 60 + dts->min) * 60 +
dts->sec) *
1000000 +
dts->us) *
1000000 +
dts->ps) *
1000000 +
dts->as;
break;
default:
/* Something got corrupted */
PyErr_SetString(
PyExc_ValueError,
"NumPy datetime metadata with corrupt unit value");
return -1;
}
if ((base == NPY_FR_Y) || (base == NPY_FR_M)) {
int64_t years;
CHECK_OVERFLOW(scaleYearToEpoch(dts->year, &years));

if (base == NPY_FR_Y) {
return years;
}

int64_t months;
CHECK_OVERFLOW(scaleYearsToMonths(years, &months));

int64_t months_adder;
CHECK_OVERFLOW(checked_int64_sub(dts->month, 1, &months_adder));
CHECK_OVERFLOW(checked_int64_add(months, months_adder, &months));

if (base == NPY_FR_M) {
return months;
}
}

const int64_t days = get_datetimestruct_days(dts);
if (base == NPY_FR_D) {
return days;
}

if (base == NPY_FR_W) {
int64_t weeks;
CHECK_OVERFLOW(scaleDaysToWeeks(days, &weeks));
return weeks;
}

int64_t hours;
CHECK_OVERFLOW(scaleDaysToHours(days, &hours));
CHECK_OVERFLOW(checked_int64_add(hours, dts->hour, &hours));

if (base == NPY_FR_h) {
return hours;
}
return ret;


int64_t minutes;
CHECK_OVERFLOW(scaleHoursToMinutes(hours, &minutes));
CHECK_OVERFLOW(checked_int64_add(minutes, dts->min, &minutes));

if (base == NPY_FR_m) {
return minutes;
}

int64_t seconds;
CHECK_OVERFLOW(scaleMinutesToSeconds(minutes, &seconds));
CHECK_OVERFLOW(checked_int64_add(seconds, dts->sec, &seconds));

if (base == NPY_FR_s) {
return seconds;
}

if (base == NPY_FR_ms) {
int64_t milliseconds;
CHECK_OVERFLOW(scaleSecondsToMilliseconds(seconds, &milliseconds));
CHECK_OVERFLOW(checked_int64_add(milliseconds, dts->us / 1000, &milliseconds));

return milliseconds;
}

int64_t microseconds;
CHECK_OVERFLOW(scaleSecondsToMicroseconds(seconds, &microseconds));
CHECK_OVERFLOW(checked_int64_add(microseconds, dts->us, &microseconds));

if (base == NPY_FR_us) {
return microseconds;
}

if (base == NPY_FR_ns) {
int64_t nanoseconds;
CHECK_OVERFLOW(scaleMicrosecondsToNanoseconds(microseconds, &nanoseconds));
CHECK_OVERFLOW(checked_int64_add(nanoseconds, dts->ps / 1000, &nanoseconds));

return nanoseconds;
}

int64_t picoseconds;
CHECK_OVERFLOW(scaleMicrosecondsToPicoseconds(microseconds, &picoseconds));
CHECK_OVERFLOW(checked_int64_add(picoseconds, dts->ps, &picoseconds));

if (base == NPY_FR_ps) {
return picoseconds;
}

if (base == NPY_FR_fs) {
int64_t femtoseconds;
CHECK_OVERFLOW(scalePicosecondsToFemtoseconds(picoseconds, &femtoseconds));
CHECK_OVERFLOW(checked_int64_add(femtoseconds, dts->as / 1000, &femtoseconds));
return femtoseconds;
}

if (base == NPY_FR_as) {
int64_t attoseconds;
CHECK_OVERFLOW(scalePicosecondsToAttoseconds(picoseconds, &attoseconds));
CHECK_OVERFLOW(checked_int64_add(attoseconds, dts->as, &attoseconds));
return attoseconds;
}

/* Something got corrupted */
PyErr_SetString(PyExc_ValueError,
"NumPy datetime metadata with corrupt unit value");
return -1;

OVERFLOW_OCCURRED:
PyErr_SetString(PyExc_OverflowError,
"Overflow occurred in npy_datetimestruct_to_datetime");
return -1;
}

/*
Expand Down
1 change: 0 additions & 1 deletion pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -488,7 +488,6 @@ cpdef array_to_datetime(

elif PyDate_Check(val):
iresult[i] = pydate_to_dt64(val, &dts)
check_dts_bounds(&dts)

elif is_datetime64_object(val):
iresult[i] = get_datetime64_nanos(val, NPY_FR_ns)
Expand Down
1 change: 0 additions & 1 deletion pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -764,5 +764,4 @@ cdef int64_t parse_pydatetime(
result = val.as_unit("ns")._value
else:
result = pydatetime_to_dt64(val, dts)
check_dts_bounds(dts)
return result
6 changes: 3 additions & 3 deletions pandas/_libs/tslibs/np_datetime.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ cdef extern from "pandas/datetime/pd_datetime.h":
npy_datetimestruct *result) nogil

npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT fr,
npy_datetimestruct *d) nogil
npy_datetimestruct *d) nogil except? -1

void pandas_timedelta_to_timedeltastruct(npy_timedelta val,
NPY_DATETIMEUNIT fr,
Expand All @@ -89,11 +89,11 @@ cdef check_dts_bounds(npy_datetimestruct *dts, NPY_DATETIMEUNIT unit=?)

cdef int64_t pydatetime_to_dt64(
datetime val, npy_datetimestruct *dts, NPY_DATETIMEUNIT reso=?
)
) except? -1
cdef void pydatetime_to_dtstruct(datetime dt, npy_datetimestruct *dts) noexcept
cdef int64_t pydate_to_dt64(
date val, npy_datetimestruct *dts, NPY_DATETIMEUNIT reso=?
)
) except? -1
cdef void pydate_to_dtstruct(date val, npy_datetimestruct *dts) noexcept

cdef npy_datetime get_datetime64_value(object obj) noexcept nogil
Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/tslibs/np_datetime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ cdef void pydatetime_to_dtstruct(datetime dt, npy_datetimestruct *dts) noexcept:

cdef int64_t pydatetime_to_dt64(datetime val,
npy_datetimestruct *dts,
NPY_DATETIMEUNIT reso=NPY_FR_ns):
NPY_DATETIMEUNIT reso=NPY_FR_ns) except? -1:
"""
Note we are assuming that the datetime object is timezone-naive.
"""
Expand All @@ -280,7 +280,7 @@ cdef void pydate_to_dtstruct(date val, npy_datetimestruct *dts) noexcept:

cdef int64_t pydate_to_dt64(
date val, npy_datetimestruct *dts, NPY_DATETIMEUNIT reso=NPY_FR_ns
):
) except? -1:
pydate_to_dtstruct(val, dts)
return npy_datetimestruct_to_datetime(reso, dts)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/tslibs/test_array_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def test_coerce_outside_ns_bounds(invalid_date, errors):
if errors == "raise":
msg = "^Out of bounds nanosecond timestamp: .*, at position 0$"

with pytest.raises(ValueError, match=msg):
with pytest.raises(OverflowError, match=msg):
tslib.array_to_datetime(**kwargs)
else: # coerce.
result, _ = tslib.array_to_datetime(**kwargs)
Expand Down