Skip to content

Commit 9680ef6

Browse files
author
Robert Schmidtke
committed
generalize pd.Timestamp.min nanosecond-specific handling of near-minimum timestamps into a multiply-add function
1 parent 1210fd1 commit 9680ef6

File tree

1 file changed

+46
-83
lines changed

1 file changed

+46
-83
lines changed

pandas/_libs/src/vendored/numpy/datetime/np_datetime.c

Lines changed: 46 additions & 83 deletions
Original file line number | Diff line number | Diff line change
@@ -338,6 +338,41 @@ PyObject *extract_utc_offset(PyObject *obj) {
338338
return tmp;
339339
}
340340

341+
static int scaleAndOffset(int64_t timestamp, int64_t scale, int64_t offset,
342+
int64_t *result) {
343+
if (timestamp >= 0) {
344+
// fast path for positive timestamps
345+
if (checked_int64_mul(timestamp, scale, result) != 0) {
346+
return 1;
347+
}
348+
349+
return checked_int64_add(*result, offset, result);
350+
}
351+
352+
// for negative timestamps, scaling could overflow
353+
// even if the scaled and offset timestamp is valid
354+
const int overflow = checked_int64_mul(timestamp, scale, result);
355+
if (overflow != 0) {
356+
// given the current scale, this is the smallest supported timestamp
357+
const int64_t min_scalable_timestamp = (NPY_MIN_INT64 + 1) / scale;
358+
359+
// if the requested timestamp is just below that, doing the calculation
360+
// in reverse from the absolute minimum timestamp could work
361+
if (timestamp == min_scalable_timestamp - 1) {
362+
const int64_t min_offset = (NPY_MIN_INT64 + 1) % scale;
363+
if (checked_int64_sub(offset, min_offset, result) != 0) {
364+
return 1;
365+
}
366+
367+
return checked_int64_add(NPY_MIN_INT64 + 1, *result, result);
368+
}
369+
370+
return 1;
371+
}
372+
373+
return checked_int64_add(*result, offset, result);
374+
}
375+
341376
/*
 * Rebases a calendar year so that 1970 maps to 0 and writes it to `*result`.
 * Forwards the status of checked_int64_sub (non-zero signals overflow).
 */
static inline int scaleYearToEpoch(int64_t year, int64_t *result) {
  const int64_t epoch_year = 1970;
  return checked_int64_sub(year, epoch_year, result);
}
@@ -362,46 +397,6 @@ static inline int scaleDaysToWeeks(int64_t days, int64_t *result) {
362397
}
363398
}
364399

365-
/*
 * Multiplies a day count by 24 and writes the hour count to `*result`.
 * Forwards the status of checked_int64_mul (non-zero signals overflow).
 */
static inline int scaleDaysToHours(int64_t days, int64_t *result) {
  const int64_t hours_per_day = 24;
  return checked_int64_mul(days, hours_per_day, result);
}
368-
369-
/*
 * Multiplies an hour count by 60 and writes the minute count to `*result`.
 * Forwards the status of checked_int64_mul (non-zero signals overflow).
 */
static inline int scaleHoursToMinutes(int64_t hours, int64_t *result) {
  const int64_t minutes_per_hour = 60;
  return checked_int64_mul(hours, minutes_per_hour, result);
}
372-
373-
/*
 * Multiplies a minute count by 60 and writes the second count to `*result`.
 * Forwards the status of checked_int64_mul (non-zero signals overflow).
 */
static inline int scaleMinutesToSeconds(int64_t minutes, int64_t *result) {
  const int64_t seconds_per_minute = 60;
  return checked_int64_mul(minutes, seconds_per_minute, result);
}
376-
377-
/*
 * Multiplies a second count by 1000 and writes the millisecond count to
 * `*result`. Forwards the status of checked_int64_mul (non-zero signals
 * overflow).
 */
static inline int scaleSecondsToMilliseconds(int64_t seconds,
                                             int64_t *result) {
  const int64_t millis_per_second = 1000;
  return checked_int64_mul(seconds, millis_per_second, result);
}
380-
381-
/*
 * Multiplies a second count by 1000000 and writes the microsecond count to
 * `*result`. Forwards the status of checked_int64_mul (non-zero signals
 * overflow).
 */
static inline int scaleSecondsToMicroseconds(int64_t seconds,
                                             int64_t *result) {
  const int64_t micros_per_second = 1000000;
  return checked_int64_mul(seconds, micros_per_second, result);
}
384-
385-
/*
 * Multiplies a microsecond count by 1000 and writes the nanosecond count to
 * `*result`. Forwards the status of checked_int64_mul (non-zero signals
 * overflow).
 */
static inline int scaleMicrosecondsToNanoseconds(int64_t microseconds,
                                                 int64_t *result) {
  const int64_t nanos_per_micro = 1000;
  return checked_int64_mul(microseconds, nanos_per_micro, result);
}
389-
390-
/*
 * Multiplies a microsecond count by 1000000 and writes the picosecond count
 * to `*result`. Forwards the status of checked_int64_mul (non-zero signals
 * overflow).
 */
static inline int scaleMicrosecondsToPicoseconds(int64_t microseconds,
                                                 int64_t *result) {
  const int64_t picos_per_micro = 1000000;
  return checked_int64_mul(microseconds, picos_per_micro, result);
}
394-
395-
/*
 * Multiplies a picosecond count by 1000 and writes the femtosecond count to
 * `*result`. Forwards the status of checked_int64_mul (non-zero signals
 * overflow).
 *
 * The return type is `int`, matching the other scale* helpers: the value is
 * an overflow status, not a timestamp.
 */
static inline int scalePicosecondsToFemtoseconds(int64_t picoseconds,
                                                 int64_t *result) {
  return checked_int64_mul(picoseconds, 1000, result);
}
399-
400-
/*
 * Multiplies a picosecond count by 1000000 and writes the attosecond count
 * to `*result`. Forwards the status of checked_int64_mul (non-zero signals
 * overflow).
 *
 * The return type is `int`, matching the other scale* helpers: the value is
 * an overflow status, not a timestamp.
 */
static inline int scalePicosecondsToAttoseconds(int64_t picoseconds,
                                                int64_t *result) {
  return checked_int64_mul(picoseconds, 1000000, result);
}
404-
405400
/*
406401
* Converts a datetime from a datetimestruct to a datetime based
407402
* on a metadata unit. Returns -1 on and sets PyErr on error.
@@ -440,80 +435,50 @@ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base,
440435
}
441436

442437
int64_t hours;
443-
PD_CHECK_OVERFLOW(scaleDaysToHours(days, &hours));
444-
PD_CHECK_OVERFLOW(checked_int64_add(hours, dts->hour, &hours));
438+
PD_CHECK_OVERFLOW(scaleAndOffset(days, 24, dts->hour, &hours));
445439

446440
if (base == NPY_FR_h) {
447441
return hours;
448442
}
449443

450444
int64_t minutes;
451-
PD_CHECK_OVERFLOW(scaleHoursToMinutes(hours, &minutes));
452-
PD_CHECK_OVERFLOW(checked_int64_add(minutes, dts->min, &minutes));
445+
PD_CHECK_OVERFLOW(scaleAndOffset(hours, 60, dts->min, &minutes));
453446

454447
if (base == NPY_FR_m) {
455448
return minutes;
456449
}
457450

458451
int64_t seconds;
459-
PD_CHECK_OVERFLOW(scaleMinutesToSeconds(minutes, &seconds));
460-
PD_CHECK_OVERFLOW(checked_int64_add(seconds, dts->sec, &seconds));
452+
PD_CHECK_OVERFLOW(scaleAndOffset(minutes, 60, dts->sec, &seconds));
461453

462454
if (base == NPY_FR_s) {
463455
return seconds;
464456
}
465457

466458
if (base == NPY_FR_ms) {
467459
int64_t milliseconds;
468-
PD_CHECK_OVERFLOW(scaleSecondsToMilliseconds(seconds, &milliseconds));
469460
PD_CHECK_OVERFLOW(
470-
checked_int64_add(milliseconds, dts->us / 1000, &milliseconds));
471-
461+
scaleAndOffset(seconds, 1000, dts->us / 1000, &milliseconds));
472462
return milliseconds;
473463
}
474464

475465
int64_t microseconds;
476-
PD_CHECK_OVERFLOW(scaleSecondsToMicroseconds(seconds, &microseconds));
477-
PD_CHECK_OVERFLOW(checked_int64_add(microseconds, dts->us, &microseconds));
466+
PD_CHECK_OVERFLOW(scaleAndOffset(seconds, 1000000, dts->us, &microseconds));
478467

479468
if (base == NPY_FR_us) {
480469
return microseconds;
481470
}
482471

483472
if (base == NPY_FR_ns) {
484473
int64_t nanoseconds;
485-
486-
// GH-57150: handle near-minimum valid timestamps
487-
const int overflow = scaleMicrosecondsToNanoseconds(microseconds, &nanoseconds);
488-
if (overflow != 0) {
489-
// scaling overflows for pd.Timestamp.min (1677-09-21 00:12:43.145224193)
490-
const int64_t near_min_scaled_microseconds = -9223372036854776LL;
491-
if (microseconds == near_min_scaled_microseconds) {
492-
// reverse calculation to avoid overflow
493-
const int near_min_nanoseconds = 193;
494-
int64_t nanoseconds_adder;
495-
PD_CHECK_OVERFLOW(
496-
checked_int64_sub(dts->ps / 1000, near_min_nanoseconds, &nanoseconds_adder));
497-
498-
// pd.Timestamp.min
499-
nanoseconds = NPY_MIN_INT64 + 1;
500-
PD_CHECK_OVERFLOW(
501-
checked_int64_add(nanoseconds, nanoseconds_adder, &nanoseconds));
502-
503-
return nanoseconds;
504-
}
505-
}
506-
507-
PD_CHECK_OVERFLOW(overflow);
508474
PD_CHECK_OVERFLOW(
509-
checked_int64_add(nanoseconds, dts->ps / 1000, &nanoseconds));
510-
475+
scaleAndOffset(microseconds, 1000, dts->ps / 1000, &nanoseconds));
511476
return nanoseconds;
512477
}
513478

514479
int64_t picoseconds;
515-
PD_CHECK_OVERFLOW(scaleMicrosecondsToPicoseconds(microseconds, &picoseconds));
516-
PD_CHECK_OVERFLOW(checked_int64_add(picoseconds, dts->ps, &picoseconds));
480+
PD_CHECK_OVERFLOW(
481+
scaleAndOffset(microseconds, 1000000, dts->ps, &picoseconds));
517482

518483
if (base == NPY_FR_ps) {
519484
return picoseconds;
@@ -522,16 +487,14 @@ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base,
522487
if (base == NPY_FR_fs) {
523488
int64_t femtoseconds;
524489
PD_CHECK_OVERFLOW(
525-
scalePicosecondsToFemtoseconds(picoseconds, &femtoseconds));
526-
PD_CHECK_OVERFLOW(
527-
checked_int64_add(femtoseconds, dts->as / 1000, &femtoseconds));
490+
scaleAndOffset(picoseconds, 1000, dts->as / 1000, &femtoseconds));
528491
return femtoseconds;
529492
}
530493

531494
if (base == NPY_FR_as) {
532495
int64_t attoseconds;
533-
PD_CHECK_OVERFLOW(scalePicosecondsToAttoseconds(picoseconds, &attoseconds));
534-
PD_CHECK_OVERFLOW(checked_int64_add(attoseconds, dts->as, &attoseconds));
496+
PD_CHECK_OVERFLOW(
497+
scaleAndOffset(picoseconds, 1000000, dts->as, &attoseconds));
535498
return attoseconds;
536499
}
537500

0 commit comments

Comments
 (0)