Skip to content

Commit d66b5b5

Browse files
robert-schmidtke (Robert Schmidtke)
authored and committed
BUG: Fix near-minimum timestamp handling (pandas-dev#57314)
* attempt failing test
* expand test for demonstration purposes
* fix near-minimum timestamp overflow when scaling from microseconds to nanoseconds
* minor refactor
* add comments around specifically handling near-minimum microsecond and nanosecond timestamps
* consolidate comments

---------

Co-authored-by: Robert Schmidtke <[email protected]>
1 parent 34d8c90 commit d66b5b5

File tree

3 files changed

+32
-4
lines changed

3 files changed

+32
-4
lines changed

doc/source/whatsnew/v2.2.1.rst

+1
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ Fixed regressions
2121
~~~~~~~~~~~~~~~~~
2222
- Fixed memory leak in :func:`read_csv` (:issue:`57039`)
2323
- Fixed performance regression in :meth:`Series.combine_first` (:issue:`55845`)
24+
- Fixed regression causing overflow for near-minimum timestamps (:issue:`57150`)
2425
- Fixed regression in :func:`concat` changing long-standing behavior that always sorted the non-concatenation axis when the axis was a :class:`DatetimeIndex` (:issue:`57006`)
2526
- Fixed regression in :func:`merge_ordered` raising ``TypeError`` for ``fill_method="ffill"`` and ``how="left"`` (:issue:`57010`)
2627
- Fixed regression in :func:`pandas.testing.assert_series_equal` defaulting to ``check_exact=True`` when checking the :class:`Index` (:issue:`57067`)

pandas/_libs/src/vendored/numpy/datetime/np_datetime.c

+14-4
Original file line number | Diff line number | Diff line change
@@ -482,10 +482,20 @@ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base,
482482

483483
if (base == NPY_FR_ns) {
484484
int64_t nanoseconds;
485-
PD_CHECK_OVERFLOW(
486-
scaleMicrosecondsToNanoseconds(microseconds, &nanoseconds));
487-
PD_CHECK_OVERFLOW(
488-
checked_int64_add(nanoseconds, dts->ps / 1000, &nanoseconds));
485+
486+
// Minimum valid timestamp in nanoseconds (1677-09-21 00:12:43.145224193).
487+
const int64_t min_nanoseconds = NPY_MIN_INT64 + 1;
488+
if (microseconds == min_nanoseconds / 1000 - 1) {
489+
// For values within one microsecond of min_nanoseconds, use it as base
490+
// and offset it with nanosecond delta to avoid overflow during scaling.
491+
PD_CHECK_OVERFLOW(checked_int64_add(
492+
min_nanoseconds, (dts->ps - _NS_MIN_DTS.ps) / 1000, &nanoseconds));
493+
} else {
494+
PD_CHECK_OVERFLOW(
495+
scaleMicrosecondsToNanoseconds(microseconds, &nanoseconds));
496+
PD_CHECK_OVERFLOW(
497+
checked_int64_add(nanoseconds, dts->ps / 1000, &nanoseconds));
498+
}
489499

490500
return nanoseconds;
491501
}

pandas/tests/tslibs/test_array_to_datetime.py

+17
Original file line number | Diff line number | Diff line change
@@ -262,6 +262,23 @@ def test_to_datetime_barely_out_of_bounds():
262262
tslib.array_to_datetime(arr)
263263

264264

265+
@pytest.mark.parametrize(
266+
"timestamp",
267+
[
268+
# Close enough to bounds that scaling micros to nanos overflows
269+
# but adding nanos would result in an in-bounds datetime.
270+
"1677-09-21T00:12:43.145224193",
271+
"1677-09-21T00:12:43.145224999",
272+
# this always worked
273+
"1677-09-21T00:12:43.145225000",
274+
],
275+
)
276+
def test_to_datetime_barely_inside_bounds(timestamp):
277+
# see gh-57150
278+
result, _ = tslib.array_to_datetime(np.array([timestamp], dtype=object))
279+
tm.assert_numpy_array_equal(result, np.array([timestamp], dtype="M8[ns]"))
280+
281+
265282
class SubDatetime(datetime):
266283
pass
267284

0 commit comments

Comments
 (0)