Skip to content

Commit 3bb8ad1

Browse files
authored
BUG: dt64 astype silent overflows (#55979)
* BUG: dt64 astype silent overflows * GH ref
1 parent 55cd96a commit 3bb8ad1

File tree

4 files changed

+34
-13
lines changed

4 files changed

+34
-13
lines changed

doc/source/whatsnew/v2.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,7 @@ Datetimelike
347347
- Bug in :meth:`Index.is_monotonic_increasing` and :meth:`Index.is_monotonic_decreasing` always caching :meth:`Index.is_unique` as ``True`` when first value in index is ``NaT`` (:issue:`55755`)
348348
- Bug in :meth:`Index.view` to a datetime64 dtype with non-supported resolution incorrectly raising (:issue:`55710`)
349349
- Bug in :meth:`Tick.delta` with very large ticks raising ``OverflowError`` instead of ``OutOfBoundsTimedelta`` (:issue:`55503`)
350+
- Bug in ``.astype`` converting from a higher-resolution ``datetime64`` dtype to a lower-resolution ``datetime64`` dtype (e.g. ``datetime64[us]->datetime64[ms]``) silently overflowing with values near the lower implementation bound (:issue:`55979`)
350351
- Bug in adding or subtracting a :class:`Week` offset to a ``datetime64`` :class:`Series`, :class:`Index`, or :class:`DataFrame` column with non-nanosecond resolution returning incorrect results (:issue:`55583`)
351352
- Bug in addition or subtraction of :class:`BusinessDay` offset with ``offset`` attribute to non-nanosecond :class:`Index`, :class:`Series`, or :class:`DataFrame` column giving incorrect results (:issue:`55608`)
352353
- Bug in addition or subtraction of :class:`DateOffset` objects with microsecond components to ``datetime64`` :class:`Index`, :class:`Series`, or :class:`DataFrame` columns with non-nanosecond resolution (:issue:`55595`)

pandas/_libs/tslibs/conversion.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -716,7 +716,7 @@ cdef int64_t parse_pydatetime(
716716
result = _ts.value
717717
else:
718718
if isinstance(val, _Timestamp):
719-
result = (<_Timestamp>val)._as_creso(creso, round_ok=False)._value
719+
result = (<_Timestamp>val)._as_creso(creso, round_ok=True)._value
720720
else:
721721
result = pydatetime_to_dt64(val, dts, reso=creso)
722722
return result

pandas/_libs/tslibs/np_datetime.pyx

+16-12
Original file line numberDiff line numberDiff line change
@@ -365,13 +365,10 @@ cpdef ndarray astype_overflowsafe(
365365
return values
366366

367367
elif from_unit > to_unit:
368-
if round_ok:
369-
# e.g. ns -> us, so there is no risk of overflow, so we can use
370-
# numpy's astype safely. Note there _is_ risk of truncation.
371-
return values.astype(dtype)
372-
else:
373-
iresult2 = astype_round_check(values.view("i8"), from_unit, to_unit)
374-
return iresult2.view(dtype)
368+
iresult2 = _astype_overflowsafe_to_smaller_unit(
369+
values.view("i8"), from_unit, to_unit, round_ok=round_ok
370+
)
371+
return iresult2.view(dtype)
375372

376373
if (<object>values).dtype.byteorder == ">":
377374
# GH#29684 we incorrectly get OutOfBoundsDatetime if we dont swap
@@ -502,13 +499,20 @@ cdef int op_to_op_code(op):
502499
return Py_GT
503500

504501

505-
cdef ndarray astype_round_check(
502+
cdef ndarray _astype_overflowsafe_to_smaller_unit(
506503
ndarray i8values,
507504
NPY_DATETIMEUNIT from_unit,
508-
NPY_DATETIMEUNIT to_unit
505+
NPY_DATETIMEUNIT to_unit,
506+
bint round_ok,
509507
):
510-
# cases with from_unit > to_unit, e.g. ns->us, raise if the conversion
511-
# involves truncation, e.g. 1500ns->1us
508+
"""
509+
Overflow-safe conversion for cases with from_unit > to_unit, e.g. ns->us.
510+
In addition to checking for overflows (which can occur near the lower
511+
implementation bound, see numpy#22346), this checks for truncation,
512+
e.g. 1500ns->1us.
513+
"""
514+
# e.g. test_astype_ns_to_ms_near_bounds is a case with round_ok=True where
515+
# just using numpy's astype silently fails
512516
cdef:
513517
Py_ssize_t i, N = i8values.size
514518

@@ -531,7 +535,7 @@ cdef ndarray astype_round_check(
531535
new_value = NPY_DATETIME_NAT
532536
else:
533537
new_value, mod = divmod(value, mult)
534-
if mod != 0:
538+
if not round_ok and mod != 0:
535539
# TODO: avoid runtime import
536540
from pandas._libs.tslibs.dtypes import npy_unit_to_abbrev
537541
from_abbrev = npy_unit_to_abbrev(from_unit)

pandas/tests/arrays/test_datetimes.py

+16
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,22 @@ def test_cmp_dt64_arraylike_tznaive(self, comparison_op):
309309

310310

311311
class TestDatetimeArray:
312+
def test_astype_ns_to_ms_near_bounds(self):
313+
# GH#55979
314+
ts = pd.Timestamp("1677-09-21 00:12:43.145225")
315+
target = ts.as_unit("ms")
316+
317+
dta = DatetimeArray._from_sequence([ts], dtype="M8[ns]")
318+
assert (dta.view("i8") == ts.as_unit("ns").value).all()
319+
320+
result = dta.astype("M8[ms]")
321+
assert result[0] == target
322+
323+
expected = DatetimeArray._from_sequence([ts], dtype="M8[ms]")
324+
assert (expected.view("i8") == target._value).all()
325+
326+
tm.assert_datetime_array_equal(result, expected)
327+
312328
def test_astype_non_nano_tznaive(self):
313329
dti = pd.date_range("2016-01-01", periods=3)
314330

0 commit comments

Comments
 (0)