Skip to content

BUG: Ensure to_datetime raises errors for out-of-bounds scalar inputs #60744

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 9 commits into from
40 changes: 36 additions & 4 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,11 +479,20 @@ def _array_strptime_with_fallback(
return Index(result, dtype=result.dtype, name=name)


def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index:
def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> DatetimeIndex:
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Making the annotation -> DatetimeIndex reflects that reality more precisely and helps both developers and tooling (like mypy) be certain of the return type.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If errors=="ignore" we would get an Index[object] back, so the original `-> Index` annotation is correct.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. Thanks for pointing this out.

"""
to_datetime specalized to the case where a 'unit' is passed.
to_datetime specialized to the case where a 'unit' is passed.

Note: This function currently treats values at the upper bound differently
from values at the lower bound.
For upper bound, it raises OutOfBoundsDatetime.
For lower bound, it returns NaT.
"""
arg = extract_array(arg, extract_numpy=True)
# Fix GH#60677
# Ensure scalar and array-like both become arrays
# (so both paths use the same code).
arg = np.atleast_1d(arg)

# GH#30050 pass an ndarray to tslib.array_to_datetime
# because it expects an ndarray argument
Expand All @@ -496,6 +505,31 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index:
if arg.dtype.kind in "iu":
# Note we can't do "f" here because that could induce unwanted
# rounding GH#14156, GH#20445
# Fix GH#60677
# ------------------------------------------------
# A) **Check for uint64 values above int64 max**
# so we don't accidentally wrap around to -1, etc.
# ------------------------------------------------
if arg.dtype.kind == "u": # unsigned
above_max = arg > np.iinfo(np.int64).max
if above_max.any():
if errors == "raise":
raise OutOfBoundsDatetime(
"Cannot convert uint64 values above"
f"{np.iinfo(np.int64).max}"
"to a 64-bit signed datetime64[ns]."
)
else:
# For errors != "raise" (e.g. "coerce" or "ignore"),
# we can replace out-of-range entries with NaN (-> NaT),
# then switch to the fallback object path:
arg = arg.astype(object)
arg[above_max] = np.nan
return _to_datetime_with_unit(arg, unit, name, utc, errors)

# ------------------------------------------------
# B) Proceed with normal numeric -> datetime logic
# ------------------------------------------------
arr = arg.astype(f"datetime64[{unit}]", copy=False)
try:
arr = astype_overflowsafe(arr, np.dtype("M8[ns]"), copy=False)
Expand Down Expand Up @@ -532,8 +566,6 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index:
)

result = DatetimeIndex(arr, name=name)
if not isinstance(result, DatetimeIndex):
return result

# GH#23758: We may still need to localize the result with tz
# GH#25546: Apply tz_parsed first (from arg), then tz (from caller)
Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -3689,3 +3689,30 @@ def test_to_datetime_wrapped_datetime64_ps():
["1970-01-01 00:00:01.901901901"], dtype="datetime64[ns]", freq=None
)
tm.assert_index_equal(result, expected)


def test_to_datetime_scalar_out_of_bounds():
    """Check to_datetime's out-of-bounds handling for scalar and list inputs.

    With ``unit="ns"``:

    * the uint64 maximum must raise ``OutOfBoundsDatetime``,
    * the int64 minimum must come back as ``NaT``,
    * an in-range integer passed as a scalar must yield a ``Timestamp``.
    """
    too_large = np.iinfo("uint64").max
    lowest = np.iinfo("int64").min

    # Upper bound: the scalar form and the list-wrapped form must both raise
    # OutOfBoundsDatetime.
    for arg in (too_large, [too_large]):
        with pytest.raises(OutOfBoundsDatetime):
            to_datetime(arg, unit="ns")

    # Lower bound: the scalar form returns NaT itself ...
    assert to_datetime(lowest, unit="ns") is NaT

    # ... and the list form returns NaT as its only element.
    assert to_datetime([lowest], unit="ns")[0] is NaT

    # Sanity check: an ordinary nanosecond timestamp converts without error.
    in_range = 1_700_000_000_000_000_000
    assert isinstance(to_datetime(in_range, unit="ns"), Timestamp)
Loading