Skip to content

Commit 0b04174

Browse files
Improve to_datetime bounds checking (#50183)
* add test for float to_datetime near overflow bounds
* fix float to_datetime near overflow bounds
* fix typo and formatting
* fix formatting
* fix test to not fail on rounding differences
* don't use approximate comparison on datetimes, it doesn't work
* also can't convert datetime to float
* match dtypes
* TST: don't try to use non-integer years (see #50301)
* TST: don't cross an integer (tsmax_in_days happens to be close to an integer, and this is a test of rounding)
* PERF: remove unnecessary copy
* add whatsnew
1 parent 99f98de commit 0b04174

File tree

3 files changed

+25
-9
lines changed

3 files changed

+25
-9
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -950,6 +950,7 @@ Datetimelike
950950
- Bug in :func:`to_datetime` was failing to parse date strings ``'today'`` and ``'now'`` if ``format`` was not ISO8601 (:issue:`50359`)
951951
- Bug in :func:`Timestamp.utctimetuple` raising a ``TypeError`` (:issue:`32174`)
952952
- Bug in :func:`to_datetime` was raising ``ValueError`` when parsing mixed-offset :class:`Timestamp` with ``errors='ignore'`` (:issue:`50585`)
953+
- Bug in :func:`to_datetime` was incorrectly handling floating-point inputs within 1 ``unit`` of the overflow boundaries (:issue:`50183`)
953954

954955
Timedelta
955956
^^^^^^^^^

pandas/core/tools/datetimes.py

+3-9
Original file line number | Diff line number | Diff line change
@@ -514,11 +514,9 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index:
514514
elif arg.dtype.kind == "f":
515515
mult, _ = precision_from_unit(unit)
516516

517-
iresult = arg.astype("i8")
518517
mask = np.isnan(arg) | (arg == iNaT)
519-
iresult[mask] = 0
520-
521-
fvalues = iresult.astype("f8") * mult
518+
fvalues = (arg * mult).astype("f8", copy=False)
519+
fvalues[mask] = 0
522520

523521
if (fvalues < Timestamp.min.value).any() or (
524522
fvalues > Timestamp.max.value
@@ -528,11 +526,7 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index:
528526
return _to_datetime_with_unit(arg, unit, name, utc, errors)
529527
raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'")
530528

531-
# TODO: is fresult meaningfully different from fvalues?
532-
fresult = (arg * mult).astype("f8")
533-
fresult[mask] = 0
534-
535-
arr = fresult.astype("M8[ns]", copy=False)
529+
arr = fvalues.astype("M8[ns]", copy=False)
536530
arr[mask] = np.datetime64("NaT", "ns")
537531

538532
tz_parsed = None

pandas/tests/tools/test_to_datetime.py

+21
Original file line number | Diff line number | Diff line change
@@ -1935,6 +1935,27 @@ def test_to_timestamp_unit_coerce(self, bad_val):
19351935
result = to_datetime([1, 2, bad_val], unit="D", errors="coerce")
19361936
tm.assert_index_equal(result, expected)
19371937

1938+
def test_float_to_datetime_raise_near_bounds(self):
1939+
# GH50183
1940+
msg = "cannot convert input with unit 'D'"
1941+
oneday_in_ns = 1e9 * 60 * 60 * 24
1942+
tsmax_in_days = 2**63 / oneday_in_ns # 2**63 ns, in days
1943+
# just in bounds
1944+
should_succeed = Series(
1945+
[0, tsmax_in_days - 0.005, -tsmax_in_days + 0.005], dtype=float
1946+
)
1947+
expected = (should_succeed * oneday_in_ns).astype(np.int64)
1948+
for error_mode in ["raise", "coerce", "ignore"]:
1949+
result1 = to_datetime(should_succeed, unit="D", errors=error_mode)
1950+
tm.assert_almost_equal(result1.astype(np.int64), expected, rtol=1e-10)
1951+
# just out of bounds
1952+
should_fail1 = Series([0, tsmax_in_days + 0.005], dtype=float)
1953+
should_fail2 = Series([0, -tsmax_in_days - 0.005], dtype=float)
1954+
with pytest.raises(OutOfBoundsDatetime, match=msg):
1955+
to_datetime(should_fail1, unit="D", errors="raise")
1956+
with pytest.raises(OutOfBoundsDatetime, match=msg):
1957+
to_datetime(should_fail2, unit="D", errors="raise")
1958+
19381959

19391960
class TestToDatetimeDataFrame:
19401961
@pytest.fixture

0 commit comments

Comments
 (0)