# Patch summary (text before the first "diff --git" header is not part of any hunk).
#
# Purpose: fix to_datetime handling of floating-point inputs that lie within
# one `unit` of the datetime overflow boundaries (GH 50183).
#
# Files touched:
#   * doc/source/whatsnew/v2.0.0.rst
#       Adds one release-note entry under the "Datetimelike" section.
#   * pandas/core/tools/datetimes.py  (_to_datetime_with_unit, float branch)
#       - Removes the intermediate `arg.astype("i8")` step, so the values used
#         for the bounds check are no longer truncated to integers before
#         being multiplied by `mult`.
#       - `fvalues` is now computed once as `(arg * mult).astype("f8")` with
#         masked (NaN / iNaT) positions zeroed, and that same array feeds both
#         the bounds check and the final cast to "M8[ns]"; the separate
#         `fresult` array and its TODO comment are removed.
#   * pandas/tests/tools/test_to_datetime.py
#       Adds test_float_to_datetime_raise_near_bounds: values 0.005 days inside
#       +/- 2**63 ns must convert under every `errors` mode; values 0.005 days
#       outside must raise OutOfBoundsDatetime with errors="raise".
#
# NOTE(review): the upper-bound check is `fvalues > Timestamp.max.value`.
# Presumably Timestamp.max.value is 2**63 - 1, which is not exactly
# representable as float64 and would compare as 2**63 -- so an input equal to
# exactly 2**63 ns may pass the check and overflow in the later cast. Confirm
# whether `>=` against a float bound is needed; not changed here.
#
# NOTE(review): line breaks and indentation inside the hunks were restored
# from a whitespace-collapsed copy; verify with `git apply --check` before use.
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 7555c8b68a4f7..1894ce4ee12d9 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -950,6 +950,7 @@ Datetimelike
 - Bug in :func:`to_datetime` was failing to parse date strings ``'today'`` and ``'now'`` if ``format`` was not ISO8601 (:issue:`50359`)
 - Bug in :func:`Timestamp.utctimetuple` raising a ``TypeError`` (:issue:`32174`)
 - Bug in :func:`to_datetime` was raising ``ValueError`` when parsing mixed-offset :class:`Timestamp` with ``errors='ignore'`` (:issue:`50585`)
+- Bug in :func:`to_datetime` was incorrectly handling floating-point inputs within 1 ``unit`` of the overflow boundaries (:issue:`50183`)
 
 Timedelta
 ^^^^^^^^^
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index eaa7339f3747a..152bfcb8822a4 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -514,11 +514,9 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index:
     elif arg.dtype.kind == "f":
         mult, _ = precision_from_unit(unit)
 
-        iresult = arg.astype("i8")
         mask = np.isnan(arg) | (arg == iNaT)
-        iresult[mask] = 0
-
-        fvalues = iresult.astype("f8") * mult
+        fvalues = (arg * mult).astype("f8", copy=False)
+        fvalues[mask] = 0
 
         if (fvalues < Timestamp.min.value).any() or (
             fvalues > Timestamp.max.value
@@ -528,11 +526,7 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index:
                 return _to_datetime_with_unit(arg, unit, name, utc, errors)
             raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'")
 
-        # TODO: is fresult meaningfully different from fvalues?
-        fresult = (arg * mult).astype("f8")
-        fresult[mask] = 0
-
-        arr = fresult.astype("M8[ns]", copy=False)
+        arr = fvalues.astype("M8[ns]", copy=False)
         arr[mask] = np.datetime64("NaT", "ns")
 
         tz_parsed = None
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index fcbe33a555f4f..6a6a659b80dd9 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -1935,6 +1935,27 @@ def test_to_timestamp_unit_coerce(self, bad_val):
         result = to_datetime([1, 2, bad_val], unit="D", errors="coerce")
         tm.assert_index_equal(result, expected)
 
+    def test_float_to_datetime_raise_near_bounds(self):
+        # GH50183
+        msg = "cannot convert input with unit 'D'"
+        oneday_in_ns = 1e9 * 60 * 60 * 24
+        tsmax_in_days = 2**63 / oneday_in_ns  # 2**63 ns, in days
+        # just in bounds
+        should_succeed = Series(
+            [0, tsmax_in_days - 0.005, -tsmax_in_days + 0.005], dtype=float
+        )
+        expected = (should_succeed * oneday_in_ns).astype(np.int64)
+        for error_mode in ["raise", "coerce", "ignore"]:
+            result1 = to_datetime(should_succeed, unit="D", errors=error_mode)
+            tm.assert_almost_equal(result1.astype(np.int64), expected, rtol=1e-10)
+        # just out of bounds
+        should_fail1 = Series([0, tsmax_in_days + 0.005], dtype=float)
+        should_fail2 = Series([0, -tsmax_in_days - 0.005], dtype=float)
+        with pytest.raises(OutOfBoundsDatetime, match=msg):
+            to_datetime(should_fail1, unit="D", errors="raise")
+        with pytest.raises(OutOfBoundsDatetime, match=msg):
+            to_datetime(should_fail2, unit="D", errors="raise")
+
 
 class TestToDatetimeDataFrame:
     @pytest.fixture