diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index 8da33a46e79c6..bebd40b4adde2 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -31,6 +31,7 @@ Fixed Regressions - Fixed regression in ``IntervalDtype`` construction where passing an incorrect string with 'Interval' as a prefix could result in a ``RecursionError``. (:issue:`25338`) - Fixed regression in creating a period-dtype array from a read-only NumPy array of period objects. (:issue:`25403`) - Fixed regression in :class:`Categorical`, where constructing it from a categorical ``Series`` and an explicit ``categories=`` that differed from that in the ``Series`` created an invalid object which could trigger segfaults. (:issue:`25318`) +- Fixed regression in :func:`to_timedelta` losing precision when converting floating data to ``Timedelta`` data (:issue:`25077`). - Fixed pip installing from source into an environment without NumPy (:issue:`25193`) - Fixed regression in :meth:`DataFrame.to_csv` writing duplicate line endings with gzip compress (:issue:`25311`) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 37aa05659b70f..5918c7963acf7 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -246,9 +246,11 @@ def array_to_timedelta64(object[:] values, unit='ns', errors='raise'): return iresult.base # .base to access underlying np.ndarray -cdef inline int64_t cast_from_unit(object ts, object unit) except? -1: - """ return a casting of the unit represented to nanoseconds - round the fractional part of a float to our precision, p """ +cpdef inline object precision_from_unit(object unit): + """ + Return a casting of the unit represented to nanoseconds + the precision + to round the fractional part. + """ cdef: int64_t m int p @@ -285,6 +287,17 @@ cdef inline int64_t cast_from_unit(object ts, object unit) except? 
-1: p = 0 else: raise ValueError("cannot cast unit {unit}".format(unit=unit)) + return m, p + + +cdef inline int64_t cast_from_unit(object ts, object unit) except? -1: + """ return a casting of the unit represented to nanoseconds + round the fractional part of a float to our precision, p """ + cdef: + int64_t m + int p + + m, p = precision_from_unit(unit) # just give me the unit back if ts is None: diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 74fe8072e6924..1badb476085bf 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -11,7 +11,7 @@ from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT from pandas._libs.tslibs.fields import get_timedelta_field from pandas._libs.tslibs.timedeltas import ( - array_to_timedelta64, parse_timedelta_unit) + array_to_timedelta64, parse_timedelta_unit, precision_from_unit) import pandas.compat as compat from pandas.util._decorators import Appender @@ -918,12 +918,15 @@ def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"): copy = copy and not copy_made elif is_float_dtype(data.dtype): - # treat as multiples of the given unit. If after converting to nanos, - # there are fractional components left, these are truncated - # (i.e. 
NOT rounded) + # cast the unit, multiply base/frac separately + # to avoid precision issues from float -> int mask = np.isnan(data) - coeff = np.timedelta64(1, unit) / np.timedelta64(1, 'ns') - data = (coeff * data).astype(np.int64).view('timedelta64[ns]') + m, p = precision_from_unit(unit) + base = data.astype(np.int64) + frac = data - base + if p: + frac = np.round(frac, p) + data = (base * m + (frac * m).astype(np.int64)).view('timedelta64[ns]') data[mask] = iNaT copy = False diff --git a/pandas/tests/indexes/timedeltas/test_tools.py b/pandas/tests/indexes/timedeltas/test_tools.py index 58482a174dfd1..819184d4b14f3 100644 --- a/pandas/tests/indexes/timedeltas/test_tools.py +++ b/pandas/tests/indexes/timedeltas/test_tools.py @@ -181,3 +181,10 @@ def test_to_timedelta_on_missing_values(self): actual = pd.to_timedelta(pd.NaT) assert actual.value == timedelta_NaT.astype('int64') + + def test_to_timedelta_float(self): + # https://github.com/pandas-dev/pandas/issues/25077 + arr = np.arange(0, 1, 1e-6)[-10:] + result = pd.to_timedelta(arr, unit='s') + expected_asi8 = np.arange(999990000, int(1e9), 1000, dtype='int64') + tm.assert_numpy_array_equal(result.asi8, expected_asi8)