Backport PR pandas-dev#25651: REGR: to_timedelta precision issues with floating data

jorisvandenbossche · MeeseeksDev[bot] · commit dc66c9bb7ca7 · 2019-03-12T12:43:08.000Z
diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst
@@ -31,6 +31,7 @@ Fixed Regressions
 - Fixed regression in ``IntervalDtype`` construction where passing an incorrect string with 'Interval' as a prefix could result in a ``RecursionError``. (:issue:`25338`)
 - Fixed regression in creating a period-dtype array from a read-only NumPy array of period objects. (:issue:`25403`)
 - Fixed regression in :class:`Categorical`, where constructing it from a categorical ``Series`` and an explicit ``categories=`` that differed from that in the ``Series`` created an invalid object which could trigger segfaults. (:issue:`25318`)
+- Fixed regression in :func:`to_timedelta` losing precision when converting floating data to ``Timedelta`` data (:issue:`25077`).
 - Fixed pip installing from source into an environment without NumPy (:issue:`25193`)
 - Fixed regression in :meth:`DataFrame.to_csv` writing duplicate line endings with gzip compress (:issue:`25311`)
 
diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx
@@ -246,9 +246,11 @@ def array_to_timedelta64(object[:] values, unit='ns', errors='raise'):
     return iresult.base  # .base to access underlying np.ndarray
 
 
-cdef inline int64_t cast_from_unit(object ts, object unit) except? -1:
-    """ return a casting of the unit represented to nanoseconds
-        round the fractional part of a float to our precision, p """
+cpdef inline object precision_from_unit(object unit):
+    """
+    Return a casting of the unit represented to nanoseconds + the precision
+    to round the fractional part.
+    """
     cdef:
         int64_t m
         int p
@@ -285,6 +287,17 @@ cdef inline int64_t cast_from_unit(object ts, object unit) except? -1:
         p = 0
     else:
         raise ValueError("cannot cast unit {unit}".format(unit=unit))
+    return m, p
+
+
+cdef inline int64_t cast_from_unit(object ts, object unit) except? -1:
+    """ return a casting of the unit represented to nanoseconds
+        round the fractional part of a float to our precision, p """
+    cdef:
+        int64_t m
+        int p
+
+    m, p = precision_from_unit(unit)
 
     # just give me the unit back
     if ts is None:
diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
@@ -11,7 +11,7 @@
 from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT
 from pandas._libs.tslibs.fields import get_timedelta_field
 from pandas._libs.tslibs.timedeltas import (
-    array_to_timedelta64, parse_timedelta_unit)
+    array_to_timedelta64, parse_timedelta_unit, precision_from_unit)
 import pandas.compat as compat
 from pandas.util._decorators import Appender
 
@@ -918,12 +918,15 @@ def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"):
         copy = copy and not copy_made
 
     elif is_float_dtype(data.dtype):
-        # treat as multiples of the given unit.  If after converting to nanos,
-        #  there are fractional components left, these are truncated
-        #  (i.e. NOT rounded)
+        # cast the unit, multiply base/frace separately
+        # to avoid precision issues from float -> int
         mask = np.isnan(data)
-        coeff = np.timedelta64(1, unit) / np.timedelta64(1, 'ns')
-        data = (coeff * data).astype(np.int64).view('timedelta64[ns]')
+        m, p = precision_from_unit(unit)
+        base = data.astype(np.int64)
+        frac = data - base
+        if p:
+            frac = np.round(frac, p)
+        data = (base * m + (frac * m).astype(np.int64)).view('timedelta64[ns]')
         data[mask] = iNaT
         copy = False
 
diff --git a/pandas/tests/indexes/timedeltas/test_tools.py b/pandas/tests/indexes/timedeltas/test_tools.py
@@ -173,3 +173,10 @@ def test_to_timedelta_on_missing_values(self):
 
         actual = pd.to_timedelta(pd.NaT)
         assert actual.value == timedelta_NaT.astype('int64')
+
+    def test_to_timedelta_float(self):
+        # https://github.com/pandas-dev/pandas/issues/25077
+        arr = np.arange(0, 1, 1e-6)[-10:]
+        result = pd.to_timedelta(arr, unit='s')
+        expected_asi8 = np.arange(999990000, int(1e9), 1000, dtype='int64')
+        tm.assert_numpy_array_equal(result.asi8, expected_asi8)