Skip to content

REGR: to_timedelta precision issues with floating data #25651

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Mar 12, 2019
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ Fixed Regressions
- Fixed regression in ``IntervalDtype`` construction where passing an incorrect string with 'Interval' as a prefix could result in a ``RecursionError``. (:issue:`25338`)
- Fixed regression in creating a period-dtype array from a read-only NumPy array of period objects. (:issue:`25403`)
- Fixed regression in :class:`Categorical`, where constructing it from a categorical ``Series`` and an explicit ``categories=`` that differed from that in the ``Series`` created an invalid object which could trigger segfaults. (:issue:`25318`)
- Fixed regression in :func:`to_timedelta` losing precision when converting floating data to ``Timedelta`` data (:issue:`25077`).
- Fixed pip installing from source into an environment without NumPy (:issue:`25193`)
- Fixed regression in :meth:`DataFrame.to_csv` writing duplicate line endings with gzip compression (:issue:`25311`)

Expand Down
19 changes: 16 additions & 3 deletions pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -246,9 +246,11 @@ def array_to_timedelta64(object[:] values, unit='ns', errors='raise'):
return iresult.base # .base to access underlying np.ndarray


cdef inline int64_t cast_from_unit(object ts, object unit) except? -1:
""" return a casting of the unit represented to nanoseconds
round the fractional part of a float to our precision, p """
cpdef inline object precision_from_unit(object unit):
"""
Return a casting of the unit represented to nanoseconds + the precision
to round the fractional part.
"""
cdef:
int64_t m
int p
Expand Down Expand Up @@ -285,6 +287,17 @@ cdef inline int64_t cast_from_unit(object ts, object unit) except? -1:
p = 0
else:
raise ValueError("cannot cast unit {unit}".format(unit=unit))
return m, p


cdef inline int64_t cast_from_unit(object ts, object unit) except? -1:
""" return a casting of the unit represented to nanoseconds
round the fractional part of a float to our precision, p """
cdef:
int64_t m
int p

m, p = precision_from_unit(unit)

# just give me the unit back
if ts is None:
Expand Down
15 changes: 9 additions & 6 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT
from pandas._libs.tslibs.fields import get_timedelta_field
from pandas._libs.tslibs.timedeltas import (
array_to_timedelta64, parse_timedelta_unit)
array_to_timedelta64, parse_timedelta_unit, precision_from_unit)
import pandas.compat as compat
from pandas.util._decorators import Appender

Expand Down Expand Up @@ -918,12 +918,15 @@ def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"):
copy = copy and not copy_made

elif is_float_dtype(data.dtype):
# treat as multiples of the given unit. If after converting to nanos,
# there are fractional components left, these are truncated
# (i.e. NOT rounded)
# cast the unit, multiply base/frac separately
# to avoid precision issues from float -> int
mask = np.isnan(data)
coeff = np.timedelta64(1, unit) / np.timedelta64(1, 'ns')
data = (coeff * data).astype(np.int64).view('timedelta64[ns]')
m, p = precision_from_unit(unit)
base = data.astype(np.int64)
frac = data - base
if p:
frac = np.round(frac, p)
data = (base * m + (frac * m).astype(np.int64)).view('timedelta64[ns]')
data[mask] = iNaT
copy = False

Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/indexes/timedeltas/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,10 @@ def test_to_timedelta_on_missing_values(self):

actual = pd.to_timedelta(pd.NaT)
assert actual.value == timedelta_NaT.astype('int64')

def test_to_timedelta_float(self):
    # Regression check for
    # https://github.com/pandas-dev/pandas/issues/25077
    # Take the last ten values of a microsecond-step float range below
    # one second and check that, read as seconds, they convert to the
    # expected nanosecond integers spaced 1000 apart.
    seconds = np.arange(0, 1, 1e-6)[-10:]
    converted = pd.to_timedelta(seconds, unit='s')
    tm.assert_numpy_array_equal(
        converted.asi8,
        np.arange(999990000, int(1e9), 1000, dtype='int64'),
    )