Skip to content

REGR: to_timedelta precision issues with floating data #25651

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Mar 12, 2019
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ Fixed Regressions
- Fixed regression in ``IntervalDtype`` construction where passing an incorrect string with 'Interval' as a prefix could result in a ``RecursionError``. (:issue:`25338`)
- Fixed regression in creating a period-dtype array from a read-only NumPy array of period objects. (:issue:`25403`)
- Fixed regression in :class:`Categorical`, where constructing it from a categorical ``Series`` and an explicit ``categories=`` that differed from that in the ``Series`` created an invalid object which could trigger segfaults. (:issue:`25318`)
- Fixed regression in :func:`to_timedelta` loosing precision when converting floating data to ``Timedelta`` data (:issue:`25077`).
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

loosing -> losing

- Fixed pip installing from source into an environment without NumPy (:issue:`25193`)

.. _whatsnew_0242.enhancements:
Expand Down
70 changes: 43 additions & 27 deletions pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -246,45 +246,61 @@ def array_to_timedelta64(object[:] values, unit='ns', errors='raise'):
return iresult.base # .base to access underlying np.ndarray


cdef inline int64_t cast_from_unit(object ts, object unit) except? -1:
""" return a casting of the unit represented to nanoseconds
round the fractional part of a float to our precision, p """
cdef:
int64_t m
int p

cdef inline int _precision_from_unit(object unit,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why are you going thru all of this trouble? use cpdef

int64_t* m, int* p) except? -1:
if unit == 'Y':
m = 1000000000L * 31556952
p = 9
m[0] = 1000000000L * 31556952
p[0] = 9
elif unit == 'M':
m = 1000000000L * 2629746
p = 9
m[0] = 1000000000L * 2629746
p[0] = 9
elif unit == 'W':
m = 1000000000L * DAY_SECONDS * 7
p = 9
m[0] = 1000000000L * DAY_SECONDS * 7
p[0] = 9
elif unit == 'D' or unit == 'd':
m = 1000000000L * DAY_SECONDS
p = 9
m[0] = 1000000000L * DAY_SECONDS
p[0] = 9
elif unit == 'h':
m = 1000000000L * 3600
p = 9
m[0] = 1000000000L * 3600
p[0] = 9
elif unit == 'm':
m = 1000000000L * 60
p = 9
m[0] = 1000000000L * 60
p[0] = 9
elif unit == 's':
m = 1000000000L
p = 9
m[0] = 1000000000L
p[0] = 9
elif unit == 'ms':
m = 1000000L
p = 6
m[0] = 1000000L
p[0] = 6
elif unit == 'us':
m = 1000L
p = 3
m[0] = 1000L
p[0] = 3
elif unit == 'ns' or unit is None:
m = 1L
p = 0
m[0] = 1L
p[0] = 0
else:
raise ValueError("cannot cast unit {unit}".format(unit=unit))
return 0


def precision_from_unit(object unit):
cdef:
int64_t m
int p

_precision_from_unit(unit, &m, &p)

return m, p


cdef inline int64_t cast_from_unit(object ts, object unit) except? -1:
""" return a casting of the unit represented to nanoseconds
round the fractional part of a float to our precision, p """
cdef:
int64_t m
int p

_precision_from_unit(unit, &m, &p)

# just give me the unit back
if ts is None:
Expand Down
15 changes: 9 additions & 6 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT
from pandas._libs.tslibs.fields import get_timedelta_field
from pandas._libs.tslibs.timedeltas import (
array_to_timedelta64, parse_timedelta_unit)
array_to_timedelta64, parse_timedelta_unit, precision_from_unit)
import pandas.compat as compat
from pandas.util._decorators import Appender

Expand Down Expand Up @@ -918,12 +918,15 @@ def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"):
copy = copy and not copy_made

elif is_float_dtype(data.dtype):
# treat as multiples of the given unit. If after converting to nanos,
# there are fractional components left, these are truncated
# (i.e. NOT rounded)
# cast the unit, multiply base/frace separately
# to avoid precision issues from float -> int
mask = np.isnan(data)
coeff = np.timedelta64(1, unit) / np.timedelta64(1, 'ns')
data = (coeff * data).astype(np.int64).view('timedelta64[ns]')
m, p = precision_from_unit(unit)
base = data.astype(np.int64)
frac = data - base
if p:
frac = np.round(frac, p)
data = (base * m + (frac * m).astype(np.int64)).view('timedelta64[ns]')
data[mask] = iNaT
copy = False

Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/indexes/timedeltas/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,10 @@ def test_to_timedelta_on_missing_values(self):

actual = pd.to_timedelta(pd.NaT)
assert actual.value == timedelta_NaT.astype('int64')

def test_to_timedelta_float(self):
# https://github.com/pandas-dev/pandas/issues/25077
arr = np.arange(0, 1, 1e-6)[-10:]
result = pd.to_timedelta(arr, unit='s')
expected_asi8 = np.arange(999990000, int(1e9), 1000, dtype='int64')
tm.assert_numpy_array_equal(result.asi8, expected_asi8)