Skip to content

Commit dc66c9b

Browse files
jorisvandenbosscheMeeseeksDev[bot]
authored and
MeeseeksDev[bot]
committed
Backport PR pandas-dev#25651: REGR: to_timedelta precision issues with floating data
1 parent eef1dd8 commit dc66c9b

File tree

4 files changed

+33
-9
lines changed

4 files changed

+33
-9
lines changed

doc/source/whatsnew/v0.24.2.rst

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ Fixed Regressions
3131
- Fixed regression in ``IntervalDtype`` construction where passing an incorrect string with 'Interval' as a prefix could result in a ``RecursionError``. (:issue:`25338`)
3232
- Fixed regression in creating a period-dtype array from a read-only NumPy array of period objects. (:issue:`25403`)
3333
- Fixed regression in :class:`Categorical`, where constructing it from a categorical ``Series`` and an explicit ``categories=`` that differed from that in the ``Series`` created an invalid object which could trigger segfaults. (:issue:`25318`)
34+
- Fixed regression in :func:`to_timedelta` losing precision when converting floating data to ``Timedelta`` data (:issue:`25077`).
3435
- Fixed pip installing from source into an environment without NumPy (:issue:`25193`)
3536
- Fixed regression in :meth:`DataFrame.to_csv` writing duplicate line endings with gzip compression (:issue:`25311`)
3637

pandas/_libs/tslibs/timedeltas.pyx

+16-3
Original file line numberDiff line numberDiff line change
@@ -246,9 +246,11 @@ def array_to_timedelta64(object[:] values, unit='ns', errors='raise'):
246246
return iresult.base # .base to access underlying np.ndarray
247247

248248

249-
cdef inline int64_t cast_from_unit(object ts, object unit) except? -1:
250-
""" return a casting of the unit represented to nanoseconds
251-
round the fractional part of a float to our precision, p """
249+
cpdef inline object precision_from_unit(object unit):
250+
"""
251+
Return a casting of the unit represented to nanoseconds + the precision
252+
to round the fractional part.
253+
"""
252254
cdef:
253255
int64_t m
254256
int p
@@ -285,6 +287,17 @@ cdef inline int64_t cast_from_unit(object ts, object unit) except? -1:
285287
p = 0
286288
else:
287289
raise ValueError("cannot cast unit {unit}".format(unit=unit))
290+
return m, p
291+
292+
293+
cdef inline int64_t cast_from_unit(object ts, object unit) except? -1:
294+
""" return a casting of the unit represented to nanoseconds
295+
round the fractional part of a float to our precision, p """
296+
cdef:
297+
int64_t m
298+
int p
299+
300+
m, p = precision_from_unit(unit)
288301

289302
# just give me the unit back
290303
if ts is None:

pandas/core/arrays/timedeltas.py

+9-6
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT
1212
from pandas._libs.tslibs.fields import get_timedelta_field
1313
from pandas._libs.tslibs.timedeltas import (
14-
array_to_timedelta64, parse_timedelta_unit)
14+
array_to_timedelta64, parse_timedelta_unit, precision_from_unit)
1515
import pandas.compat as compat
1616
from pandas.util._decorators import Appender
1717

@@ -918,12 +918,15 @@ def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"):
918918
copy = copy and not copy_made
919919

920920
elif is_float_dtype(data.dtype):
921-
# treat as multiples of the given unit. If after converting to nanos,
922-
# there are fractional components left, these are truncated
923-
# (i.e. NOT rounded)
921+
# cast the unit, multiply base/frac separately
922+
# to avoid precision issues from float -> int
924923
mask = np.isnan(data)
925-
coeff = np.timedelta64(1, unit) / np.timedelta64(1, 'ns')
926-
data = (coeff * data).astype(np.int64).view('timedelta64[ns]')
924+
m, p = precision_from_unit(unit)
925+
base = data.astype(np.int64)
926+
frac = data - base
927+
if p:
928+
frac = np.round(frac, p)
929+
data = (base * m + (frac * m).astype(np.int64)).view('timedelta64[ns]')
927930
data[mask] = iNaT
928931
copy = False
929932

pandas/tests/indexes/timedeltas/test_tools.py

+7
Original file line numberDiff line numberDiff line change
@@ -173,3 +173,10 @@ def test_to_timedelta_on_missing_values(self):
173173

174174
actual = pd.to_timedelta(pd.NaT)
175175
assert actual.value == timedelta_NaT.astype('int64')
176+
177+
def test_to_timedelta_float(self):
178+
# https://github.com/pandas-dev/pandas/issues/25077
179+
arr = np.arange(0, 1, 1e-6)[-10:]
180+
result = pd.to_timedelta(arr, unit='s')
181+
expected_asi8 = np.arange(999990000, int(1e9), 1000, dtype='int64')
182+
tm.assert_numpy_array_equal(result.asi8, expected_asi8)

0 commit comments

Comments
 (0)