Skip to content

Commit 72a0c43

Browse files
committed
Merge pull request #5438 from danielballan/to-timedelta-missing-data
BUG: pd.to_timedelta handles missing data
2 parents 0d5c06f + 4e1cb58 commit 72a0c43

File tree

5 files changed

+156
-11
lines changed

5 files changed

+156
-11
lines changed

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -802,6 +802,7 @@ Bug Fixes
802802
- Fix empty series not printing name in repr (:issue:`4651`)
803803
- Make tests create temp files in temp directory by default. (:issue:`5419`)
804804
- ``pd.to_timedelta`` of a scalar returns a scalar (:issue:`5410`)
805+
- ``pd.to_timedelta`` accepts ``NaN`` and ``NaT``, returning ``NaT`` instead of raising (:issue:`5437`)
805806

806807
pandas 0.12.0
807808
-------------

pandas/core/internals.py

+15-5
Original file line numberDiff line numberDiff line change
@@ -1162,15 +1162,25 @@ def _try_fill(self, value):
11621162

11631163
def _try_coerce_args(self, values, other):
11641164
""" provide coercion to our input arguments
1165-
we are going to compare vs i8, so coerce to integer
1166-
values is always ndarra like, other may not be """
1167-
values = values.view('i8')
1165+
we are going to compare vs i8, so coerce to floats
1166+
representing NaT with np.nan so nans propagate
1167+
values is always ndarray like, other may not be """
1168+
def masker(v):
1169+
mask = isnull(v)
1170+
v = v.view('i8').astype('float64')
1171+
v[mask] = np.nan
1172+
return v
1173+
1174+
values = masker(values)
1175+
11681176
if isnull(other) or (np.isscalar(other) and other == tslib.iNaT):
1169-
other = tslib.iNaT
1177+
other = np.nan
11701178
elif isinstance(other, np.timedelta64):
11711179
other = _coerce_scalar_to_timedelta_type(other,unit='s').item()
1180+
if other == tslib.iNaT:
1181+
other = np.nan
11721182
else:
1173-
other = other.view('i8')
1183+
other = masker(other)
11741184

11751185
return values, other
11761186

pandas/core/ops.py

+21-5
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ def __init__(self, left, right, name):
243243
self.name = name
244244

245245
lvalues = self._convert_to_array(left, name=name)
246-
rvalues = self._convert_to_array(right, name=name)
246+
rvalues = self._convert_to_array(right, name=name, other=lvalues)
247247

248248
self.is_timedelta_lhs = com.is_timedelta64_dtype(left)
249249
self.is_datetime_lhs = com.is_datetime64_dtype(left)
@@ -305,17 +305,24 @@ def _validate(self):
305305
'of a series/ndarray of type datetime64[ns] '
306306
'or a timedelta')
307307

308-
def _convert_to_array(self, values, name=None):
308+
def _convert_to_array(self, values, name=None, other=None):
309309
"""converts values to ndarray"""
310310
from pandas.tseries.timedeltas import _possibly_cast_to_timedelta
311311

312312
coerce = 'compat' if pd._np_version_under1p7 else True
313313
if not is_list_like(values):
314314
values = np.array([values])
315315
inferred_type = lib.infer_dtype(values)
316+
316317
if inferred_type in ('datetime64', 'datetime', 'date', 'time'):
318+
# if other is a timedelta but the values here are all pd.NaT,
319+
# we are in the wrong (datetime) path
320+
if other is not None and other.dtype == 'timedelta64[ns]' and all(isnull(v) for v in values):
321+
values = np.empty(values.shape,dtype=other.dtype)
322+
values[:] = tslib.iNaT
323+
317324
# a datetimelike
318-
if not (isinstance(values, (pa.Array, pd.Series)) and
325+
elif not (isinstance(values, (pa.Array, pd.Series)) and
319326
com.is_datetime64_dtype(values)):
320327
values = tslib.array_to_datetime(values)
321328
elif isinstance(values, pd.DatetimeIndex):
@@ -342,6 +349,15 @@ def _convert_to_array(self, values, name=None):
342349
', '.join([com.pprint_thing(v)
343350
for v in values[mask]])))
344351
values = _possibly_cast_to_timedelta(os, coerce=coerce)
352+
elif inferred_type == 'floating':
353+
354+
# all nan, so ok, use the other dtype (e.g. timedelta or datetime)
355+
if isnull(values).all():
356+
values = np.empty(values.shape,dtype=other.dtype)
357+
values[:] = tslib.iNaT
358+
else:
359+
raise TypeError("incompatible type [{0}] for a datetime/timedelta"
360+
" operation".format(pa.array(values).dtype))
345361
else:
346362
raise TypeError("incompatible type [{0}] for a datetime/timedelta"
347363
" operation".format(pa.array(values).dtype))
@@ -440,6 +456,8 @@ def na_op(x, y):
440456

441457
def wrapper(left, right, name=name):
442458

459+
if isinstance(right, pd.DataFrame):
460+
return NotImplemented
443461
time_converted = _TimeOp.maybe_convert_for_time_op(left, right, name)
444462

445463
if time_converted is None:
@@ -476,8 +494,6 @@ def wrapper(left, right, name=name):
476494

477495
return left._constructor(wrap_results(arr), index=index,
478496
name=name, dtype=dtype)
479-
elif isinstance(right, pd.DataFrame):
480-
return NotImplemented
481497
else:
482498
# scalars
483499
if hasattr(lvalues, 'values'):

pandas/tseries/tests/test_timedeltas.py

+116
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,122 @@ def test_timedelta_ops(self):
195195
expected = to_timedelta('00:00:08')
196196
tm.assert_almost_equal(result, expected)
197197

198+
def test_to_timedelta_on_missing_values(self):
199+
_skip_if_numpy_not_friendly()
200+
201+
# GH5438
202+
timedelta_NaT = np.timedelta64('NaT')
203+
204+
actual = pd.to_timedelta(Series(['00:00:01', np.nan]))
205+
expected = Series([np.timedelta64(1000000000, 'ns'), timedelta_NaT], dtype='<m8[ns]')
206+
assert_series_equal(actual, expected)
207+
208+
actual = pd.to_timedelta(Series(['00:00:01', pd.NaT]))
209+
assert_series_equal(actual, expected)
210+
211+
actual = pd.to_timedelta(np.nan)
212+
self.assert_(actual == timedelta_NaT)
213+
214+
actual = pd.to_timedelta(pd.NaT)
215+
self.assert_(actual == timedelta_NaT)
216+
217+
def test_timedelta_ops_with_missing_values(self):
218+
_skip_if_numpy_not_friendly()
219+
220+
# setup
221+
s1 = pd.to_timedelta(Series(['00:00:01']))
222+
s2 = pd.to_timedelta(Series(['00:00:02']))
223+
sn = pd.to_timedelta(Series([pd.NaT]))
224+
df1 = DataFrame(['00:00:01']).apply(pd.to_timedelta)
225+
df2 = DataFrame(['00:00:02']).apply(pd.to_timedelta)
226+
dfn = DataFrame([pd.NaT]).apply(pd.to_timedelta)
227+
scalar1 = pd.to_timedelta('00:00:01')
228+
scalar2 = pd.to_timedelta('00:00:02')
229+
timedelta_NaT = pd.to_timedelta('NaT')
230+
NA = np.nan
231+
232+
actual = scalar1 + scalar1
233+
self.assert_(actual == scalar2)
234+
actual = scalar2 - scalar1
235+
self.assert_(actual == scalar1)
236+
237+
actual = s1 + s1
238+
assert_series_equal(actual, s2)
239+
actual = s2 - s1
240+
assert_series_equal(actual, s1)
241+
242+
actual = s1 + scalar1
243+
assert_series_equal(actual, s2)
244+
actual = s2 - scalar1
245+
assert_series_equal(actual, s1)
246+
247+
actual = s1 + timedelta_NaT
248+
assert_series_equal(actual, sn)
249+
actual = s1 - timedelta_NaT
250+
assert_series_equal(actual, sn)
251+
252+
actual = s1 + NA
253+
assert_series_equal(actual, sn)
254+
actual = s1 - NA
255+
assert_series_equal(actual, sn)
256+
257+
actual = s1 + pd.NaT # NaT is datetime, not timedelta
258+
assert_series_equal(actual, sn)
259+
actual = s2 - pd.NaT
260+
assert_series_equal(actual, sn)
261+
262+
actual = s1 + df1
263+
assert_frame_equal(actual, df2)
264+
actual = s2 - df1
265+
assert_frame_equal(actual, df1)
266+
actual = df1 + s1
267+
assert_frame_equal(actual, df2)
268+
actual = df2 - s1
269+
assert_frame_equal(actual, df1)
270+
271+
actual = df1 + df1
272+
assert_frame_equal(actual, df2)
273+
actual = df2 - df1
274+
assert_frame_equal(actual, df1)
275+
276+
actual = df1 + scalar1
277+
assert_frame_equal(actual, df2)
278+
actual = df2 - scalar1
279+
assert_frame_equal(actual, df1)
280+
281+
actual = df1 + timedelta_NaT
282+
assert_frame_equal(actual, dfn)
283+
actual = df1 - timedelta_NaT
284+
assert_frame_equal(actual, dfn)
285+
286+
actual = df1 + NA
287+
assert_frame_equal(actual, dfn)
288+
actual = df1 - NA
289+
assert_frame_equal(actual, dfn)
290+
291+
actual = df1 + pd.NaT # NaT is datetime, not timedelta
292+
assert_frame_equal(actual, dfn)
293+
actual = df1 - pd.NaT
294+
assert_frame_equal(actual, dfn)
295+
296+
def test_apply_to_timedelta(self):
297+
_skip_if_numpy_not_friendly()
298+
299+
timedelta_NaT = pd.to_timedelta('NaT')
300+
301+
list_of_valid_strings = ['00:00:01', '00:00:02']
302+
a = pd.to_timedelta(list_of_valid_strings)
303+
b = Series(list_of_valid_strings).apply(pd.to_timedelta)
304+
# Can't compare until apply on a Series gives the correct dtype
305+
# assert_series_equal(a, b)
306+
307+
list_of_strings = ['00:00:01', np.nan, pd.NaT, timedelta_NaT]
308+
a = pd.to_timedelta(list_of_strings)
309+
b = Series(list_of_strings).apply(pd.to_timedelta)
310+
# Can't compare until apply on a Series gives the correct dtype
311+
# assert_series_equal(a, b)
312+
313+
198314
if __name__ == '__main__':
199315
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
200316
exit=False)

pandas/tseries/timedeltas.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import pandas.tslib as tslib
1010
from pandas import compat, _np_version_under1p7
1111
from pandas.core.common import (ABCSeries, is_integer, is_timedelta64_dtype,
12-
_values_from_object, is_list_like)
12+
_values_from_object, is_list_like, isnull)
1313

1414
repr_timedelta = tslib.repr_timedelta64
1515
repr_timedelta64 = tslib.repr_timedelta64
@@ -84,6 +84,8 @@ def conv(v):
8484
r = conv(r)
8585
elif r == tslib.iNaT:
8686
return r
87+
elif isnull(r):
88+
return np.timedelta64('NaT')
8789
elif isinstance(r, np.timedelta64):
8890
r = r.astype("m8[{0}]".format(unit.lower()))
8991
elif is_integer(r):

0 commit comments

Comments
 (0)