diff --git a/doc/source/release.rst b/doc/source/release.rst index 4b33c20424b33..fde13941d0266 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -771,6 +771,7 @@ Bug Fixes - Fix empty series not printing name in repr (:issue:`4651`) - Make tests create temp files in temp directory by default. (:issue:`5419`) - ``pd.to_timedelta`` of a scalar returns a scalar (:issue:`5410`) + - ``pd.to_timedelta`` accepts ``NaN`` and ``NaT``, returning ``NaT`` instead of raising (:issue:`5437`) pandas 0.12.0 ------------- diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 62aa95d270924..3ce9b9288e8c7 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1162,15 +1162,25 @@ def _try_fill(self, value): def _try_coerce_args(self, values, other): """ provide coercion to our input arguments - we are going to compare vs i8, so coerce to integer - values is always ndarra like, other may not be """ - values = values.view('i8') + we are going to compare vs i8, so coerce to floats + representing NaT with np.nan so nans propagate + values is always ndarray like, other may not be """ + def masker(v): + mask = isnull(v) + v = v.view('i8').astype('float64') + v[mask] = np.nan + return v + + values = masker(values) + if isnull(other) or (np.isscalar(other) and other == tslib.iNaT): - other = tslib.iNaT + other = np.nan elif isinstance(other, np.timedelta64): other = _coerce_scalar_to_timedelta_type(other,unit='s').item() + if other == tslib.iNaT: + other = np.nan else: - other = other.view('i8') + other = masker(other) return values, other diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 0c647bb6ee7eb..5e800ffd82306 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -255,7 +255,7 @@ def __init__(self, left, right, name): self.name = name lvalues = self._convert_to_array(left, name=name) - rvalues = self._convert_to_array(right, name=name) + rvalues = self._convert_to_array(right, name=name, other=lvalues) 
self.is_timedelta_lhs = com.is_timedelta64_dtype(left) self.is_datetime_lhs = com.is_datetime64_dtype(left) @@ -317,7 +317,7 @@ def _validate(self): 'of a series/ndarray of type datetime64[ns] ' 'or a timedelta') - def _convert_to_array(self, values, name=None): + def _convert_to_array(self, values, name=None, other=None): """converts values to ndarray""" from pandas.tseries.timedeltas import _possibly_cast_to_timedelta @@ -325,9 +325,16 @@ def _convert_to_array(self, values, name=None): if not is_list_like(values): values = np.array([values]) inferred_type = lib.infer_dtype(values) + if inferred_type in ('datetime64', 'datetime', 'date', 'time'): + # if other is a timedelta but values are all pd.NaT, then + # we are in the wrong path + if other is not None and other.dtype == 'timedelta64[ns]' and all(isnull(v) for v in values): + values = np.empty(values.shape,dtype=other.dtype) + values[:] = tslib.iNaT + # a datetlike - if not (isinstance(values, (pa.Array, pd.Series)) and + elif not (isinstance(values, (pa.Array, pd.Series)) and com.is_datetime64_dtype(values)): values = tslib.array_to_datetime(values) elif isinstance(values, pd.DatetimeIndex): @@ -354,6 +361,15 @@ def _convert_to_array(self, values, name=None): ', '.join([com.pprint_thing(v) for v in values[mask]]))) values = _possibly_cast_to_timedelta(os, coerce=coerce) + elif inferred_type == 'floating': + + # all nan, so ok, use the other dtype (e.g. 
timedelta or datetime) + if other is not None and isnull(values).all(): + values = np.empty(values.shape,dtype=other.dtype) + values[:] = tslib.iNaT + else: + raise TypeError("incompatible type [{0}] for a datetime/timedelta" + " operation".format(pa.array(values).dtype)) else: raise TypeError("incompatible type [{0}] for a datetime/timedelta" " operation".format(pa.array(values).dtype)) @@ -452,6 +468,8 @@ def na_op(x, y): def wrapper(left, right, name=name): + if isinstance(right, pd.DataFrame): + return NotImplemented time_converted = _TimeOp.maybe_convert_for_time_op(left, right, name) if time_converted is None: @@ -488,8 +506,6 @@ def wrapper(left, right, name=name): return left._constructor(wrap_results(arr), index=index, name=name, dtype=dtype) - elif isinstance(right, pd.DataFrame): - return NotImplemented else: # scalars if hasattr(lvalues, 'values'): diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py index 199ad19986b39..df03851ca4ddb 100644 --- a/pandas/tseries/tests/test_timedeltas.py +++ b/pandas/tseries/tests/test_timedeltas.py @@ -195,6 +195,122 @@ def test_timedelta_ops(self): expected = to_timedelta('00:00:08') tm.assert_almost_equal(result, expected) + def test_to_timedelta_on_missing_values(self): + _skip_if_numpy_not_friendly() + + # GH5438 + timedelta_NaT = np.timedelta64('NaT') + + actual = pd.to_timedelta(Series(['00:00:01', np.nan])) + expected = Series([np.timedelta64(1000000000, 'ns'), timedelta_NaT], dtype='