diff --git a/doc/source/release.rst b/doc/source/release.rst index a50a0f9c90b73..8d0f2c6a599e8 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -93,6 +93,7 @@ Improvements to existing features is frequency conversion. - Timedelta64 support ``fillna/ffill/bfill`` with an integer interpreted as seconds, or a ``timedelta`` (:issue:`3371`) + - Box numeric ops on ``timedelta`` Series (:issue:`4984`) - Datetime64 support ``ffill/bfill`` - Performance improvements with ``__getitem__`` on ``DataFrames`` with when the key is a column diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index bcb738d8a89cb..85ac48c379aad 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -1204,6 +1204,25 @@ pass a timedelta to get a particular value. y.fillna(10) y.fillna(timedelta(days=-1,seconds=5)) +.. _timeseries.timedeltas_reductions: + +Time Deltas & Reductions +------------------------ + +.. warning:: + + A numeric reduction operation for ``timedelta64[ns]`` will return a single-element ``Series`` of + dtype ``timedelta64[ns]``. + +You can do numeric reduction operations on timedeltas. + +.. ipython:: python + + y2 = y.fillna(timedelta(days=-1,seconds=5)) + y2 + y2.mean() + y2.quantile(.1) + .. _timeseries.timedeltas_convert: Time Deltas & Conversions diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt index bda6fa4cdf021..982ae939fc085 100644 --- a/doc/source/v0.13.0.txt +++ b/doc/source/v0.13.0.txt @@ -292,6 +292,14 @@ Enhancements td.fillna(0) td.fillna(timedelta(days=1,seconds=5)) + - You can do numeric reduction operations on timedeltas. Note that these will return + a single-element Series. + + .. ipython:: python + + td.mean() + td.quantile(.1) + - ``plot(kind='kde')`` now accepts the optional parameters ``bw_method`` and ``ind``, passed to scipy.stats.gaussian_kde() (for scipy >= 0.11.0) to set the bandwidth, and to gkde.evaluate() to specify the indicies at which it diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 247f429d4b331..f9aeb1f726ff7 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -5,7 +5,7 @@ import numpy as np -from pandas.core.common import isnull, notnull, _values_from_object +from pandas.core.common import isnull, notnull, _values_from_object, is_float import pandas.core.common as com import pandas.lib as lib import pandas.algos as algos @@ -188,6 +188,10 @@ def _wrap_results(result,dtype): # as series will do the right thing in py3 (and deal with numpy 1.6.2 # bug in that it results dtype of timedelta64[us] from pandas import Series + + # coerce float to results + if is_float(result): + result = int(result) result = Series([result],dtype='timedelta64[ns]') else: result = result.view(dtype) @@ -224,11 +228,15 @@ def nanmean(values, axis=None, skipna=True): the_mean[ct_mask] = np.nan else: the_mean = the_sum / count if count > 0 else np.nan - return the_mean + + return _wrap_results(the_mean,dtype) @disallow('M8') @bottleneck_switch() def nanmedian(values, axis=None, skipna=True): + + values, mask, dtype = _get_values(values, skipna) + def get_median(x): mask = notnull(x) if not skipna and not mask.all(): @@ -257,7 +265,7 @@ def get_median(x): return ret # otherwise return a scalar value - return get_median(values) if notempty else np.nan + return _wrap_results(get_median(values),dtype) if notempty else np.nan @disallow('M8') diff --git a/pandas/core/series.py b/pandas/core/series.py index 942bb700a3718..8713ffb58392e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1981,7 +1981,12 @@ def quantile(self, q=0.5): valid_values = self.dropna().values if len(valid_values) == 0: return pa.NA - return _quantile(valid_values, q * 100) + result = _quantile(valid_values, q * 100) + if result.dtype == _TD_DTYPE: + from pandas.tseries.timedeltas import to_timedelta + return to_timedelta(result) + + return result def ptp(self, axis=None, out=None): return _values_from_object(self).ptp(axis, out) diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py index 551507039112b..64e5728f0f549 100644 --- a/pandas/tseries/tests/test_timedeltas.py +++ b/pandas/tseries/tests/test_timedeltas.py @@ -7,7 +7,7 @@ import numpy as np import pandas as pd -from pandas import (Index, Series, DataFrame, isnull, notnull, +from pandas import (Index, Series, DataFrame, Timestamp, isnull, notnull, bdate_range, date_range, _np_version_under1p7) import pandas.core.common as com from pandas.compat import StringIO, lrange, range, zip, u, OrderedDict, long @@ -123,8 +123,8 @@ def conv(v): def test_nat_converters(self): _skip_if_numpy_not_friendly() - self.assert_(to_timedelta('nat') == tslib.iNaT) - self.assert_(to_timedelta('nan') == tslib.iNaT) + self.assert_(to_timedelta('nat',box=False) == tslib.iNaT) + self.assert_(to_timedelta('nan',box=False) == tslib.iNaT) def test_to_timedelta(self): _skip_if_numpy_not_friendly() @@ -133,11 +133,11 @@ def conv(v): return v.astype('m8[ns]') d1 = np.timedelta64(1,'D') - self.assert_(to_timedelta('1 days 06:05:01.00003') == conv(d1+np.timedelta64(6*3600+5*60+1,'s')+np.timedelta64(30,'us'))) - self.assert_(to_timedelta('15.5us') == conv(np.timedelta64(15500,'ns'))) + self.assert_(to_timedelta('1 days 06:05:01.00003',box=False) == conv(d1+np.timedelta64(6*3600+5*60+1,'s')+np.timedelta64(30,'us'))) + self.assert_(to_timedelta('15.5us',box=False) == conv(np.timedelta64(15500,'ns'))) # empty string - result = to_timedelta('') + result = to_timedelta('',box=False) self.assert_(result == tslib.iNaT) result = to_timedelta(['', '']) @@ -150,7 +150,7 @@ def conv(v): # ints result = np.timedelta64(0,'ns') - expected = to_timedelta(0) + expected = to_timedelta(0,box=False) self.assert_(result == expected) # Series @@ -163,6 +163,35 @@ def conv(v): expected = to_timedelta([0,10],unit='s') tm.assert_series_equal(result, expected) + # single element conversion + v = timedelta(seconds=1) + result = to_timedelta(v,box=False) + expected = to_timedelta([v]) + + v = np.timedelta64(timedelta(seconds=1)) + result = to_timedelta(v,box=False) + expected = to_timedelta([v]) + + def test_timedelta_ops(self): + _skip_if_numpy_not_friendly() + + # GH4984 + # make sure ops return timedeltas + s = Series([Timestamp('20130101') + timedelta(seconds=i*i) for i in range(10) ]) + td = s.diff() + + result = td.mean() + expected = to_timedelta(timedelta(seconds=9)) + tm.assert_series_equal(result, expected) + + result = td.quantile(.1) + expected = to_timedelta('00:00:02.6') + tm.assert_series_equal(result, expected) + + result = td.median() + expected = to_timedelta('00:00:08') + tm.assert_series_equal(result, expected) + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tseries/timedeltas.py b/pandas/tseries/timedeltas.py index 4d8633546e017..24e4b1377cc45 100644 --- a/pandas/tseries/timedeltas.py +++ b/pandas/tseries/timedeltas.py @@ -58,7 +58,7 @@ def _convert_listlike(arg, box): elif is_list_like(arg): return _convert_listlike(arg, box=box) - return _convert_listlike([ arg ], box=False)[0] + return _convert_listlike([ arg ], box=box) _short_search = re.compile( "^\s*(?P-?)\s*(?P\d*\.?\d*)\s*(?Pd|s|ms|us|ns)?\s*$",re.IGNORECASE)