Skip to content

Commit 4bd1e6a

Browse files
committed
Merge pull request #6754 from jreback/timedelta_perf
PERF: perf improvements in timedelta conversions from integer dtypes
2 parents cf0ef42 + b480094 commit 4bd1e6a

File tree

7 files changed

+90
-12
lines changed

7 files changed

+90
-12
lines changed

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ Improvements to existing features
208208
- Performance improvement when converting ``DatetimeIndex`` to floating ordinals
209209
using ``DatetimeConverter`` (:issue:`6636`)
210210
- Performance improvement for ``DataFrame.shift`` (:issue:`5609`)
211+
- Performance improvements in timedelta conversions for integer dtypes (:issue:`6754`)
211212

212213
.. _release.bug_fixes-0.14.0:
213214

pandas/core/common.py

+10
Original file line numberDiff line numberDiff line change
@@ -2130,6 +2130,16 @@ def is_timedelta64_dtype(arr_or_dtype):
21302130
return issubclass(tipo, np.timedelta64)
21312131

21322132

2133+
def is_timedelta64_ns_dtype(arr_or_dtype):
    """Return True if ``arr_or_dtype`` is exactly the ``timedelta64[ns]`` dtype.

    Parameters
    ----------
    arr_or_dtype : np.dtype, type, or object exposing a ``dtype`` attribute
        A dtype instance is used directly, a type is converted with
        ``np.dtype``, and anything else is asked for its ``.dtype``.

    Returns
    -------
    bool

    Raises
    ------
    AttributeError
        If the argument is neither a dtype nor a type and has no ``dtype``
        attribute.
    """
    if isinstance(arr_or_dtype, np.dtype):
        dtype = arr_or_dtype
    elif isinstance(arr_or_dtype, type):
        dtype = np.dtype(arr_or_dtype)
    else:
        dtype = arr_or_dtype.dtype
    # Compare the dtype itself rather than ``dtype.type``: the scalar type
    # (np.timedelta64) carries no unit information, so checking it against
    # the nanosecond dtype could never succeed.
    return dtype == np.dtype('timedelta64[ns]')
2141+
2142+
21332143
def needs_i8_conversion(arr_or_dtype):
    """Report whether ``arr_or_dtype`` is accepted by either
    ``is_datetime64_dtype`` or ``is_timedelta64_dtype``.

    The timedelta check is only evaluated when the datetime check is falsy.
    """
    datetime_like = is_datetime64_dtype(arr_or_dtype)
    if datetime_like:
        return datetime_like
    return is_timedelta64_dtype(arr_or_dtype)

pandas/tseries/tests/test_timedeltas.py

+30
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,36 @@ def conv(v):
173173
expected = np.timedelta64(timedelta(seconds=1))
174174
self.assertEqual(result, expected)
175175

176+
# arrays of various dtypes
177+
arr = np.array([1]*5,dtype='int64')
178+
result = to_timedelta(arr,unit='s')
179+
expected = Series([ np.timedelta64(1,'s') ]*5)
180+
tm.assert_series_equal(result, expected)
181+
182+
arr = np.array([1]*5,dtype='int64')
183+
result = to_timedelta(arr,unit='m')
184+
expected = Series([ np.timedelta64(1,'m') ]*5)
185+
tm.assert_series_equal(result, expected)
186+
187+
arr = np.array([1]*5,dtype='int64')
188+
result = to_timedelta(arr,unit='h')
189+
expected = Series([ np.timedelta64(1,'h') ]*5)
190+
tm.assert_series_equal(result, expected)
191+
192+
arr = np.array([1]*5,dtype='timedelta64[s]')
193+
result = to_timedelta(arr)
194+
expected = Series([ np.timedelta64(1,'s') ]*5)
195+
tm.assert_series_equal(result, expected)
196+
197+
arr = np.array([1]*5,dtype='timedelta64[D]')
198+
result = to_timedelta(arr)
199+
expected = Series([ np.timedelta64(1,'D') ]*5)
200+
tm.assert_series_equal(result, expected)
201+
202+
# these will error
203+
self.assertRaises(ValueError, lambda : to_timedelta(['1h']))
204+
self.assertRaises(ValueError, lambda : to_timedelta(['1m']))
205+
176206
def test_to_timedelta_via_apply(self):
177207
_skip_if_numpy_not_friendly()
178208

pandas/tseries/timedeltas.py

+15-11
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import numpy as np
99
import pandas.tslib as tslib
1010
from pandas import compat, _np_version_under1p7
11-
from pandas.core.common import (ABCSeries, is_integer, is_timedelta64_dtype,
11+
from pandas.core.common import (ABCSeries, is_integer, is_integer_dtype, is_timedelta64_dtype,
1212
_values_from_object, is_list_like, isnull)
1313

1414
repr_timedelta = tslib.repr_timedelta64
@@ -23,7 +23,7 @@ def to_timedelta(arg, box=True, unit='ns'):
2323
arg : string, timedelta, array of strings (with possible NAs)
2424
box : boolean, default True
2525
If True returns a Series of the results, if False returns ndarray of values
26-
unit : unit of the arg (D,s,ms,us,ns) denote the unit, which is an integer/float number
26+
unit : unit of the arg (D,h,m,s,ms,us,ns) denote the unit, which is an integer/float number
2727
2828
Returns
2929
-------
@@ -32,18 +32,22 @@ def to_timedelta(arg, box=True, unit='ns'):
3232
if _np_version_under1p7:
3333
raise ValueError("to_timedelta is not support for numpy < 1.7")
3434

35-
def _convert_listlike(arg, box):
35+
def _convert_listlike(arg, box, unit):
3636

3737
if isinstance(arg, (list,tuple)):
3838
arg = np.array(arg, dtype='O')
3939

4040
if is_timedelta64_dtype(arg):
41-
if box:
42-
from pandas import Series
43-
return Series(arg,dtype='m8[ns]')
44-
return arg
41+
value = arg.astype('timedelta64[ns]')
42+
elif is_integer_dtype(arg):
43+
# these are shortcutable
44+
value = arg.astype('timedelta64[{0}]'.format(unit)).astype('timedelta64[ns]')
45+
else:
46+
try:
47+
value = tslib.array_to_timedelta64(_ensure_object(arg),unit=unit)
48+
except:
49+
value = np.array([ _coerce_scalar_to_timedelta_type(r, unit=unit) for r in arg ])
4550

46-
value = np.array([ _coerce_scalar_to_timedelta_type(r, unit=unit) for r in arg ])
4751
if box:
4852
from pandas import Series
4953
value = Series(value,dtype='m8[ns]')
@@ -53,10 +57,10 @@ def _convert_listlike(arg, box):
5357
return arg
5458
elif isinstance(arg, ABCSeries):
5559
from pandas import Series
56-
values = _convert_listlike(arg.values, box=False)
60+
values = _convert_listlike(arg.values, box=False, unit=unit)
5761
return Series(values, index=arg.index, name=arg.name, dtype='m8[ns]')
5862
elif is_list_like(arg):
59-
return _convert_listlike(arg, box=box)
63+
return _convert_listlike(arg, box=box, unit=unit)
6064

6165
# ...so it must be a scalar value. Return scalar.
6266
return _coerce_scalar_to_timedelta_type(arg, unit=unit)
@@ -139,7 +143,7 @@ def convert(r=None, unit=None, m=m):
139143
return convert
140144

141145
# no converter
142-
raise ValueError("cannot create timedelta string converter")
146+
raise ValueError("cannot create timedelta string converter for [{0}]".format(r))
143147

144148
def _possibly_cast_to_timedelta(value, coerce=True):
145149
""" try to cast to timedelta64, if already a timedeltalike, then make

vb_suite/suite.py

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
'reshape',
2727
'stat_ops',
2828
'timeseries',
29+
'timedelta',
2930
'eval']
3031

3132
by_module = {}

vb_suite/timedelta.py

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
"""vbench benchmarks timing ``to_timedelta`` on integer and string input."""
from datetime import datetime

from vbench.api import Benchmark

# Statements prepended to the setup code of every benchmark in this module.
common_setup = """from pandas_vb_common import *
from pandas import to_timedelta
"""

#----------------------------------------------------------------------
# conversion

# Integer array interpreted as seconds.
setup = common_setup + """
arr = np.random.randint(0,1000,size=10000)
"""
stmt = "to_timedelta(arr,unit='s')"
timedelta_convert_int = Benchmark(
    stmt,
    setup,
    start_date=datetime(2014, 1, 1),
)

# List of strings of the form '<n> days'.
setup = common_setup + """
arr = np.random.randint(0,1000,size=10000)
arr = [ '{0} days'.format(i) for i in arr ]
"""
stmt = "to_timedelta(arr)"
timedelta_convert_string = Benchmark(
    stmt,
    setup,
    start_date=datetime(2014, 1, 1),
)

# List of strings of the form '00:00:SS'.
setup = common_setup + """
arr = np.random.randint(0,60,size=10000)
arr = [ '00:00:{0:02d}'.format(i) for i in arr ]
"""
stmt = "to_timedelta(arr)"
timedelta_convert_string_seconds = Benchmark(
    stmt,
    setup,
    start_date=datetime(2014, 1, 1),
)

vb_suite/timeseries.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ def date_range(start=None, end=None, periods=None, freq=None):
278278
"""
279279

280280
datetimeindex_converter = \
281-
Benchmark('DatetimeConverter.convert(rng, None, None)',
281+
Benchmark('DatetimeConverter.convert(rng, None, None)',
282282
setup, start_date=datetime(2013, 1, 1))
283283

284284
# Adding custom business day

0 commit comments

Comments
 (0)