Skip to content

Commit bb5ec57

Browse files
committed
PERF: timedelta and datetime64 ops improvements
1 parent 611bbc5 commit bb5ec57

File tree

4 files changed

+31
-33
lines changed

4 files changed

+31
-33
lines changed

doc/source/whatsnew/v0.17.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ Performance Improvements
5555
~~~~~~~~~~~~~~~~~~~~~~~~
5656

5757
- 4x improvement in ``timedelta`` string parsing (:issue:`6755`)
58+
- 8x improvement in ``timedelta64`` and ``datetime64`` ops (:issue:`6755`)
5859

5960
.. _whatsnew_0170.bug_fixes:
6061

pandas/tseries/timedeltas.py

+3-9
Original file line numberDiff line numberDiff line change
@@ -34,19 +34,13 @@ def _convert_listlike(arg, box, unit):
3434
if isinstance(arg, (list,tuple)) or ((hasattr(arg,'__iter__') and not hasattr(arg,'dtype'))):
3535
arg = np.array(list(arg), dtype='O')
3636

37+
# these are shortcutable
3738
if is_timedelta64_dtype(arg):
3839
value = arg.astype('timedelta64[ns]')
3940
elif is_integer_dtype(arg):
40-
41-
# these are shortcutable
42-
value = arg.astype('timedelta64[{0}]'.format(unit)).astype('timedelta64[ns]')
41+
value = arg.astype('timedelta64[{0}]'.format(unit)).astype('timedelta64[ns]', copy=False)
4342
else:
44-
try:
45-
value = tslib.array_to_timedelta64(_ensure_object(arg), unit=unit, coerce=coerce)
46-
except:
47-
48-
# try to process strings fast; may need to fallback
49-
value = np.array([ _coerce_scalar_to_timedelta_type(r, unit=unit, coerce=coerce) for r in arg ])
43+
value = tslib.array_to_timedelta64(_ensure_object(arg), unit=unit, coerce=coerce)
5044
value = value.astype('timedelta64[ns]', copy=False)
5145

5246
if box:

pandas/tseries/tools.py

+7
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,7 @@ def _convert_listlike(arg, box, format):
263263
if isinstance(arg, (list,tuple)):
264264
arg = np.array(arg, dtype='O')
265265

266+
# these are shortcutable
266267
if com.is_datetime64_ns_dtype(arg):
267268
if box and not isinstance(arg, DatetimeIndex):
268269
try:
@@ -271,6 +272,12 @@ def _convert_listlike(arg, box, format):
271272
pass
272273

273274
return arg
275+
elif format is None and com.is_integer_dtype(arg) and unit=='ns':
276+
result = arg.astype('datetime64[ns]')
277+
if box:
278+
return DatetimeIndex(result, tz='utc' if utc else None)
279+
280+
return result
274281

275282
arg = com._ensure_object(arg)
276283

pandas/tslib.pyx

+20-24
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,18 @@ from numpy cimport (int8_t, int32_t, int64_t, import_array, ndarray,
55
NPY_INT64, NPY_DATETIME, NPY_TIMEDELTA)
66
import numpy as np
77

8+
# GH3363
9+
from sys import version_info
10+
cdef bint PY2 = version_info[0] == 2
11+
cdef bint PY3 = not PY2
12+
813
from cpython cimport (
914
PyTypeObject,
1015
PyFloat_Check,
1116
PyLong_Check,
1217
PyObject_RichCompareBool,
1318
PyObject_RichCompare,
1419
PyString_Check,
15-
PyUnicode_Contains,
16-
PyString_AsString,
1720
Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
1821
)
1922

@@ -50,15 +53,11 @@ else:
5053
from dateutil.tz import gettz as _dateutil_gettz
5154

5255
from pytz.tzinfo import BaseTzInfo as _pytz_BaseTzInfo
53-
from pandas.compat import parse_date, string_types, PY3, iteritems
56+
from pandas.compat import parse_date, string_types, iteritems
5457

55-
from sys import version_info
5658
import operator
5759
import collections
5860

59-
# GH3363
60-
cdef bint PY2 = version_info[0] == 2
61-
6261
# initialize numpy
6362
import_array()
6463
#import_ufunc()
@@ -2291,12 +2290,10 @@ cdef inline parse_timedelta_string(object ts, coerce=False):
22912290
"""
22922291

22932292
cdef:
2294-
char c
2295-
bytes bc
2293+
str c
22962294
bint neg=0, have_dot=0, have_value=0, have_hhmmss=0
22972295
object current_unit=None
2298-
Py_ssize_t i
2299-
int64_t result=0, m, r
2296+
int64_t result=0, m=0, r
23002297
list number=[], frac=[], unit=[]
23012298

23022299
# neg : tracks if we have a leading negative for the value
@@ -2307,39 +2304,38 @@ cdef inline parse_timedelta_string(object ts, coerce=False):
23072304
if ts in _nat_strings or not len(ts):
23082305
return iNaT
23092306

2310-
for c in PyString_AsString(ts):
2311-
bc = <bytes>c
2307+
for c in ts:
23122308

23132309
# skip whitespace / commas
2314-
if bc == ' ' or bc == ',':
2310+
if c == ' ' or c == ',':
23152311
pass
23162312

23172313
# positive signs are ignored
2318-
elif bc == '+':
2314+
elif c == '+':
23192315
pass
23202316

23212317
# neg
2322-
elif bc == '-':
2318+
elif c == '-':
23232319

23242320
if neg or have_value or have_hhmmss:
23252321
raise ValueError("only leading negative signs are allowed")
23262322

23272323
neg = 1
23282324

23292325
# number (ascii codes)
2330-
elif c >= 48 and c <= 57:
2326+
elif ord(c) >= 48 and ord(c) <= 57:
23312327

23322328
if have_dot:
23332329

23342330
# we found a dot, but now it's just a fraction
23352331
if len(unit):
2336-
number.append(bc)
2332+
number.append(c)
23372333
have_dot = 0
23382334
else:
2339-
frac.append(bc)
2335+
frac.append(c)
23402336

23412337
elif not len(unit):
2342-
number.append(bc)
2338+
number.append(c)
23432339

23442340
else:
23452341

@@ -2349,12 +2345,12 @@ cdef inline parse_timedelta_string(object ts, coerce=False):
23492345
if coerce:
23502346
return iNaT
23512347
raise
2352-
unit, number, frac = [], [bc], []
2348+
unit, number, frac = [], [c], []
23532349

23542350
result += timedelta_as_neg(r, neg)
23552351

23562352
# hh:mm:ss.
2357-
elif bc == ':':
2353+
elif c == ':':
23582354

23592355
# we flip this off if we have a leading value
23602356
if have_value:
@@ -2381,7 +2377,7 @@ cdef inline parse_timedelta_string(object ts, coerce=False):
23812377
unit, number = [], []
23822378

23832379
# after the decimal point
2384-
elif bc == '.':
2380+
elif c == '.':
23852381

23862382
if len(number) and current_unit is not None:
23872383

@@ -2400,7 +2396,7 @@ cdef inline parse_timedelta_string(object ts, coerce=False):
24002396

24012397
# unit
24022398
else:
2403-
unit.append(bc)
2399+
unit.append(c)
24042400
have_value = 1
24052401
have_dot = 0
24062402

0 commit comments

Comments
 (0)