Skip to content

Commit b326ec1

Browse files
jreback authored and gambogi committed
PERF: parse timedelta strings in cython pandas-dev#6755
1 parent 7d82b34 commit b326ec1

File tree

3 files changed

+26
-15
lines changed

3 files changed

+26
-15
lines changed

doc/source/whatsnew/v0.17.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ Performance Improvements
6363

6464
- 4x improvement in ``timedelta`` string parsing (:issue:`6755`)
6565
- 8x improvement in ``timedelta64`` and ``datetime64`` ops (:issue:`6755`)
66+
=======
6667

6768
.. _whatsnew_0170.bug_fixes:
6869

pandas/tseries/timedeltas.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,12 @@ def _convert_listlike(arg, box, unit):
4040
elif is_integer_dtype(arg):
4141
value = arg.astype('timedelta64[{0}]'.format(unit)).astype('timedelta64[ns]', copy=False)
4242
else:
43-
value = tslib.array_to_timedelta64(_ensure_object(arg), unit=unit, coerce=coerce)
43+
try:
44+
value = tslib.array_to_timedelta64(_ensure_object(arg), unit=unit, coerce=coerce)
45+
except:
46+
47+
# try to process strings fast; may need to fallback
48+
value = np.array([ _coerce_scalar_to_timedelta_type(r, unit=unit, coerce=coerce) for r in arg ])
4449
value = value.astype('timedelta64[ns]', copy=False)
4550

4651
if box:

pandas/tslib.pyx

+19-14
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ from cpython cimport (
1717
PyObject_RichCompareBool,
1818
PyObject_RichCompare,
1919
PyString_Check,
20+
PyUnicode_Contains,
21+
PyString_AsString,
2022
Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
2123
)
2224

@@ -2290,10 +2292,12 @@ cdef inline parse_timedelta_string(object ts, coerce=False):
22902292
"""
22912293

22922294
cdef:
2293-
str c
2295+
char c
2296+
bytes bc
22942297
bint neg=0, have_dot=0, have_value=0, have_hhmmss=0
22952298
object current_unit=None
2296-
int64_t result=0, m=0, r
2299+
Py_ssize_t i
2300+
int64_t result=0, m, r
22972301
list number=[], frac=[], unit=[]
22982302

22992303
# neg : tracks if we have a leading negative for the value
@@ -2304,38 +2308,39 @@ cdef inline parse_timedelta_string(object ts, coerce=False):
23042308
if ts in _nat_strings or not len(ts):
23052309
return iNaT
23062310

2307-
for c in ts:
2311+
for c in PyString_AsString(ts):
2312+
bc = <bytes>c
23082313

23092314
# skip whitespace / commas
2310-
if c == ' ' or c == ',':
2315+
if bc == ' ' or bc == ',':
23112316
pass
23122317

23132318
# positive signs are ignored
2314-
elif c == '+':
2319+
elif bc == '+':
23152320
pass
23162321

23172322
# neg
2318-
elif c == '-':
2323+
elif bc == '-':
23192324

23202325
if neg or have_value or have_hhmmss:
23212326
raise ValueError("only leading negative signs are allowed")
23222327

23232328
neg = 1
23242329

23252330
# number (ascii codes)
2326-
elif ord(c) >= 48 and ord(c) <= 57:
2331+
elif c >= 48 and c <= 57:
23272332

23282333
if have_dot:
23292334

23302335
# we found a dot, but now its just a fraction
23312336
if len(unit):
2332-
number.append(c)
2337+
number.append(bc)
23332338
have_dot = 0
23342339
else:
2335-
frac.append(c)
2340+
frac.append(bc)
23362341

23372342
elif not len(unit):
2338-
number.append(c)
2343+
number.append(bc)
23392344

23402345
else:
23412346

@@ -2345,12 +2350,12 @@ cdef inline parse_timedelta_string(object ts, coerce=False):
23452350
if coerce:
23462351
return iNaT
23472352
raise
2348-
unit, number, frac = [], [c], []
2353+
unit, number, frac = [], [bc], []
23492354

23502355
result += timedelta_as_neg(r, neg)
23512356

23522357
# hh:mm:ss.
2353-
elif c == ':':
2358+
elif bc == ':':
23542359

23552360
# we flip this off if we have a leading value
23562361
if have_value:
@@ -2377,7 +2382,7 @@ cdef inline parse_timedelta_string(object ts, coerce=False):
23772382
unit, number = [], []
23782383

23792384
# after the decimal point
2380-
elif c == '.':
2385+
elif bc == '.':
23812386

23822387
if len(number) and current_unit is not None:
23832388

@@ -2396,7 +2401,7 @@ cdef inline parse_timedelta_string(object ts, coerce=False):
23962401

23972402
# unit
23982403
else:
2399-
unit.append(c)
2404+
unit.append(bc)
24002405
have_value = 1
24012406
have_dot = 0
24022407

0 commit comments

Comments
 (0)