1
1
# -*- coding: utf-8 -*-
2
2
# cython: profile=False
3
3
import collections
4
- import re
5
4
6
5
import sys
7
6
cdef bint PY3 = (sys.version_info[0 ] >= 3 )
@@ -236,6 +235,14 @@ cpdef inline int64_t cast_from_unit(object ts, object unit) except? -1:
236
235
return < int64_t> (base * m) + < int64_t> (frac * m)
237
236
238
237
238
cdef inline _decode_if_necessary(object ts):
    """
    Return ``ts`` ready for character-wise parsing.

    On Python 3, or when ``ts`` is already a unicode object, ``ts`` is
    returned unchanged.  Otherwise it is converted with ``str`` and decoded
    as UTF-8.
    """
    if PY3 or PyUnicode_Check(ts):
        # already text - nothing to decode
        return ts

    return str(ts).decode('utf-8')
244
+
245
+
239
246
cdef inline parse_timedelta_string(object ts):
240
247
"""
241
248
Parse a regular format timedelta string. Return an int64_t (in ns)
@@ -258,9 +265,7 @@ cdef inline parse_timedelta_string(object ts):
258
265
if len (ts) == 0 or ts in nat_strings:
259
266
return NPY_NAT
260
267
261
- # decode ts if necessary
262
- if not PyUnicode_Check(ts) and not PY3:
263
- ts = str (ts).decode(' utf-8' )
268
+ ts = _decode_if_necessary(ts)
264
269
265
270
for c in ts:
266
271
@@ -507,26 +512,14 @@ def _binary_op_method_timedeltalike(op, name):
507
512
# ----------------------------------------------------------------------
508
513
# Timedelta Construction
509
514
510
- iso_pater = re.compile(r """ P
511
- ( ?P<days> -? [0-9 ]* ) DT
512
- ( ?P<hours> [0-9 ]{1,2} ) H
513
- ( ?P<minutes> [0-9 ]{1,2} ) M
514
- ( ?P<seconds> [0-9 ]{0,2} )
515
- ( \.
516
- ( ?P<milliseconds> [0-9 ]{1,3} )
517
- ( ?P<microseconds> [0-9 ]{0,3} )
518
- ( ?P<nanoseconds> [0-9 ]{0,3} )
519
- ) ? S""" , re.VERBOSE)
520
-
521
-
522
- cdef int64_t parse_iso_format_string(object iso_fmt) except ? - 1 :
515
cdef inline int64_t parse_iso_format_string(object ts) except? -1:
    """
    Parse an ISO 8601 Duration formatted string one character at a time,
    converting each numeric component with ``timedelta_from_spec`` and
    summing the results.

    Parameters
    ----------
    ts :
        ISO 8601 Duration formatted string

    Returns
    -------
    int64_t
        Sum of all parsed components.  Presumably in nanoseconds, like the
        sibling ``parse_timedelta_string`` - confirm against
        ``timedelta_from_spec``.

    Raises
    ------
    ValueError
        If ``ts`` cannot be parsed
    """

    cdef:
        unicode c
        # result: running total; r: value of the component just converted
        int64_t result = 0, r
        # p: number of digits seen in the current fractional group
        int p=0
        # dec_unit: unit applied to the current fractional group; advances
        # 'ms' -> 'us' -> 'ns' as further digits arrive after the dot
        object dec_unit = 'ms', err_msg
        bint have_dot=0, have_value=0, neg=0
        # number: digit characters of the value being built
        # unit: designator characters seen since that value
        list number=[], unit=[]

    ts = _decode_if_necessary(ts)

    # built once up front; every failure path below raises the same message
    err_msg = "Invalid ISO 8601 Duration format - {}".format(ts)

    for c in ts:
        # number (ascii codes 48..57 are '0'..'9')
        if ord(c) >= 48 and ord(c) <= 57:

            have_value = 1
            if have_dot:
                # a fourth digit closes the current 3-digit group: queue the
                # group's unit so the buffered digits are flushed below, then
                # move on to the next finer unit
                if p == 3 and dec_unit != 'ns':
                    unit.append(dec_unit)
                    if dec_unit == 'ms':
                        dec_unit = 'us'
                    elif dec_unit == 'us':
                        dec_unit = 'ns'
                    p = 0
                p += 1

            if not len(unit):
                # still inside the same value - keep buffering digits
                number.append(c)
            else:
                # a designator was seen since the buffered digits, so this
                # digit starts a new value: flush the previous one first
                # if in days, pop trailing T
                if unit[-1] == 'T':
                    unit.pop()
                elif 'H' in unit or 'M' in unit:
                    # hour and minute values may have at most two digits
                    if len(number) > 2:
                        raise ValueError(err_msg)
                r = timedelta_from_spec(number, '0', unit)
                result += timedelta_as_neg(r, neg)

                # neg is cleared once a value has been flushed in this branch
                neg = 0
                unit, number = [], [c]
        else:
            if c == 'P':
                pass  # ignore leading character
            elif c == '-':
                # only a single sign is accepted, and only before any digit
                if neg or have_value:
                    raise ValueError(err_msg)
                else:
                    neg = 1
            elif c in ['D', 'T', 'H', 'M']:
                # remember the designator; the value it belongs to is
                # flushed when the next digit arrives
                unit.append(c)
            elif c == '.':
                # append any seconds
                # NOTE(review): buffered digits are converted as seconds and
                # any designator pending in ``unit`` is discarded, so input
                # such as '...5M.5S' would count the 5 as seconds - confirm
                # this is intended.
                if len(number):
                    r = timedelta_from_spec(number, '0', 'S')
                    result += timedelta_as_neg(r, neg)
                    unit, number = [], []
                have_dot = 1
            elif c == 'S':
                if have_dot:  # ms, us, or ns
                    # reject a dot with no digits after it, or more
                    # fractional digits than the three groups can hold
                    if not len(number) or p > 3:
                        raise ValueError(err_msg)
                    # pad to 3 digits as required
                    pad = 3 - p
                    while pad > 0:
                        number.append('0')
                        pad -= 1

                    r = timedelta_from_spec(number, '0', dec_unit)
                    result += timedelta_as_neg(r, neg)
                else:  # seconds
                    # whole seconds may have at most two digits
                    if len(number) <= 2:
                        r = timedelta_from_spec(number, '0', 'S')
                        result += timedelta_as_neg(r, neg)
                    else:
                        raise ValueError(err_msg)
            else:
                # any other character is not part of the accepted format
                raise ValueError(err_msg)

    # NOTE(review): only ``have_value`` is checked here.  Digits still held
    # in ``number`` when the loop ends (e.g. a string that does not finish
    # with 'S') are never passed to timedelta_from_spec - confirm whether
    # such input should raise instead.
    if not have_value:
        # Received string only - never parsed any values
        raise ValueError(err_msg)

    return result
559
620
560
621
561
622
cdef _to_py_int_float(v):
0 commit comments