1
1
# -*- coding: utf-8 -*-
2
2
# cython: profile=False
3
3
import collections
4
- import re
5
4
6
5
import sys
7
6
cdef bint PY3 = (sys.version_info[0 ] >= 3 )
@@ -235,6 +234,13 @@ cpdef inline int64_t cast_from_unit(object ts, object unit) except? -1:
235
234
frac = round (frac, p)
236
235
return < int64_t> (base * m) + < int64_t> (frac * m)
237
236
237
cdef inline _decode_if_necessary(object ts):
    """
    Return ``ts`` as a unicode object.

    ``ts`` is handed back untouched when ``PY3`` is true or when it already
    passes ``PyUnicode_Check``; otherwise ``str(ts)`` is decoded as UTF-8.
    """
    if PyUnicode_Check(ts) or PY3:
        # nothing to decode
        return ts

    return str(ts).decode('utf-8')
243
+
238
244
239
245
cdef inline parse_timedelta_string(object ts):
240
246
"""
@@ -258,9 +264,7 @@ cdef inline parse_timedelta_string(object ts):
258
264
if len (ts) == 0 or ts in nat_strings:
259
265
return NPY_NAT
260
266
261
- # decode ts if necessary
262
- if not PyUnicode_Check(ts) and not PY3:
263
- ts = str (ts).decode(' utf-8' )
267
+ ts = _decode_if_necessary(ts)
264
268
265
269
for c in ts:
266
270
@@ -507,26 +511,14 @@ def _binary_op_method_timedeltalike(op, name):
507
511
# ----------------------------------------------------------------------
508
512
# Timedelta Construction
509
513
510
- iso_pater = re.compile(r """ P
511
- ( ?P<days> -? [0-9 ]* ) DT
512
- ( ?P<hours> [0-9 ]{1,2} ) H
513
- ( ?P<minutes> [0-9 ]{1,2} ) M
514
- ( ?P<seconds> [0-9 ]{0,2} )
515
- ( \.
516
- ( ?P<milliseconds> [0-9 ]{1,3} )
517
- ( ?P<microseconds> [0-9 ]{0,3} )
518
- ( ?P<nanoseconds> [0-9 ]{0,3} )
519
- ) ? S""" , re.VERBOSE)
520
-
521
-
522
cdef inline int64_t parse_iso_format_string(object ts) except? -1:
    """
    Parse an ISO 8601 Duration formatted string into an integer duration.

    The string is scanned one character at a time. Digits are collected
    into the current number. A designator (``D``, ``H``, ``M`` or ``S``)
    closes that number: it is converted with ``timedelta_from_spec`` using
    the designator as the unit and added to the running total right away,
    so a string may end after any component (``P1D``, ``P1DT2H``).
    Digits following a ``.`` are consumed in groups of three and converted
    as ``ms``, ``us`` and ``ns`` in turn; a short final group is
    right-padded with zeros. The markers ``P`` and ``T`` carry no value and
    are skipped.

    Parameters
    ----------
    ts :
        ISO 8601 Duration formatted string

    Returns
    -------
    int64_t
        Sum of the values ``timedelta_from_spec`` returned for each
        component (NOTE(review): presumably nanoseconds - confirm against
        ``timedelta_from_spec``)

    Raises
    ------
    ValueError
        If ``ts`` cannot be parsed
    """

    cdef:
        unicode c
        int64_t result = 0, r
        int p = 0
        object dec_unit = 'ms', err_msg
        bint have_dot = 0, have_value = 0, neg = 0
        list number = []

    ts = _decode_if_necessary(ts)

    err_msg = "Invalid ISO 8601 Duration format - {}".format(ts)

    for c in ts:
        # number (ascii codes)
        if 48 <= ord(c) <= 57:
            have_value = 1
            if have_dot:
                if p == 3 and dec_unit != 'ns':
                    # a complete 3-digit group has been read: add it and
                    # move on to the next finer unit (ms -> us -> ns)
                    r = timedelta_from_spec(number, '0', dec_unit)
                    result += timedelta_as_neg(r, neg)
                    neg = 0
                    number = []
                    dec_unit = 'us' if dec_unit == 'ms' else 'ns'
                    p = 0
                p += 1
            number.append(c)
        elif c == 'P' or c == 'T':
            pass  # marker characters carry no value
        elif c == '-':
            # a sign is only accepted once, before any digit
            if neg or have_value:
                raise ValueError(err_msg)
            neg = 1
        elif c in ['D', 'H', 'M']:
            # hours and minutes are limited to two digits
            if c != 'D' and len(number) > 2:
                raise ValueError(err_msg)
            # add the component as soon as its designator is seen, so it is
            # not lost when no further digit follows
            r = timedelta_from_spec(number, '0', c)
            result += timedelta_as_neg(r, neg)
            # the sign only applies to the first component
            neg = 0
            number = []
        elif c == '.':
            # append any seconds
            if len(number):
                r = timedelta_from_spec(number, '0', 'S')
                result += timedelta_as_neg(r, neg)
                number = []
            have_dot = 1
        elif c == 'S':
            if have_dot:  # ms, us, or ns
                if not len(number) or p > 3:
                    raise ValueError(err_msg)
                # pad to 3 digits as required
                number.extend(['0'] * (3 - p))
                r = timedelta_from_spec(number, '0', dec_unit)
            else:  # seconds
                if len(number) > 2:
                    raise ValueError(err_msg)
                r = timedelta_from_spec(number, '0', 'S')
            result += timedelta_as_neg(r, neg)
            # consumed; do not let a later designator reuse these digits
            number = []
        else:
            raise ValueError(err_msg)

    if not have_value:
        # Received string only - never parsed any values
        raise ValueError(err_msg)

    return result
559
619
560
620
561
621
cdef _to_py_int_float(v):
0 commit comments