Skip to content

Commit 6bab9da

Browse files
committed
ISO 8601 Duration Refactoring
1 parent e1a09ce commit 6bab9da

File tree

3 files changed

+53
-36
lines changed

3 files changed

+53
-36
lines changed

doc/source/timedeltas.rst

+1
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ You can construct a ``Timedelta`` scalar through various arguments:
6767
pd.Timedelta('P0DT0H0M0.000000123S')
6868
6969
.. versionadded:: 0.23.0
70+
7071
Added constructor for `ISO 8601 Duration`_ strings
7172

7273
:ref:`DateOffsets<timeseries.offsets>` (``Day, Hour, Minute, Second, Milli, Micro, Nano``) can also be used in construction.

pandas/_libs/tslibs/timedeltas.pyx

+40-35
Original file line numberDiff line numberDiff line change
@@ -236,25 +236,6 @@ cpdef inline int64_t cast_from_unit(object ts, object unit) except? -1:
236236
return <int64_t> (base *m) + <int64_t> (frac *m)
237237

238238

239-
cpdef match_iso_format(object ts):
240-
"""
241-
Match a provided string against an ISO 8601 pattern, providing a group for
242-
each ``Timedelta`` component.
243-
"""
244-
pater = re.compile(r"""P
245-
(?P<days>-?[0-9]*)DT
246-
(?P<hours>[0-9]{1,2})H
247-
(?P<minutes>[0-9]{1,2})M
248-
(?P<seconds>[0-9]{0,2})
249-
(\.
250-
(?P<milliseconds>[0-9]{0,3})
251-
(?P<microseconds>[0-9]{0,3})
252-
(?P<nanoseconds>[0-9]{0,3})
253-
)?S""", re.VERBOSE)
254-
255-
return re.match(pater, ts)
256-
257-
258239
cdef inline parse_timedelta_string(object ts):
259240
"""
260241
Parse a regular format timedelta string. Return an int64_t (in ns)
@@ -526,31 +507,55 @@ def _binary_op_method_timedeltalike(op, name):
526507
# ----------------------------------------------------------------------
527508
# Timedelta Construction
528509

529-
def _value_from_iso_match(match):
510+
iso_pater = re.compile(r"""P
511+
(?P<days>-?[0-9]*)DT
512+
(?P<hours>[0-9]{1,2})H
513+
(?P<minutes>[0-9]{1,2})M
514+
(?P<seconds>[0-9]{0,2})
515+
(\.
516+
(?P<milliseconds>[0-9]{1,3})
517+
(?P<microseconds>[0-9]{0,3})
518+
(?P<nanoseconds>[0-9]{0,3})
519+
)?S""", re.VERBOSE)
520+
521+
522+
cdef int64_t parse_iso_format_string(object iso_fmt) except? -1:
530523
"""
531524
Extracts and cleanses the appropriate values from a match object with
532525
groups for each component of an ISO 8601 duration
533526
534527
Parameters
535528
----------
536-
match:
537-
Regular expression with groups for each component of an ISO 8601
538-
duration
529+
iso_fmt:
530+
ISO 8601 Duration formatted string
539531
540532
Returns
541533
-------
542-
int
534+
ns: int64_t
543535
Precision in nanoseconds of matched ISO 8601 duration
536+
537+
Raises
538+
------
539+
ValueError
540+
If ``iso_fmt`` cannot be parsed
544541
"""
545-
match_dict = {k: v for k, v in match.groupdict().items() if v}
546-
for comp in ['milliseconds', 'microseconds', 'nanoseconds']:
547-
if comp in match_dict:
548-
match_dict[comp] ='{:0<3}'.format(match_dict[comp])
549542

550-
match_dict = {k: int(v) for k, v in match_dict.items()}
551-
nano = match_dict.pop('nanoseconds', 0)
543+
cdef int64_t ns = 0
544+
545+
match = re.match(iso_pater, iso_fmt)
546+
if match:
547+
match_dict = match.groupdict(default='0')
548+
for comp in ['milliseconds', 'microseconds', 'nanoseconds']:
549+
match_dict[comp] = '{:0<3}'.format(match_dict[comp])
550+
551+
for k, v in match_dict.items():
552+
ns += timedelta_from_spec(v, '0', k)
553+
554+
else:
555+
raise ValueError("Invalid ISO 8601 Duration format - "
556+
"{}".format(iso_fmt))
552557

553-
return nano + convert_to_timedelta64(timedelta(**match_dict), 'ns')
558+
return ns
554559

555560

556561
cdef _to_py_int_float(v):
@@ -872,11 +877,11 @@ class Timedelta(_Timedelta):
872877
if isinstance(value, Timedelta):
873878
value = value.value
874879
elif is_string_object(value):
875-
if len(value) > 0 and value[0] == 'P': # hackish
876-
match = match_iso_format(value)
877-
value = _value_from_iso_match(match)
880+
if len(value) > 0 and value[0] == 'P':
881+
value = parse_iso_format_string(value)
878882
else:
879-
value = np.timedelta64(parse_timedelta_string(value))
883+
value = parse_timedelta_string(value)
884+
value = np.timedelta64(value)
880885
elif PyDelta_Check(value):
881886
value = convert_to_timedelta64(value, 'ns')
882887
elif is_timedelta64_object(value):

pandas/tests/scalar/test_timedelta.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -865,6 +865,17 @@ def test_isoformat(self):
865865
('P0DT0H0M0.000000123S', Timedelta(nanoseconds=123)),
866866
('P0DT0H0M0.00001S', Timedelta(microseconds=10)),
867867
('P0DT0H0M0.001S', Timedelta(milliseconds=1)),
868-
('P0DT0H1M0S', Timedelta(minutes=1))])
868+
('P0DT0H1M0S', Timedelta(minutes=1)),
869+
('P1DT25H61M61S', Timedelta(days=1, hours=25, minutes=61, seconds=61))
870+
])
869871
def test_iso_constructor(self, fmt, exp):
870872
assert Timedelta(fmt) == exp
873+
874+
@pytest.mark.parametrize('fmt', [
875+
'PPPPPPPPPPPP', 'PDTHMS', 'P0DT999H999M999S',
876+
'P1DT0H0M0.0000000000000S', 'P1DT0H0M00000000000S',
877+
'P1DT0H0M0.S'])
878+
def test_iso_constructor_raises(self, fmt):
879+
with tm.assert_raises_regex(ValueError, 'Invalid ISO 8601 Duration '
880+
'format - {}'.format(fmt)):
881+
Timedelta(fmt)

0 commit comments

Comments
 (0)