Skip to content

Commit c6166b0

Browse files
WillAyd authored and jreback committed
Added ISO 8601 Duration string constructor for Timedelta (#19065)
1 parent 35b2aba commit c6166b0

File tree

5 files changed

+123
-1
lines changed

5 files changed

+123
-1
lines changed

asv_bench/benchmarks/timedelta.py

+31
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,40 @@
1+
import datetime
2+
13
import numpy as np
24
import pandas as pd
35

46
from pandas import to_timedelta, Timestamp, Timedelta
57

68

9+
class TimedeltaConstructor(object):
10+
goal_time = 0.2
11+
12+
def time_from_int(self):
13+
Timedelta(123456789)
14+
15+
def time_from_unit(self):
16+
Timedelta(1, unit='d')
17+
18+
def time_from_components(self):
19+
Timedelta(days=1, hours=2, minutes=3, seconds=4, milliseconds=5,
20+
microseconds=6, nanoseconds=7)
21+
22+
def time_from_datetime_timedelta(self):
23+
Timedelta(datetime.timedelta(days=1, seconds=1))
24+
25+
def time_from_np_timedelta(self):
26+
Timedelta(np.timedelta64(1, 'ms'))
27+
28+
def time_from_string(self):
29+
Timedelta('1 days')
30+
31+
def time_from_iso_format(self):
32+
Timedelta('P4DT12H30M5S')
33+
34+
def time_from_missing(self):
35+
Timedelta('nat')
36+
37+
738
class ToTimedelta(object):
839
goal_time = 0.2
940

doc/source/timedeltas.rst

+8
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,14 @@ You can construct a ``Timedelta`` scalar through various arguments:
6262
pd.Timedelta('nan')
6363
pd.Timedelta('nat')
6464
65+
# ISO 8601 Duration strings
66+
pd.Timedelta('P0DT0H1M0S')
67+
pd.Timedelta('P0DT0H0M0.000000123S')
68+
69+
.. versionadded:: 0.23.0
70+
71+
``Timedelta`` can now be constructed from `ISO 8601 Duration`_ strings.
72+
6573
:ref:`DateOffsets<timeseries.offsets>` (``Day, Hour, Minute, Second, Milli, Micro, Nano``) can also be used in construction.
6674

6775
.. ipython:: python

doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@ Other API Changes
211211
- Subtracting ``NaT`` from a :class:`Series` with ``dtype='datetime64[ns]'`` returns a ``Series`` with ``dtype='timedelta64[ns]'`` instead of ``dtype='datetime64[ns]'`` (:issue:`18808`)
212212
- Operations between a :class:`Series` with ``dtype='datetime64[ns]'`` and a :class:`PeriodIndex` will correctly raise ``TypeError`` (:issue:`18850`)
213213
- Subtraction of :class:`Series` with timezone-aware ``dtype='datetime64[ns]'`` with mis-matched timezones will raise ``TypeError`` instead of ``ValueError`` (:issue:`18817`)
214+
- The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`)
214215

215216
.. _whatsnew_0230.deprecations:
216217

pandas/_libs/tslibs/timedeltas.pyx

+57-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# -*- coding: utf-8 -*-
22
# cython: profile=False
33
import collections
4+
import re
45

56
import sys
67
cdef bint PY3 = (sys.version_info[0] >= 3)
@@ -506,6 +507,57 @@ def _binary_op_method_timedeltalike(op, name):
506507
# ----------------------------------------------------------------------
507508
# Timedelta Construction
508509

510+
iso_pater = re.compile(r"""P
511+
(?P<days>-?[0-9]*)DT
512+
(?P<hours>[0-9]{1,2})H
513+
(?P<minutes>[0-9]{1,2})M
514+
(?P<seconds>[0-9]{0,2})
515+
(\.
516+
(?P<milliseconds>[0-9]{1,3})
517+
(?P<microseconds>[0-9]{0,3})
518+
(?P<nanoseconds>[0-9]{0,3})
519+
)?S""", re.VERBOSE)
520+
521+
522+
cdef int64_t parse_iso_format_string(object iso_fmt) except? -1:
523+
"""
524+
Extracts and cleanses the appropriate values from a match object with
525+
groups for each component of an ISO 8601 duration
526+
527+
Parameters
528+
----------
529+
iso_fmt:
530+
ISO 8601 Duration formatted string
531+
532+
Returns
533+
-------
534+
ns: int64_t
535+
Precision in nanoseconds of matched ISO 8601 duration
536+
537+
Raises
538+
------
539+
ValueError
540+
If ``iso_fmt`` cannot be parsed
541+
"""
542+
543+
cdef int64_t ns = 0
544+
545+
match = re.match(iso_pater, iso_fmt)
546+
if match:
547+
match_dict = match.groupdict(default='0')
548+
for comp in ['milliseconds', 'microseconds', 'nanoseconds']:
549+
match_dict[comp] = '{:0<3}'.format(match_dict[comp])
550+
551+
for k, v in match_dict.items():
552+
ns += timedelta_from_spec(v, '0', k)
553+
554+
else:
555+
raise ValueError("Invalid ISO 8601 Duration format - "
556+
"{}".format(iso_fmt))
557+
558+
return ns
559+
560+
509561
cdef _to_py_int_float(v):
510562
# Note: This used to be defined inside Timedelta.__new__
511563
# but cython will not allow `cdef` functions to be defined dynamically.
@@ -825,7 +877,11 @@ class Timedelta(_Timedelta):
825877
if isinstance(value, Timedelta):
826878
value = value.value
827879
elif is_string_object(value):
828-
value = np.timedelta64(parse_timedelta_string(value))
880+
if len(value) > 0 and value[0] == 'P':
881+
value = parse_iso_format_string(value)
882+
else:
883+
value = parse_timedelta_string(value)
884+
value = np.timedelta64(value)
829885
elif PyDelta_Check(value):
830886
value = convert_to_timedelta64(value, 'ns')
831887
elif is_timedelta64_object(value):

pandas/tests/scalar/test_timedelta.py

+26
Original file line numberDiff line numberDiff line change
@@ -853,3 +853,29 @@ def test_isoformat(self):
853853
result = Timedelta(minutes=1).isoformat()
854854
expected = 'P0DT0H1M0S'
855855
assert result == expected
856+
857+
@pytest.mark.parametrize('fmt,exp', [
858+
('P6DT0H50M3.010010012S', Timedelta(days=6, minutes=50, seconds=3,
859+
milliseconds=10, microseconds=10,
860+
nanoseconds=12)),
861+
('P-6DT0H50M3.010010012S', Timedelta(days=-6, minutes=50, seconds=3,
862+
milliseconds=10, microseconds=10,
863+
nanoseconds=12)),
864+
('P4DT12H30M5S', Timedelta(days=4, hours=12, minutes=30, seconds=5)),
865+
('P0DT0H0M0.000000123S', Timedelta(nanoseconds=123)),
866+
('P0DT0H0M0.00001S', Timedelta(microseconds=10)),
867+
('P0DT0H0M0.001S', Timedelta(milliseconds=1)),
868+
('P0DT0H1M0S', Timedelta(minutes=1)),
869+
('P1DT25H61M61S', Timedelta(days=1, hours=25, minutes=61, seconds=61))
870+
])
871+
def test_iso_constructor(self, fmt, exp):
872+
assert Timedelta(fmt) == exp
873+
874+
@pytest.mark.parametrize('fmt', [
875+
'PPPPPPPPPPPP', 'PDTHMS', 'P0DT999H999M999S',
876+
'P1DT0H0M0.0000000000000S', 'P1DT0H0M00000000000S',
877+
'P1DT0H0M0.S'])
878+
def test_iso_constructor_raises(self, fmt):
879+
with tm.assert_raises_regex(ValueError, 'Invalid ISO 8601 Duration '
880+
'format - {}'.format(fmt)):
881+
Timedelta(fmt)

0 commit comments

Comments
 (0)