From e1a09ce4a51f525abe4bf4a65f504f5f9e03ad6f Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Wed, 3 Jan 2018 14:22:36 -0800 Subject: [PATCH 1/2] Added ISO 8601 Duration string constructor for Timedelta --- asv_bench/benchmarks/timedelta.py | 31 ++++++++++++++++ doc/source/timedeltas.rst | 7 ++++ doc/source/whatsnew/v0.23.0.txt | 1 + pandas/_libs/tslibs/timedeltas.pyx | 53 ++++++++++++++++++++++++++- pandas/tests/scalar/test_timedelta.py | 15 ++++++++ 5 files changed, 106 insertions(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/timedelta.py b/asv_bench/benchmarks/timedelta.py index f99f95678a0b7..1897b0287ed19 100644 --- a/asv_bench/benchmarks/timedelta.py +++ b/asv_bench/benchmarks/timedelta.py @@ -1,9 +1,40 @@ +import datetime + import numpy as np import pandas as pd from pandas import to_timedelta, Timestamp, Timedelta +class TimedeltaConstructor(object): + goal_time = 0.2 + + def time_from_int(self): + Timedelta(123456789) + + def time_from_unit(self): + Timedelta(1, unit='d') + + def time_from_components(self): + Timedelta(days=1, hours=2, minutes=3, seconds=4, milliseconds=5, + microseconds=6, nanoseconds=7) + + def time_from_datetime_timedelta(self): + Timedelta(datetime.timedelta(days=1, seconds=1)) + + def time_from_np_timedelta(self): + Timedelta(np.timedelta64(1, 'ms')) + + def time_from_string(self): + Timedelta('1 days') + + def time_from_iso_format(self): + Timedelta('P4DT12H30M5S') + + def time_from_missing(self): + Timedelta('nat') + + class ToTimedelta(object): goal_time = 0.2 diff --git a/doc/source/timedeltas.rst b/doc/source/timedeltas.rst index 6bbfb54629c4d..5c6652bf1f5b2 100644 --- a/doc/source/timedeltas.rst +++ b/doc/source/timedeltas.rst @@ -62,6 +62,13 @@ You can construct a ``Timedelta`` scalar through various arguments: pd.Timedelta('nan') pd.Timedelta('nat') + # ISO 8601 Duration strings + pd.Timedelta('P0DT0H1M0S') + pd.Timedelta('P0DT0H0M0.000000123S') + +.. 
versionadded:: 0.23.0 + Added constructor for `ISO 8601 Duration`_ strings + :ref:`DateOffsets` (``Day, Hour, Minute, Second, Milli, Micro, Nano``) can also be used in construction. .. ipython:: python diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index c7b1cb4379700..4d806f1f05a16 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -211,6 +211,7 @@ Other API Changes - Subtracting ``NaT`` from a :class:`Series` with ``dtype='datetime64[ns]'`` returns a ``Series`` with ``dtype='timedelta64[ns]'`` instead of ``dtype='datetime64[ns]'``(:issue:`18808`) - Operations between a :class:`Series` with dtype ``dtype='datetime64[ns]'`` and a :class:`PeriodIndex` will correctly raises ``TypeError`` (:issue:`18850`) - Subtraction of :class:`Series` with timezone-aware ``dtype='datetime64[ns]'`` with mis-matched timezones will raise ``TypeError`` instead of ``ValueError`` (issue:`18817`) +- The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`) .. _whatsnew_0230.deprecations: diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index b37e5dc620260..448b8f482c6e6 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # cython: profile=False import collections +import re import sys cdef bint PY3 = (sys.version_info[0] >= 3) @@ -235,6 +236,25 @@ cpdef inline int64_t cast_from_unit(object ts, object unit) except? -1: return (base *m) + (frac *m) +cpdef match_iso_format(object ts): + """ + Match a provided string against an ISO 8601 pattern, providing a group for + each ``Timedelta`` component. + """ + pater = re.compile(r"""P + (?P<days>-?[0-9]*)DT + (?P<hours>[0-9]{1,2})H + (?P<minutes>[0-9]{1,2})M + (?P<seconds>[0-9]{0,2}) + (\. 
+ (?P<milliseconds>[0-9]{0,3}) + (?P<microseconds>[0-9]{0,3}) + (?P<nanoseconds>[0-9]{0,3}) + )?S""", re.VERBOSE) + + return re.match(pater, ts) + + cdef inline parse_timedelta_string(object ts): """ Parse a regular format timedelta string. Return an int64_t (in ns) @@ -506,6 +526,33 @@ def _binary_op_method_timedeltalike(op, name): # ---------------------------------------------------------------------- # Timedelta Construction +def _value_from_iso_match(match): + """ + Extracts and cleanses the appropriate values from a match object with + groups for each component of an ISO 8601 duration + + Parameters + ---------- + match: + Regular expression with groups for each component of an ISO 8601 + duration + + Returns + ------- + int + Precision in nanoseconds of matched ISO 8601 duration + """ + match_dict = {k: v for k, v in match.groupdict().items() if v} + for comp in ['milliseconds', 'microseconds', 'nanoseconds']: + if comp in match_dict: + match_dict[comp] ='{:0<3}'.format(match_dict[comp]) + + match_dict = {k: int(v) for k, v in match_dict.items()} + nano = match_dict.pop('nanoseconds', 0) + + return nano + convert_to_timedelta64(timedelta(**match_dict), 'ns') + + cdef _to_py_int_float(v): # Note: This used to be defined inside Timedelta.__new__ # but cython will not allow `cdef` functions to be defined dynamically. 
@@ -825,7 +872,11 @@ class Timedelta(_Timedelta): if isinstance(value, Timedelta): value = value.value elif is_string_object(value): - value = np.timedelta64(parse_timedelta_string(value)) + if len(value) > 0 and value[0] == 'P': # hackish + match = match_iso_format(value) + value = _value_from_iso_match(match) + else: + value = np.timedelta64(parse_timedelta_string(value)) elif PyDelta_Check(value): value = convert_to_timedelta64(value, 'ns') elif is_timedelta64_object(value): diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py index c260700c9473b..c3aa9f5923e58 100644 --- a/pandas/tests/scalar/test_timedelta.py +++ b/pandas/tests/scalar/test_timedelta.py @@ -853,3 +853,18 @@ def test_isoformat(self): result = Timedelta(minutes=1).isoformat() expected = 'P0DT0H1M0S' assert result == expected + + @pytest.mark.parametrize('fmt,exp', [ + ('P6DT0H50M3.010010012S', Timedelta(days=6, minutes=50, seconds=3, + milliseconds=10, microseconds=10, + nanoseconds=12)), + ('P-6DT0H50M3.010010012S', Timedelta(days=-6, minutes=50, seconds=3, + milliseconds=10, microseconds=10, + nanoseconds=12)), + ('P4DT12H30M5S', Timedelta(days=4, hours=12, minutes=30, seconds=5)), + ('P0DT0H0M0.000000123S', Timedelta(nanoseconds=123)), + ('P0DT0H0M0.00001S', Timedelta(microseconds=10)), + ('P0DT0H0M0.001S', Timedelta(milliseconds=1)), + ('P0DT0H1M0S', Timedelta(minutes=1))]) + def test_iso_constructor(self, fmt, exp): + assert Timedelta(fmt) == exp From 6bab9dab85314918d01019f59944e7f278222a28 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 4 Jan 2018 09:04:20 -0800 Subject: [PATCH 2/2] ISO 8601 Duration Refactoring --- doc/source/timedeltas.rst | 1 + pandas/_libs/tslibs/timedeltas.pyx | 75 ++++++++++++++------------- pandas/tests/scalar/test_timedelta.py | 13 ++++- 3 files changed, 53 insertions(+), 36 deletions(-) diff --git a/doc/source/timedeltas.rst b/doc/source/timedeltas.rst index 5c6652bf1f5b2..50cff4c7bbdfb 100644 --- 
a/doc/source/timedeltas.rst +++ b/doc/source/timedeltas.rst @@ -67,6 +67,7 @@ You can construct a ``Timedelta`` scalar through various arguments: pd.Timedelta('P0DT0H0M0.000000123S') .. versionadded:: 0.23.0 + Added constructor for `ISO 8601 Duration`_ strings :ref:`DateOffsets` (``Day, Hour, Minute, Second, Milli, Micro, Nano``) can also be used in construction. diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 448b8f482c6e6..af3fa738fad14 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -236,25 +236,6 @@ cpdef inline int64_t cast_from_unit(object ts, object unit) except? -1: return (base *m) + (frac *m) -cpdef match_iso_format(object ts): - """ - Match a provided string against an ISO 8601 pattern, providing a group for - each ``Timedelta`` component. - """ - pater = re.compile(r"""P - (?P<days>-?[0-9]*)DT - (?P<hours>[0-9]{1,2})H - (?P<minutes>[0-9]{1,2})M - (?P<seconds>[0-9]{0,2}) - (\. - (?P<milliseconds>[0-9]{0,3}) - (?P<microseconds>[0-9]{0,3}) - (?P<nanoseconds>[0-9]{0,3}) - )?S""", re.VERBOSE) - - return re.match(pater, ts) - - cdef inline parse_timedelta_string(object ts): """ Parse a regular format timedelta string. Return an int64_t (in ns) @@ -526,31 +507,55 @@ def _binary_op_method_timedeltalike(op, name): # ---------------------------------------------------------------------- # Timedelta Construction -def _value_from_iso_match(match): +iso_pater = re.compile(r"""P + (?P<days>-?[0-9]*)DT + (?P<hours>[0-9]{1,2})H + (?P<minutes>[0-9]{1,2})M + (?P<seconds>[0-9]{0,2}) + (\. + (?P<milliseconds>[0-9]{1,3}) + (?P<microseconds>[0-9]{0,3}) + (?P<nanoseconds>[0-9]{0,3}) + )?S""", re.VERBOSE) + + +cdef int64_t parse_iso_format_string(object iso_fmt) except? 
-1: """ Extracts and cleanses the appropriate values from a match object with groups for each component of an ISO 8601 duration Parameters ---------- - match: - Regular expression with groups for each component of an ISO 8601 - duration + iso_fmt: + ISO 8601 Duration formatted string Returns ------- - int + ns: int64_t Precision in nanoseconds of matched ISO 8601 duration + + Raises + ------ + ValueError + If ``iso_fmt`` cannot be parsed """ - match_dict = {k: v for k, v in match.groupdict().items() if v} - for comp in ['milliseconds', 'microseconds', 'nanoseconds']: - if comp in match_dict: - match_dict[comp] ='{:0<3}'.format(match_dict[comp]) - match_dict = {k: int(v) for k, v in match_dict.items()} - nano = match_dict.pop('nanoseconds', 0) + cdef int64_t ns = 0 + + match = re.match(iso_pater, iso_fmt) + if match: + match_dict = match.groupdict(default='0') + for comp in ['milliseconds', 'microseconds', 'nanoseconds']: + match_dict[comp] = '{:0<3}'.format(match_dict[comp]) + + for k, v in match_dict.items(): + ns += timedelta_from_spec(v, '0', k) + + else: + raise ValueError("Invalid ISO 8601 Duration format - " + "{}".format(iso_fmt)) - return nano + convert_to_timedelta64(timedelta(**match_dict), 'ns') + return ns cdef _to_py_int_float(v): @@ -872,11 +877,11 @@ class Timedelta(_Timedelta): if isinstance(value, Timedelta): value = value.value elif is_string_object(value): - if len(value) > 0 and value[0] == 'P': # hackish - match = match_iso_format(value) - value = _value_from_iso_match(match) + if len(value) > 0 and value[0] == 'P': + value = parse_iso_format_string(value) else: - value = np.timedelta64(parse_timedelta_string(value)) + value = parse_timedelta_string(value) + value = np.timedelta64(value) elif PyDelta_Check(value): value = convert_to_timedelta64(value, 'ns') elif is_timedelta64_object(value): diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py index c3aa9f5923e58..310555c19ea99 100644 --- 
a/pandas/tests/scalar/test_timedelta.py +++ b/pandas/tests/scalar/test_timedelta.py @@ -865,6 +865,17 @@ def test_isoformat(self): ('P0DT0H0M0.000000123S', Timedelta(nanoseconds=123)), ('P0DT0H0M0.00001S', Timedelta(microseconds=10)), ('P0DT0H0M0.001S', Timedelta(milliseconds=1)), - ('P0DT0H1M0S', Timedelta(minutes=1))]) + ('P0DT0H1M0S', Timedelta(minutes=1)), + ('P1DT25H61M61S', Timedelta(days=1, hours=25, minutes=61, seconds=61)) + ]) def test_iso_constructor(self, fmt, exp): assert Timedelta(fmt) == exp + + @pytest.mark.parametrize('fmt', [ + 'PPPPPPPPPPPP', 'PDTHMS', 'P0DT999H999M999S', + 'P1DT0H0M0.0000000000000S', 'P1DT0H0M00000000000S', + 'P1DT0H0M0.S']) + def test_iso_constructor_raises(self, fmt): + with tm.assert_raises_regex(ValueError, 'Invalid ISO 8601 Duration ' + 'format - {}'.format(fmt)): + Timedelta(fmt)