Skip to content

Added ISO 8601 Duration string constructor for Timedelta #19065

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 5, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions asv_bench/benchmarks/timedelta.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,40 @@
import datetime

import numpy as np
import pandas as pd

from pandas import to_timedelta, Timestamp, Timedelta


class TimedeltaConstructor(object):
    """Benchmarks that time each supported way of building a ``Timedelta``.

    Every ``time_*`` method performs exactly one constructor call and
    discards the result; only the call itself is being measured.
    """

    goal_time = 0.2

    def time_from_int(self):
        """Build from a bare integer."""
        Timedelta(123456789)

    def time_from_unit(self):
        """Build from an integer together with an explicit ``unit``."""
        Timedelta(1, unit='d')

    def time_from_components(self):
        """Build from one keyword argument per component."""
        Timedelta(
            days=1,
            hours=2,
            minutes=3,
            seconds=4,
            milliseconds=5,
            microseconds=6,
            nanoseconds=7,
        )

    def time_from_datetime_timedelta(self):
        """Build from a standard-library ``datetime.timedelta``."""
        Timedelta(datetime.timedelta(days=1, seconds=1))

    def time_from_np_timedelta(self):
        """Build from a ``numpy.timedelta64`` scalar."""
        Timedelta(np.timedelta64(1, 'ms'))

    def time_from_string(self):
        """Build from a plain duration string."""
        Timedelta('1 days')

    def time_from_iso_format(self):
        """Build from an ISO 8601 Duration string."""
        Timedelta('P4DT12H30M5S')

    def time_from_missing(self):
        """Build from the ``'nat'`` missing-value string."""
        Timedelta('nat')


class ToTimedelta(object):
goal_time = 0.2

Expand Down
8 changes: 8 additions & 0 deletions doc/source/timedeltas.rst
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,14 @@ You can construct a ``Timedelta`` scalar through various arguments:
pd.Timedelta('nan')
pd.Timedelta('nat')

# ISO 8601 Duration strings
pd.Timedelta('P0DT0H1M0S')
pd.Timedelta('P0DT0H0M0.000000123S')

.. versionadded:: 0.23.0

Added constructor for `ISO 8601 Duration`_ strings

:ref:`DateOffsets<timeseries.offsets>` (``Day, Hour, Minute, Second, Milli, Micro, Nano``) can also be used in construction.

.. ipython:: python
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ Other API Changes
- Subtracting ``NaT`` from a :class:`Series` with ``dtype='datetime64[ns]'`` returns a ``Series`` with ``dtype='timedelta64[ns]'`` instead of ``dtype='datetime64[ns]'`` (:issue:`18808`)
- Operations between a :class:`Series` with dtype ``dtype='datetime64[ns]'`` and a :class:`PeriodIndex` will correctly raise ``TypeError`` (:issue:`18850`)
- Subtraction of :class:`Series` with timezone-aware ``dtype='datetime64[ns]'`` with mis-matched timezones will raise ``TypeError`` instead of ``ValueError`` (:issue:`18817`)
- The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`)

.. _whatsnew_0230.deprecations:

Expand Down
58 changes: 57 additions & 1 deletion pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
# cython: profile=False
import collections
import re

import sys
cdef bint PY3 = (sys.version_info[0] >= 3)
Expand Down Expand Up @@ -506,6 +507,57 @@ def _binary_op_method_timedeltalike(op, name):
# ----------------------------------------------------------------------
# Timedelta Construction

# Compiled pattern for ISO 8601 Duration strings of the shape
# ``P<days>DT<hours>H<minutes>M<seconds>[.<fraction>]S``:
#   * days      - optional leading "-" followed by any number of digits
#   * hours     - one or two digits
#   * minutes   - one or two digits
#   * seconds   - zero to two digits
#   * fraction  - optional; split into up to three 3-digit groups named
#                 milliseconds, microseconds and nanoseconds
# re.VERBOSE makes the line breaks inside the pattern insignificant.
# NOTE(review): the pattern has no end-of-string anchor, so matching it with
# ``re.match`` does not reject text that follows the final "S" -- confirm
# whether that is intended.
iso_pater = re.compile(r"""P
(?P<days>-?[0-9]*)DT
(?P<hours>[0-9]{1,2})H
(?P<minutes>[0-9]{1,2})M
(?P<seconds>[0-9]{0,2})
(\.
(?P<milliseconds>[0-9]{1,3})
(?P<microseconds>[0-9]{0,3})
(?P<nanoseconds>[0-9]{0,3})
)?S""", re.VERBOSE)


cdef int64_t parse_iso_format_string(object iso_fmt) except? -1:
"""
Parse an ISO 8601 Duration formatted string into a nanosecond total.

The string is matched against the module-level ``iso_pater`` pattern;
each named group is then converted with ``timedelta_from_spec`` and the
results are summed.

Parameters
----------
iso_fmt : str
    ISO 8601 Duration formatted string

Returns
-------
ns : int64_t
    Duration in nanoseconds of the matched ISO 8601 duration

Raises
------
ValueError
    If ``iso_fmt`` does not match ``iso_pater``
"""

# The ``except? -1`` in the signature tells Cython that this cdef function
# may raise, so the caller checks for a pending exception instead of the
# error being printed and ignored.
cdef int64_t ns = 0

# re.match anchors only at the start of the string.
# NOTE(review): text after the final "S" is therefore not rejected here --
# confirm whether that is intended.
match = re.match(iso_pater, iso_fmt)
if match:
# Groups that did not take part in the match (the optional fractional
# part) are reported as '0' rather than None.
match_dict = match.groupdict(default='0')
for comp in ['milliseconds', 'microseconds', 'nanoseconds']:
# Right-pad each fractional group with zeros to three digits so that
# e.g. a milliseconds group of "5" (from ".5") is read as "500".
match_dict[comp] = '{:0<3}'.format(match_dict[comp])

# The regex group name is passed as the last argument to
# timedelta_from_spec (presumably the unit; helper is defined elsewhere).
for k, v in match_dict.items():
ns += timedelta_from_spec(v, '0', k)

else:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you should just raise here

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not an expert on Cython but pytest was failing when raising the exception inside of this function. I believe it is attributable to the int64_t function declaration. I could remove that type and raising the exception directly inside the function would work. Otherwise, I was getting the below output during testing.

-------------------------------------------------------------- Captured stderr call --------------------------------------------------------------
AttributeError: 'NoneType' object has no attribute 'groupdict'
Exception ignored in: 'pandas._libs.tslibs.timedeltas.parse_iso_format_string'
AttributeError: 'NoneType' object has no attribute 'groupdict'

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh, you just need to add:
except? -1: to the declaration; this tells cython that you may raise in a cdef function so it should check. you don't need to explicity return -1 though.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah OK thanks. Will make changes and re-push

raise ValueError("Invalid ISO 8601 Duration format - "
"{}".format(iso_fmt))

return ns


cdef _to_py_int_float(v):
# Note: This used to be defined inside Timedelta.__new__
# but cython will not allow `cdef` functions to be defined dynamically.
Expand Down Expand Up @@ -825,7 +877,11 @@ class Timedelta(_Timedelta):
if isinstance(value, Timedelta):
value = value.value
elif is_string_object(value):
value = np.timedelta64(parse_timedelta_string(value))
if len(value) > 0 and value[0] == 'P':
value = parse_iso_format_string(value)
else:
value = parse_timedelta_string(value)
value = np.timedelta64(value)
elif PyDelta_Check(value):
value = convert_to_timedelta64(value, 'ns')
elif is_timedelta64_object(value):
Expand Down
26 changes: 26 additions & 0 deletions pandas/tests/scalar/test_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -853,3 +853,29 @@ def test_isoformat(self):
result = Timedelta(minutes=1).isoformat()
expected = 'P0DT0H1M0S'
assert result == expected

@pytest.mark.parametrize('fmt,exp', [
    # full nanosecond precision in the fractional seconds
    (
        'P6DT0H50M3.010010012S',
        Timedelta(days=6, minutes=50, seconds=3,
                  milliseconds=10, microseconds=10, nanoseconds=12),
    ),
    # negative day component
    (
        'P-6DT0H50M3.010010012S',
        Timedelta(days=-6, minutes=50, seconds=3,
                  milliseconds=10, microseconds=10, nanoseconds=12),
    ),
    # no fractional seconds
    (
        'P4DT12H30M5S',
        Timedelta(days=4, hours=12, minutes=30, seconds=5),
    ),
    # fractional part only, at decreasing precision
    ('P0DT0H0M0.000000123S', Timedelta(nanoseconds=123)),
    ('P0DT0H0M0.00001S', Timedelta(microseconds=10)),
    ('P0DT0H0M0.001S', Timedelta(milliseconds=1)),
    ('P0DT0H1M0S', Timedelta(minutes=1)),
    # components larger than their natural range are accepted
    (
        'P1DT25H61M61S',
        Timedelta(days=1, hours=25, minutes=61, seconds=61),
    ),
])
def test_iso_constructor(self, fmt, exp):
    """An ISO 8601 Duration string builds the expected ``Timedelta``."""
    result = Timedelta(fmt)
    assert result == exp

@pytest.mark.parametrize('fmt', [
    'PPPPPPPPPPPP', 'PDTHMS', 'P0DT999H999M999S',
    'P1DT0H0M0.0000000000000S', 'P1DT0H0M00000000000S',
    'P1DT0H0M0.S'])
def test_iso_constructor_raises(self, fmt):
    """A malformed ISO 8601 Duration string raises ``ValueError``.

    The error message must echo the offending input verbatim.
    """
    import re

    # The expected message is used as a regular expression. Escape it so
    # that characters such as "." in ``fmt`` are matched literally instead
    # of acting as wildcards.
    expected_msg = re.escape(
        'Invalid ISO 8601 Duration format - {}'.format(fmt))
    with tm.assert_raises_regex(ValueError, expected_msg):
        Timedelta(fmt)