From b6c4a5c9464567ec9187f1be2e47ab7abeb27955 Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Sun, 31 Jan 2021 01:04:27 +0100 Subject: [PATCH 1/5] ENH: Enable parsing of ISO8601-like timestamps with negative signs (GH37172) --- pandas/_libs/tslibs/timedeltas.pyx | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 25991cfbdb7a7..d8858224d9c64 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -275,7 +275,7 @@ cdef convert_to_timedelta64(object ts, str unit): ts = cast_from_unit(ts, unit) ts = np.timedelta64(ts, "ns") elif isinstance(ts, str): - if len(ts) > 0 and ts[0] == "P": + if len(ts) > 0 and (ts[0] == "P" or ts[:2] == "-P"): ts = parse_iso_format_string(ts) else: ts = parse_timedelta_string(ts) @@ -672,18 +672,23 @@ cdef inline int64_t parse_iso_format_string(str ts) except? -1: cdef: unicode c int64_t result = 0, r - int p = 0 + int p = 0, sign = 1 object dec_unit = 'ms', err_msg - bint have_dot = 0, have_value = 0, neg = 0 + bint have_dot = 0, have_value = 0, neg = 0, valid_ts = 0 list number = [], unit = [] err_msg = f"Invalid ISO 8601 Duration format - {ts}" + if ts[0] == "-": + sign = -1 + ts = ts[1:] + for c in ts: # number (ascii codes) if 48 <= ord(c) <= 57: have_value = 1 + valid_ts = 1 if have_dot: if p == 3 and dec_unit != 'ns': unit.append(dec_unit) @@ -703,6 +708,7 @@ cdef inline int64_t parse_iso_format_string(str ts) except? -1: neg = 0 unit, number = [], [c] else: + have_value = 0 if c == 'P' or c == 'T': pass # ignore marking characters P and T elif c == '-': @@ -710,6 +716,8 @@ cdef inline int64_t parse_iso_format_string(str ts) except? -1: raise ValueError(err_msg) else: neg = 1 + elif c == "+": + pass elif c in ['W', 'D', 'H', 'M']: if c in ['H', 'M'] and len(number) > 2: raise ValueError(err_msg) @@ -746,11 +754,11 @@ cdef inline int64_t parse_iso_format_string(str ts) except? -1: else: raise ValueError(err_msg) - if not have_value: + if not valid_ts: # Received string only - never parsed any values raise ValueError(err_msg) - return result + return sign*result cdef _to_py_int_float(v): @@ -1251,7 +1259,7 @@ class Timedelta(_Timedelta): elif isinstance(value, str): if unit is not None: raise ValueError("unit must not be specified if the value is a str") - if len(value) > 0 and value[0] == 'P': + if len(value) > 0 and (value[0] == 'P' or value[:2] == "-P"): value = parse_iso_format_string(value) else: value = parse_timedelta_string(value) From 5b1995c7e15a8b26f58f6a629a532276cce939a9 Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Sun, 31 Jan 2021 01:21:03 +0100 Subject: [PATCH 2/5] Add tests and whatsnew entry --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/tests/scalar/timedelta/test_constructors.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index f0680bebdafe5..f7f8b7f1379d7 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -55,6 +55,7 @@ Other enhancements - :meth:`DataFrame.plot.scatter` can now accept a categorical column as the argument to ``c`` (:issue:`12380`, :issue:`31357`) - :meth:`.Styler.set_tooltips` allows on hover tooltips to be added to styled HTML dataframes. - :meth:`Series.loc.__getitem__` and :meth:`Series.loc.__setitem__` with :class:`MultiIndex` now raising helpful error message when indexer has too many dimensions (:issue:`35349`) +- Add support for parsing ISO8601-like timestamps with negative signs to :meth:`pandas.Timedelta` (:issue:`37172`) .. --------------------------------------------------------------------------- diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index 64d5a5e9b3fff..a7a86dc97402d 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -263,6 +263,9 @@ def test_construction_out_of_bounds_td64(): ("P1W", Timedelta(days=7)), ("PT300S", Timedelta(seconds=300)), ("P1DT0H0M00000000000S", Timedelta(days=1)), + ("PT-6H3M", Timedelta(hours=-6, minutes=3)), + ("-PT6H3M", Timedelta(hours=-6, minutes=-3)), + ("-PT-6H+3M", Timedelta(hours=6, minutes=-3)), ], ) def test_iso_constructor(fmt, exp): From e578feabef28aed204749c0855ae616e91578f4e Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Fri, 5 Feb 2021 22:10:31 +0100 Subject: [PATCH 3/5] Address comments --- pandas/_libs/tslibs/timedeltas.pyx | 12 ++++++------ pandas/tests/scalar/timedelta/test_constructors.py | 12 +++++++++++- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index d8858224d9c64..d1c9747f1f340 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -275,7 +275,7 @@ cdef convert_to_timedelta64(object ts, str unit): ts = cast_from_unit(ts, unit) ts = np.timedelta64(ts, "ns") elif isinstance(ts, str): - if len(ts) > 0 and (ts[0] == "P" or ts[:2] == "-P"): + if (len(ts) > 0 and ts[0] == "P") or (len(ts) > 1 and ts[:2] == "-P"): ts = parse_iso_format_string(ts) else: ts = parse_timedelta_string(ts) @@ -674,7 +674,7 @@ cdef inline int64_t parse_iso_format_string(str ts) except? -1: int64_t result = 0, r int p = 0, sign = 1 object dec_unit = 'ms', err_msg - bint have_dot = 0, have_value = 0, neg = 0, valid_ts = 0 + bint have_dot = 0, have_value = 0, neg = 0 list number = [], unit = [] err_msg = f"Invalid ISO 8601 Duration format - {ts}" @@ -688,7 +688,6 @@ cdef inline int64_t parse_iso_format_string(str ts) except? -1: if 48 <= ord(c) <= 57: have_value = 1 - valid_ts = 1 if have_dot: if p == 3 and dec_unit != 'ns': unit.append(dec_unit) @@ -708,7 +707,6 @@ cdef inline int64_t parse_iso_format_string(str ts) except? -1: neg = 0 unit, number = [], [c] else: - have_value = 0 if c == 'P' or c == 'T': pass # ignore marking characters P and T elif c == '-': @@ -754,7 +752,7 @@ cdef inline int64_t parse_iso_format_string(str ts) except? -1: else: raise ValueError(err_msg) - if not valid_ts: + if not have_value: # Received string only - never parsed any values raise ValueError(err_msg) @@ -1259,7 +1257,9 @@ class Timedelta(_Timedelta): elif isinstance(value, str): if unit is not None: raise ValueError("unit must not be specified if the value is a str") - if len(value) > 0 and (value[0] == 'P' or value[:2] == "-P"): + if (len(value) > 0 and value[0] == 'P') or ( + len(value) > 1 and value[:2] == '-P' + ): value = parse_iso_format_string(value) else: value = parse_timedelta_string(value) diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index a7a86dc97402d..4dbc7e8e12ec2 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -1,3 +1,4 @@ +from contextlib import nullcontext from datetime import timedelta import numpy as np @@ -266,10 +267,19 @@ def test_construction_out_of_bounds_td64(): ("PT-6H3M", Timedelta(hours=-6, minutes=3)), ("-PT6H3M", Timedelta(hours=-6, minutes=-3)), ("-PT-6H+3M", Timedelta(hours=6, minutes=-3)), + ("P", True), + ("-P", True), ], ) def test_iso_constructor(fmt, exp): - assert Timedelta(fmt) == exp + cm = ( + pytest.raises(ValueError, match=f"Invalid ISO 8601 Duration format - {fmt}") + if exp is True + else nullcontext() + ) + + with cm: + assert Timedelta(fmt) == exp @pytest.mark.parametrize( From 44c57ea46fda681bee33dd6ff2ad5ababc1ca440 Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Fri, 5 Feb 2021 22:29:23 +0100 Subject: [PATCH 4/5] Update whatsnew entry --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index b43029e0bf425..95aec64109489 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -57,7 +57,7 @@ Other enhancements - :meth:`.Styler.set_tooltips_class` and :meth:`.Styler.set_table_styles` amended to optionally allow certain css-string input arguments (:issue:`39564`) - :meth:`Series.loc.__getitem__` and :meth:`Series.loc.__setitem__` with :class:`MultiIndex` now raising helpful error message when indexer has too many dimensions (:issue:`35349`) - :meth:`pandas.read_stata` and :class:`StataReader` support reading data from compressed files. -- Add support for parsing ISO8601-like timestamps with negative signs to :meth:`pandas.Timedelta` (:issue:`37172`) +- Add support for parsing ``ISO 8601``-like timestamps with negative signs to :meth:`pandas.Timedelta` (:issue:`37172`) .. --------------------------------------------------------------------------- From d4c99f21dcf5699571fcc41a4da5a1c78f1d79a9 Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Fri, 5 Feb 2021 23:03:53 +0100 Subject: [PATCH 5/5] Update tests --- pandas/tests/scalar/timedelta/test_constructors.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index 4dbc7e8e12ec2..de7a0dc97d565 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -1,4 +1,3 @@ -from contextlib import nullcontext from datetime import timedelta import numpy as np @@ -267,19 +266,10 @@ def test_construction_out_of_bounds_td64(): ("PT-6H3M", Timedelta(hours=-6, minutes=3)), ("-PT6H3M", Timedelta(hours=-6, minutes=-3)), ("-PT-6H+3M", Timedelta(hours=6, minutes=-3)), - ("P", True), - ("-P", True), ], ) def test_iso_constructor(fmt, exp): - cm = ( - pytest.raises(ValueError, match=f"Invalid ISO 8601 Duration format - {fmt}") - if exp is True - else nullcontext() - ) - - with cm: - assert Timedelta(fmt) == exp + assert Timedelta(fmt) == exp @pytest.mark.parametrize( @@ -290,6 +280,8 @@ def test_iso_constructor(fmt, exp): "P0DT999H999M999S", "P1DT0H0M0.0000000000000S", "P1DT0H0M0.S", + "P", + "-P", ], ) def test_iso_constructor_raises(fmt):