From 7ab927948e15551410a3e56b5222d23dc6c4522a Mon Sep 17 00:00:00 2001 From: Victor Date: Fri, 8 Jun 2018 11:39:11 -0300 Subject: [PATCH 01/22] Accepts integer/float string with units and raises when unit is ambiguous. --- pandas/_libs/tslibs/timedeltas.pyx | 214 +++++------------- .../scalar/timedelta/test_construction.py | 27 +++ 2 files changed, 80 insertions(+), 161 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 9c8be1901d1dc..cb0534cbb6d0d 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +# cython: profile=False import collections import textwrap import warnings @@ -6,13 +7,13 @@ import warnings import sys cdef bint PY3 = (sys.version_info[0] >= 3) -from cython import Py_ssize_t +from cython cimport Py_ssize_t -from cpython cimport Py_NE, Py_EQ, PyObject_RichCompare +from cpython cimport PyUnicode_Check, Py_NE, Py_EQ, PyObject_RichCompare import numpy as np cimport numpy as cnp -from numpy cimport int64_t +from numpy cimport int64_t, ndarray cnp.import_array() from cpython.datetime cimport (datetime, timedelta, @@ -32,7 +33,6 @@ from np_datetime cimport (cmp_scalar, reverse_ops, td64_to_tdstruct, from nattype import nat_strings, NaT from nattype cimport checknull_with_nat, NPY_NAT -from offsets cimport to_offset # ---------------------------------------------------------------------- # Constants @@ -78,44 +78,6 @@ cdef dict timedelta_abbrevs = { 'D': 'd', _no_input = object() - -# ---------------------------------------------------------------------- -# API - -def ints_to_pytimedelta(int64_t[:] arr, box=False): - """ - convert an i8 repr to an ndarray of timedelta or Timedelta (if box == - True) - - Parameters - ---------- - arr : ndarray[int64_t] - box : bool, default False - - Returns - ------- - result : ndarray[object] - array of Timedelta or timedeltas objects - """ - cdef: - Py_ssize_t i, n = len(arr) - int64_t value - object[:] result = np.empty(n, dtype=object) - - for i in range(n): - - value = arr[i] - if value == NPY_NAT: - result[i] = NaT - else: - if box: - result[i] = Timedelta(value) - else: - result[i] = timedelta(microseconds=int(value) / 1000) - - return result.base # .base to access underlying np.ndarray - - # ---------------------------------------------------------------------- cpdef int64_t delta_to_nanoseconds(delta) except? -1: @@ -161,7 +123,7 @@ cpdef convert_to_timedelta64(object ts, object unit): if ts.astype('int64') == NPY_NAT: return np.timedelta64(NPY_NAT) elif is_timedelta64_object(ts): - ts = ts.astype("m8[{unit}]".format(unit=unit.lower())) + ts = ts.astype("m8[{0}]".format(unit.lower())) elif is_integer_object(ts): if ts == NPY_NAT: return np.timedelta64(NPY_NAT) @@ -182,11 +144,7 @@ cpdef convert_to_timedelta64(object ts, object unit): ts = cast_from_unit(ts, unit) ts = np.timedelta64(ts) elif is_string_object(ts): - if len(ts) > 0 and ts[0] == 'P': - ts = parse_iso_format_string(ts) - else: - ts = parse_timedelta_string(ts) - ts = np.timedelta64(ts) + ts = np.timedelta64(parse_timedelta_string(ts)) elif hasattr(ts, 'delta'): ts = np.timedelta64(delta_to_nanoseconds(ts), 'ns') @@ -198,7 +156,7 @@ cpdef convert_to_timedelta64(object ts, object unit): return ts.astype('timedelta64[ns]') -cpdef array_to_timedelta64(object[:] values, unit='ns', errors='raise'): +cpdef array_to_timedelta64(ndarray[object] values, unit='ns', errors='raise'): """ Convert an ndarray to an array of timedeltas. If errors == 'coerce', coerce non-convertible objects to NaT. Otherwise, raise. @@ -206,7 +164,7 @@ cpdef array_to_timedelta64(object[:] values, unit='ns', errors='raise'): cdef: Py_ssize_t i, n - int64_t[:] iresult + ndarray[int64_t] iresult if errors not in ('ignore', 'raise', 'coerce'): raise ValueError("errors must be one of 'ignore', " @@ -232,7 +190,7 @@ cpdef array_to_timedelta64(object[:] values, unit='ns', errors='raise'): else: raise - return iresult.base # .base to access underlying np.ndarray + return iresult cpdef inline int64_t cast_from_unit(object ts, object unit) except? -1: @@ -264,7 +222,7 @@ cpdef inline int64_t cast_from_unit(object ts, object unit) except? -1: m = 1L p = 0 else: - raise ValueError("cannot cast unit {unit}".format(unit=unit)) + raise ValueError("cannot cast unit {0}".format(unit)) # just give me the unit back if ts is None: @@ -272,22 +230,22 @@ cpdef inline int64_t cast_from_unit(object ts, object unit) except? -1: # cast the unit, multiply base/frace separately # to avoid precision issues from float -> int - base = ts + base = ts frac = ts - base if p: frac = round(frac, p) - return (base * m) + (frac * m) + return (base * m) + (frac * m) cdef inline _decode_if_necessary(object ts): # decode ts if necessary - if not isinstance(ts, unicode) and not PY3: + if not PyUnicode_Check(ts) and not PY3: ts = str(ts).decode('utf-8') return ts -cdef inline parse_timedelta_string(object ts): +cdef inline parse_timedelta_string(object ts, object specified_unit=None): """ Parse a regular format timedelta string. Return an int64_t (in ns) or raise a ValueError on an invalid parse. @@ -295,10 +253,11 @@ cdef inline parse_timedelta_string(object ts): cdef: unicode c - bint neg = 0, have_dot = 0, have_value = 0, have_hhmmss = 0 - object current_unit = None - int64_t result = 0, m = 0, r - list number = [], frac = [], unit = [] + bint neg=0, have_dot=0, have_value=0, have_hhmmss=0 + object current_unit=None + object fallback_unit=None + int64_t result=0, m=0, r + list number=[], frac=[], unit=[] # neg : tracks if we have a leading negative for the value # have_dot : tracks if we are processing a dot (either post hhmmss or @@ -373,7 +332,7 @@ cdef inline parse_timedelta_string(object ts): have_hhmmss = 1 else: raise ValueError("expecting hh:mm:ss format, " - "received: {ts}".format(ts=ts)) + "received: {0}".format(ts)) unit, number = [], [] @@ -448,9 +407,16 @@ cdef inline parse_timedelta_string(object ts): if have_value: raise ValueError("have leftover units") if len(number): - r = timedelta_from_spec(number, frac, 'ns') + if specified_unit is None: + fallback_unit = 'ns' + else: + fallback_unit = specified_unit + r = timedelta_from_spec(number, frac, fallback_unit) result += timedelta_as_neg(r, neg) + if (specified_unit is not None) and (fallback_unit is None): + raise ValueError("unit was specified but is redundant/ambiguous") + return result @@ -482,7 +448,7 @@ cdef inline timedelta_from_spec(object number, object frac, object unit): unit = ''.join(unit) unit = timedelta_abbrevs[unit.lower()] except KeyError: - raise ValueError("invalid abbreviation: {unit}".format(unit=unit)) + raise ValueError("invalid abbreviation: {0}".format(unit)) n = ''.join(number) + '.' + ''.join(frac) return cast_from_unit(float(n), unit) @@ -541,12 +507,10 @@ def _binary_op_method_timedeltalike(op, name): elif hasattr(other, 'dtype'): # nd-array like - if other.dtype.kind in ['m', 'M']: - return op(self.to_timedelta64(), other) - elif other.dtype.kind == 'O': - return np.array([op(self, x) for x in other]) - else: + if other.dtype.kind not in ['m', 'M']: + # raise rathering than letting numpy return wrong answer return NotImplemented + return op(self.to_timedelta64(), other) elif not _validate_ops_compat(other): return NotImplemented @@ -593,10 +557,10 @@ cdef inline int64_t parse_iso_format_string(object ts) except? -1: cdef: unicode c int64_t result = 0, r - int p = 0 + int p=0 object dec_unit = 'ms', err_msg - bint have_dot = 0, have_value = 0, neg = 0 - list number = [], unit = [] + bint have_dot=0, have_value=0, neg=0 + list number=[], unit=[] ts = _decode_if_necessary(ts) @@ -683,8 +647,8 @@ cdef _to_py_int_float(v): return int(v) elif is_float_object(v): return float(v) - raise TypeError("Invalid type {typ}. Must be int or " - "float.".format(typ=type(v))) + raise TypeError("Invalid type {0}. Must be int or " + "float.".format(type(v))) # Similar to Timestamp/datetime, this is a construction requirement for @@ -730,10 +694,9 @@ cdef class _Timedelta(timedelta): return True # only allow ==, != ops - raise TypeError('Cannot compare type {cls} with ' - 'type {other}' - .format(cls=type(self).__name__, - other=type(other).__name__)) + raise TypeError('Cannot compare type {!r} with type ' \ + '{!r}'.format(type(self).__name__, + type(other).__name__)) if util.is_array(other): return PyObject_RichCompare(np.array([self]), other, op) return PyObject_RichCompare(other, self, reverse_ops[op]) @@ -742,9 +705,9 @@ cdef class _Timedelta(timedelta): return False elif op == Py_NE: return True - raise TypeError('Cannot compare type {cls} with type {other}' - .format(cls=type(self).__name__, - other=type(other).__name__)) + raise TypeError('Cannot compare type {!r} with type ' \ + '{!r}'.format(type(self).__name__, + type(other).__name__)) return cmp_scalar(self.value, ots.value, op) @@ -835,81 +798,12 @@ cdef class _Timedelta(timedelta): @property def asm8(self): - """ - Return a numpy timedelta64 array scalar view. - - Provides access to the array scalar view (i.e. a combination of the - value and the units) associated with the numpy.timedelta64().view(), - including a 64-bit integer representation of the timedelta in - nanoseconds (Python int compatible). - - Returns - ------- - numpy timedelta64 array scalar view - Array scalar view of the timedelta in nanoseconds. - - Examples - -------- - >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns') - >>> td.asm8 - numpy.timedelta64(86520000003042,'ns') - - >>> td = pd.Timedelta('2 min 3 s') - >>> td.asm8 - numpy.timedelta64(123000000000,'ns') - - >>> td = pd.Timedelta('3 ms 5 us') - >>> td.asm8 - numpy.timedelta64(3005000,'ns') - - >>> td = pd.Timedelta(42, unit='ns') - >>> td.asm8 - numpy.timedelta64(42,'ns') - """ + """ return a numpy timedelta64 array view of myself """ return np.int64(self.value).view('m8[ns]') @property def resolution(self): - """ - Return a string representing the lowest timedelta resolution. - - Each timedelta has a defined resolution that represents the lowest OR - most granular level of precision. Each level of resolution is - represented by a short string as defined below: - - Resolution: Return value - - * Days: 'D' - * Hours: 'H' - * Minutes: 'T' - * Seconds: 'S' - * Milliseconds: 'L' - * Microseconds: 'U' - * Nanoseconds: 'N' - - Returns - ------- - str - Timedelta resolution. - - Examples - -------- - >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns') - >>> td.resolution - 'N' - - >>> td = pd.Timedelta('1 days 2 min 3 us') - >>> td.resolution - 'U' - - >>> td = pd.Timedelta('2 min 3 s') - >>> td.resolution - 'S' - - >>> td = pd.Timedelta(36, unit='us') - >>> td.resolution - 'U' - """ + """ return a string representing the lowest resolution that we have """ self._ensure_components() if self._ns: @@ -931,7 +825,7 @@ cdef class _Timedelta(timedelta): def nanoseconds(self): """ Return the number of nanoseconds (n), where 0 <= n < 1 microsecond. - + Returns ------- int @@ -982,8 +876,8 @@ cdef class _Timedelta(timedelta): sign = " " if format == 'all': - fmt = ("{days} days{sign}{hours:02}:{minutes:02}:{seconds:02}." - "{milliseconds:03}{microseconds:03}{nanoseconds:03}") + fmt = "{days} days{sign}{hours:02}:{minutes:02}:{seconds:02}." \ + "{milliseconds:03}{microseconds:03}{nanoseconds:03}" else: # if we have a partial day subs = (self._h or self._m or self._s or @@ -1008,14 +902,11 @@ cdef class _Timedelta(timedelta): return fmt.format(**comp_dict) def __repr__(self): - return "Timedelta('{val}')".format(val=self._repr_base(format='long')) + return "Timedelta('{0}')".format(self._repr_base(format='long')) def __str__(self): return self._repr_base(format='long') - def __bool__(self): - return self.value != 0 - def isoformat(self): """ Format Timedelta as ISO 8601 Duration like @@ -1062,8 +953,8 @@ cdef class _Timedelta(timedelta): components.nanoseconds) # Trim unnecessary 0s, 1.000000000 -> 1 seconds = seconds.rstrip('0').rstrip('.') - tpl = ('P{td.days}DT{td.hours}H{td.minutes}M{seconds}S' - .format(td=components, seconds=seconds)) + tpl = 'P{td.days}DT{td.hours}H{td.minutes}M{seconds}S'.format( + td=components, seconds=seconds) return tpl @@ -1164,6 +1055,7 @@ class Timedelta(_Timedelta): cdef: int64_t result, unit + from pandas.tseries.frequencies import to_offset unit = to_offset(freq).nanos result = unit * rounder(self.value / float(unit)) return Timedelta(result, unit='ns') @@ -1370,7 +1262,7 @@ class Timedelta(_Timedelta): '{op}'.format(dtype=other.dtype, op='__floordiv__')) - elif is_float_object(other) and util.is_nan(other): + elif is_float_object(other) and util._checknull(other): # i.e. np.nan return NotImplemented diff --git a/pandas/tests/scalar/timedelta/test_construction.py b/pandas/tests/scalar/timedelta/test_construction.py index d648140aa7347..0283b70350bf6 100644 --- a/pandas/tests/scalar/timedelta/test_construction.py +++ b/pandas/tests/scalar/timedelta/test_construction.py @@ -210,3 +210,30 @@ def test_td_constructor_on_nanoseconds(constructed_td, conversion): def test_td_constructor_value_error(): with pytest.raises(TypeError): Timedelta(nanoseconds='abc') + + +class not_raises(object): + def __enter__(self): + pass + + def __exit__(self, exc_type, exc_val, exc_tb): + pass + + +@pytest.mark.parametrize("redundant_unit, expectation", [ + ("", not_raises()), + ("d", pytest.raises(ValueError)), + ("us", pytest.raises(ValueError)), +]) +@pytest.mark.parametrize("unit", [ + "d", "m", "s", "us" +]) +@pytest.mark.parametrize("sign", [ + +1, -1 +]) +@pytest.mark.parametrize("num", [ + 0.001, 1, 10 +]) +def test_string_with_unit(num, sign, unit, redundant_unit, expectation): + with expectation: + assert Timedelta(str(sign*num)+redundant_unit, unit=unit) == Timedelta(sign*num, unit=unit) From 98a41f45760b5257c2e95a341c3fbd7c1cef8b0a Mon Sep 17 00:00:00 2001 From: Victor Date: Fri, 8 Jun 2018 11:49:54 -0300 Subject: [PATCH 02/22] Fixed PEP linting. --- .../tests/scalar/timedelta/test_construction.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/pandas/tests/scalar/timedelta/test_construction.py b/pandas/tests/scalar/timedelta/test_construction.py index 0283b70350bf6..451c8df4c39fb 100644 --- a/pandas/tests/scalar/timedelta/test_construction.py +++ b/pandas/tests/scalar/timedelta/test_construction.py @@ -223,17 +223,14 @@ def __exit__(self, exc_type, exc_val, exc_tb): @pytest.mark.parametrize("redundant_unit, expectation", [ ("", not_raises()), ("d", pytest.raises(ValueError)), - ("us", pytest.raises(ValueError)), -]) + ("us", pytest.raises(ValueError))]) @pytest.mark.parametrize("unit", [ - "d", "m", "s", "us" -]) + "d", "m", "s", "us"]) @pytest.mark.parametrize("sign", [ - +1, -1 -]) + +1, -1]) @pytest.mark.parametrize("num", [ - 0.001, 1, 10 -]) + 0.001, 1, 10]) def test_string_with_unit(num, sign, unit, redundant_unit, expectation): with expectation: - assert Timedelta(str(sign*num)+redundant_unit, unit=unit) == Timedelta(sign*num, unit=unit) + assert Timedelta(str(sign * num) + redundant_unit, unit=unit) \ + == Timedelta(sign * num, unit=unit) From 23a9834e6033578a2c4a335151f8c4ac508df600 Mon Sep 17 00:00:00 2001 From: Victor Date: Fri, 8 Jun 2018 12:03:44 -0300 Subject: [PATCH 03/22] Updated function signature in pxd file. --- pandas/_libs/tslibs/timedeltas.pxd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timedeltas.pxd b/pandas/_libs/tslibs/timedeltas.pxd index eda4418902513..3ab70f55881b4 100644 --- a/pandas/_libs/tslibs/timedeltas.pxd +++ b/pandas/_libs/tslibs/timedeltas.pxd @@ -3,7 +3,7 @@ from numpy cimport int64_t # Exposed for tslib, not intended for outside use. -cdef parse_timedelta_string(object ts) +cdef parse_timedelta_string(object ts, object specified_unit=None) cpdef int64_t cast_from_unit(object ts, object unit) except? -1 cpdef int64_t delta_to_nanoseconds(delta) except? -1 cpdef convert_to_timedelta64(object ts, object unit) From d34140d89a06e915228e153a1c3bde30fc97d1b8 Mon Sep 17 00:00:00 2001 From: Victor Date: Fri, 8 Jun 2018 16:06:50 -0300 Subject: [PATCH 04/22] Passing tests. --- pandas/_libs/tslibs/timedeltas.pxd | 2 +- pandas/_libs/tslibs/timedeltas.pyx | 23 +++++++++++++------ .../scalar/timedelta/test_construction.py | 10 +++----- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pxd b/pandas/_libs/tslibs/timedeltas.pxd index 3ab70f55881b4..dbdead1dba551 100644 --- a/pandas/_libs/tslibs/timedeltas.pxd +++ b/pandas/_libs/tslibs/timedeltas.pxd @@ -3,7 +3,7 @@ from numpy cimport int64_t # Exposed for tslib, not intended for outside use. -cdef parse_timedelta_string(object ts, object specified_unit=None) +cdef parse_timedelta_string(object ts, specified_unit=*) cpdef int64_t cast_from_unit(object ts, object unit) except? -1 cpdef int64_t delta_to_nanoseconds(delta) except? -1 cpdef convert_to_timedelta64(object ts, object unit) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index cb0534cbb6d0d..91bfb2435835e 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -245,7 +245,7 @@ cdef inline _decode_if_necessary(object ts): return ts -cdef inline parse_timedelta_string(object ts, object specified_unit=None): +cdef inline parse_timedelta_string(object ts, specified_unit=None): """ Parse a regular format timedelta string. Return an int64_t (in ns) or raise a ValueError on an invalid parse. @@ -407,10 +407,7 @@ cdef inline parse_timedelta_string(object ts, object specified_unit=None): if have_value: raise ValueError("have leftover units") if len(number): - if specified_unit is None: - fallback_unit = 'ns' - else: - fallback_unit = specified_unit + fallback_unit = 'ns' if specified_unit is None else specified_unit r = timedelta_from_spec(number, frac, fallback_unit) result += timedelta_as_neg(r, neg) @@ -1004,13 +1001,25 @@ class Timedelta(_Timedelta): "[weeks, days, hours, minutes, seconds, " "milliseconds, microseconds, nanoseconds]") + if isinstance(value, Timedelta): value = value.value elif is_string_object(value): - if len(value) > 0 and value[0] == 'P': + # Check if it is just a number in a string + try: + value = int(value) + except (ValueError, TypeError): + try: + value = float(value) + except (ValueError, TypeError): + pass + + if is_integer_object(value) or is_float_object(value): + value = convert_to_timedelta64(value, unit) + elif len(value) > 0 and value[0] == 'P': value = parse_iso_format_string(value) else: - value = parse_timedelta_string(value) + value = parse_timedelta_string(value, unit) value = np.timedelta64(value) elif PyDelta_Check(value): value = convert_to_timedelta64(value, 'ns') diff --git a/pandas/tests/scalar/timedelta/test_construction.py b/pandas/tests/scalar/timedelta/test_construction.py index 451c8df4c39fb..47f255b68fddb 100644 --- a/pandas/tests/scalar/timedelta/test_construction.py +++ b/pandas/tests/scalar/timedelta/test_construction.py @@ -85,10 +85,6 @@ def test_construction(): with pytest.raises(ValueError): Timedelta('10 days -1 h 1.5m 1s 3us') - # no units specified - with pytest.raises(ValueError): - Timedelta('3.1415') - # invalid construction tm.assert_raises_regex(ValueError, "cannot construct a Timedelta", lambda: Timedelta()) @@ -225,12 +221,12 @@ def __exit__(self, exc_type, exc_val, exc_tb): ("d", pytest.raises(ValueError)), ("us", pytest.raises(ValueError))]) @pytest.mark.parametrize("unit", [ - "d", "m", "s", "us"]) + "d", "m", "s", "us"]) @pytest.mark.parametrize("sign", [ - +1, -1]) + +1, -1]) @pytest.mark.parametrize("num", [ 0.001, 1, 10]) def test_string_with_unit(num, sign, unit, redundant_unit, expectation): with expectation: assert Timedelta(str(sign * num) + redundant_unit, unit=unit) \ - == Timedelta(sign * num, unit=unit) + == Timedelta(sign * num, unit=unit) From a2d68e68cf2f1527ffb370ecaeb25731191a86ff Mon Sep 17 00:00:00 2001 From: Victor Date: Fri, 8 Jun 2018 16:12:46 -0300 Subject: [PATCH 05/22] Fixed linting. --- pandas/tests/scalar/timedelta/test_construction.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/scalar/timedelta/test_construction.py b/pandas/tests/scalar/timedelta/test_construction.py index 47f255b68fddb..2f6696a276ed7 100644 --- a/pandas/tests/scalar/timedelta/test_construction.py +++ b/pandas/tests/scalar/timedelta/test_construction.py @@ -228,5 +228,5 @@ def __exit__(self, exc_type, exc_val, exc_tb): 0.001, 1, 10]) def test_string_with_unit(num, sign, unit, redundant_unit, expectation): with expectation: - assert Timedelta(str(sign * num) + redundant_unit, unit=unit) \ - == Timedelta(sign * num, unit=unit) + assert Timedelta(str(sign * num) + redundant_unit, unit=unit)\ + == Timedelta(sign * num, unit=unit) From 521857ca384c8125c3f035b6d8d9ba42850a9d58 Mon Sep 17 00:00:00 2001 From: Victor Date: Fri, 8 Jun 2018 17:24:23 -0300 Subject: [PATCH 06/22] Removed excess space LINT --- pandas/_libs/tslibs/timedeltas.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 91bfb2435835e..cec07dbebdfa4 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1001,7 +1001,6 @@ class Timedelta(_Timedelta): "[weeks, days, hours, minutes, seconds, " "milliseconds, microseconds, nanoseconds]") - if isinstance(value, Timedelta): value = value.value elif is_string_object(value): From d8b71279207bad4d49b8fe499e526bd9fb5b8193 Mon Sep 17 00:00:00 2001 From: Victor Date: Sat, 9 Jun 2018 16:42:38 -0300 Subject: [PATCH 07/22] Fixed long line. --- pandas/tests/scalar/timedelta/test_construction.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/scalar/timedelta/test_construction.py b/pandas/tests/scalar/timedelta/test_construction.py index 2f6696a276ed7..79c742e97cc04 100644 --- a/pandas/tests/scalar/timedelta/test_construction.py +++ b/pandas/tests/scalar/timedelta/test_construction.py @@ -228,5 +228,6 @@ def __exit__(self, exc_type, exc_val, exc_tb): 0.001, 1, 10]) def test_string_with_unit(num, sign, unit, redundant_unit, expectation): with expectation: - assert Timedelta(str(sign * num) + redundant_unit, unit=unit)\ - == Timedelta(sign * num, unit=unit) + val = sign * num + val_str = str(val) + redundant_unit + assert Timedelta(val_str, unit=unit) == Timedelta(val, unit=unit) From 0ddbcec95da918e909829d40a9407d746b144123 Mon Sep 17 00:00:00 2001 From: Victor Date: Fri, 8 Jun 2018 11:39:11 -0300 Subject: [PATCH 08/22] Accepts integer/float string with units and raises when unit is ambiguous. --- .../scalar/timedelta/test_construction.py | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/pandas/tests/scalar/timedelta/test_construction.py b/pandas/tests/scalar/timedelta/test_construction.py index 79c742e97cc04..0283b70350bf6 100644 --- a/pandas/tests/scalar/timedelta/test_construction.py +++ b/pandas/tests/scalar/timedelta/test_construction.py @@ -85,6 +85,10 @@ def test_construction(): with pytest.raises(ValueError): Timedelta('10 days -1 h 1.5m 1s 3us') + # no units specified + with pytest.raises(ValueError): + Timedelta('3.1415') + # invalid construction tm.assert_raises_regex(ValueError, "cannot construct a Timedelta", lambda: Timedelta()) @@ -219,15 +223,17 @@ def __exit__(self, exc_type, exc_val, exc_tb): @pytest.mark.parametrize("redundant_unit, expectation", [ ("", not_raises()), ("d", pytest.raises(ValueError)), - ("us", pytest.raises(ValueError))]) + ("us", pytest.raises(ValueError)), +]) @pytest.mark.parametrize("unit", [ - "d", "m", "s", "us"]) + "d", "m", "s", "us" +]) @pytest.mark.parametrize("sign", [ - +1, -1]) + +1, -1 +]) @pytest.mark.parametrize("num", [ - 0.001, 1, 10]) + 0.001, 1, 10 +]) def test_string_with_unit(num, sign, unit, redundant_unit, expectation): with expectation: - val = sign * num - val_str = str(val) + redundant_unit - assert Timedelta(val_str, unit=unit) == Timedelta(val, unit=unit) + assert Timedelta(str(sign*num)+redundant_unit, unit=unit) == Timedelta(sign*num, unit=unit) From ce94192f4c063248456f6107e251377d3cca0821 Mon Sep 17 00:00:00 2001 From: Victor Date: Fri, 8 Jun 2018 12:03:44 -0300 Subject: [PATCH 09/22] Updated function signature in pxd file. --- pandas/_libs/tslibs/timedeltas.pxd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timedeltas.pxd b/pandas/_libs/tslibs/timedeltas.pxd index dbdead1dba551..3ab70f55881b4 100644 --- a/pandas/_libs/tslibs/timedeltas.pxd +++ b/pandas/_libs/tslibs/timedeltas.pxd @@ -3,7 +3,7 @@ from numpy cimport int64_t # Exposed for tslib, not intended for outside use. -cdef parse_timedelta_string(object ts, specified_unit=*) +cdef parse_timedelta_string(object ts, object specified_unit=None) cpdef int64_t cast_from_unit(object ts, object unit) except? -1 cpdef int64_t delta_to_nanoseconds(delta) except? -1 cpdef convert_to_timedelta64(object ts, object unit) From c5900924c4af327cec918ce6f4537c69929a8615 Mon Sep 17 00:00:00 2001 From: Victor Date: Fri, 8 Jun 2018 16:06:50 -0300 Subject: [PATCH 10/22] Passing tests. --- pandas/_libs/tslibs/timedeltas.pxd | 2 +- .../scalar/timedelta/test_construction.py | 19 ++++++------------- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pxd b/pandas/_libs/tslibs/timedeltas.pxd index 3ab70f55881b4..dbdead1dba551 100644 --- a/pandas/_libs/tslibs/timedeltas.pxd +++ b/pandas/_libs/tslibs/timedeltas.pxd @@ -3,7 +3,7 @@ from numpy cimport int64_t # Exposed for tslib, not intended for outside use. -cdef parse_timedelta_string(object ts, object specified_unit=None) +cdef parse_timedelta_string(object ts, specified_unit=*) cpdef int64_t cast_from_unit(object ts, object unit) except? -1 cpdef int64_t delta_to_nanoseconds(delta) except? -1 cpdef convert_to_timedelta64(object ts, object unit) diff --git a/pandas/tests/scalar/timedelta/test_construction.py b/pandas/tests/scalar/timedelta/test_construction.py index 0283b70350bf6..47f255b68fddb 100644 --- a/pandas/tests/scalar/timedelta/test_construction.py +++ b/pandas/tests/scalar/timedelta/test_construction.py @@ -85,10 +85,6 @@ def test_construction(): with pytest.raises(ValueError): Timedelta('10 days -1 h 1.5m 1s 3us') - # no units specified - with pytest.raises(ValueError): - Timedelta('3.1415') - # invalid construction tm.assert_raises_regex(ValueError, "cannot construct a Timedelta", lambda: Timedelta()) @@ -223,17 +219,14 @@ def __exit__(self, exc_type, exc_val, exc_tb): @pytest.mark.parametrize("redundant_unit, expectation", [ ("", not_raises()), ("d", pytest.raises(ValueError)), - ("us", pytest.raises(ValueError)), -]) + ("us", pytest.raises(ValueError))]) @pytest.mark.parametrize("unit", [ - "d", "m", "s", "us" -]) + "d", "m", "s", "us"]) @pytest.mark.parametrize("sign", [ - +1, -1 -]) + +1, -1]) @pytest.mark.parametrize("num", [ - 0.001, 1, 10 -]) + 0.001, 1, 10]) def test_string_with_unit(num, sign, unit, redundant_unit, expectation): with expectation: - assert Timedelta(str(sign*num)+redundant_unit, unit=unit) == Timedelta(sign*num, unit=unit) + assert Timedelta(str(sign * num) + redundant_unit, unit=unit) \ + == Timedelta(sign * num, unit=unit) From bb08fc403bdf29d7ce4006f0baf107aef75590c2 Mon Sep 17 00:00:00 2001 From: victor Date: Sun, 30 Sep 2018 20:45:02 +0200 Subject: [PATCH 11/22] Moved whatsnew to 0.24.0. --- doc/source/whatsnew/v0.24.0.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index b71edcf1f6f51..5bbdddaf60fbe 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -665,8 +665,7 @@ Timedelta - Bug in :class:`Index` with numeric dtype when multiplying or dividing an array with dtype ``timedelta64`` (:issue:`22390`) - Bug in :class:`TimedeltaIndex` incorrectly allowing indexing with ``Timestamp`` object (:issue:`20464`) - Fixed bug where subtracting :class:`Timedelta` from an object-dtyped array would raise ``TypeError`` (:issue:`21980`) -- -- +- Bug in :class:`Timedelta`: where passing a string of a pure number would not take unit into account. Also raises for ambiguous/duplicate unit specification (:issue: `12136`) Timezones ^^^^^^^^^ From 2c46d7342ad8d00225a85148bdcfb8f32fd6f83c Mon Sep 17 00:00:00 2001 From: victor Date: Sun, 30 Sep 2018 20:45:53 +0200 Subject: [PATCH 12/22] Reverted parse_timedelta_string signature. --- pandas/_libs/tslibs/timedeltas.pxd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timedeltas.pxd b/pandas/_libs/tslibs/timedeltas.pxd index dbdead1dba551..eda4418902513 100644 --- a/pandas/_libs/tslibs/timedeltas.pxd +++ b/pandas/_libs/tslibs/timedeltas.pxd @@ -3,7 +3,7 @@ from numpy cimport int64_t # Exposed for tslib, not intended for outside use. -cdef parse_timedelta_string(object ts, specified_unit=*) +cdef parse_timedelta_string(object ts) cpdef int64_t cast_from_unit(object ts, object unit) except? -1 cpdef int64_t delta_to_nanoseconds(delta) except? -1 cpdef convert_to_timedelta64(object ts, object unit) From 298a33cc7b8e65e5c2685940b2f50df88ebc0718 Mon Sep 17 00:00:00 2001 From: victor Date: Sun, 30 Sep 2018 21:05:46 +0200 Subject: [PATCH 13/22] Checking float with units in Timedelta class. --- pandas/_libs/tslibs/timedeltas.pyx | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index cec07dbebdfa4..3d98701603047 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -969,7 +969,7 @@ class Timedelta(_Timedelta): ---------- value : Timedelta, timedelta, np.timedelta64, string, or integer unit : string, {'ns', 'us', 'ms', 's', 'm', 'h', 'D'}, optional - Denote the unit of the input, if input is an integer. Default 'ns'. + Denote the unit of the input, if input is an integer/float. Default 'ns'. days, seconds, microseconds, milliseconds, minutes, hours, weeks : numeric, optional Values for construction in compat with datetime.timedelta. @@ -1018,8 +1018,15 @@ class Timedelta(_Timedelta): elif len(value) > 0 and value[0] == 'P': value = parse_iso_format_string(value) else: - value = parse_timedelta_string(value, unit) - value = np.timedelta64(value) + try: + value = float(value) + except ValueError: + value = parse_timedelta_string(value) + value = np.timedelta64(value) + else: + if unit is None: + raise ValueError("Cannot convert float string without unit.") + value = convert_to_timedelta64(value, unit) elif PyDelta_Check(value): value = convert_to_timedelta64(value, 'ns') elif is_timedelta64_object(value): From 677a8f8c18eb7ef4d1f02668446ed4f7b2cb3429 Mon Sep 17 00:00:00 2001 From: victor Date: Sun, 30 Sep 2018 21:08:44 +0200 Subject: [PATCH 14/22] Reverted test with exceptions for strings without unit. --- pandas/tests/scalar/timedelta/test_construction.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/scalar/timedelta/test_construction.py b/pandas/tests/scalar/timedelta/test_construction.py index 47f255b68fddb..23dd6c3fbd697 100644 --- a/pandas/tests/scalar/timedelta/test_construction.py +++ b/pandas/tests/scalar/timedelta/test_construction.py @@ -85,6 +85,10 @@ def test_construction(): with pytest.raises(ValueError): Timedelta('10 days -1 h 1.5m 1s 3us') + # no units specified + with pytest.raises(ValueError): + Timedelta('3.1415') + # invalid construction tm.assert_raises_regex(ValueError, "cannot construct a Timedelta", lambda: Timedelta()) From a371472a9e67c898c7cf79d70f0b968482062996 Mon Sep 17 00:00:00 2001 From: victor Date: Sun, 30 Sep 2018 21:12:23 +0200 Subject: [PATCH 15/22] Useing null context managers from contextlib. --- .../tests/scalar/timedelta/test_construction.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/pandas/tests/scalar/timedelta/test_construction.py b/pandas/tests/scalar/timedelta/test_construction.py index 23dd6c3fbd697..e7470bc2f0123 100644 --- a/pandas/tests/scalar/timedelta/test_construction.py +++ b/pandas/tests/scalar/timedelta/test_construction.py @@ -8,6 +8,12 @@ import pandas.util.testing as tm from pandas import Timedelta +import sys +if sys.version_info >= (3, 3): + from contextlib import ExitStack as do_not_raise +else: + from contextlib2 import ExitStack as do_not_raise + def test_construction(): expected = np.timedelta64(10, 'D').astype('m8[ns]').view('i8') @@ -212,16 +218,8 @@ def test_td_constructor_value_error(): Timedelta(nanoseconds='abc') -class not_raises(object): - def __enter__(self): - pass - - def __exit__(self, exc_type, exc_val, exc_tb): - pass - - @pytest.mark.parametrize("redundant_unit, expectation", [ - ("", not_raises()), + ("", do_not_raise()), ("d", pytest.raises(ValueError)), ("us", pytest.raises(ValueError))]) @pytest.mark.parametrize("unit", [ From 46693244c4f67781b2c470d1e744027367bbcbb5 Mon Sep 17 00:00:00 2001 From: victor Date: Sun, 30 Sep 2018 21:15:10 +0200 Subject: [PATCH 16/22] Added null context manager for parametrizing tests on exception raising. --- pandas/util/testing.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 3db251e89842d..79949ad5a34ea 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -48,6 +48,14 @@ from pandas._libs import testing as _testing from pandas.io.common import urlopen +if sys.version_info >= (3, 3): + from contextlib import ExitStack as nullcontext +else: + from contextlib2 import ExitStack as nullcontext + + +do_not_raise = nullcontext() + N = 30 K = 4 From 93602aca48cbb6485d662803e19760ada8617486 Mon Sep 17 00:00:00 2001 From: victor Date: Sun, 30 Sep 2018 21:15:32 +0200 Subject: [PATCH 17/22] Using null context manager from pandas.util.testing. --- pandas/tests/scalar/timedelta/test_construction.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pandas/tests/scalar/timedelta/test_construction.py b/pandas/tests/scalar/timedelta/test_construction.py index e7470bc2f0123..7ff869487003f 100644 --- a/pandas/tests/scalar/timedelta/test_construction.py +++ b/pandas/tests/scalar/timedelta/test_construction.py @@ -8,12 +8,6 @@ import pandas.util.testing as tm from pandas import Timedelta -import sys -if sys.version_info >= (3, 3): - from contextlib import ExitStack as do_not_raise -else: - from contextlib2 import ExitStack as do_not_raise - def test_construction(): expected = np.timedelta64(10, 'D').astype('m8[ns]').view('i8') @@ -219,7 +213,7 @@ def test_td_constructor_value_error(): @pytest.mark.parametrize("redundant_unit, expectation", [ - ("", do_not_raise()), + ("", tm.do_not_raise), ("d", pytest.raises(ValueError)), ("us", pytest.raises(ValueError))]) @pytest.mark.parametrize("unit", [ From 28ffc982ad0069c3ce08cc6fd9fe7f860f8c1b1a Mon Sep 17 00:00:00 2001 From: victor Date: Sun, 30 Sep 2018 21:35:35 +0200 Subject: [PATCH 18/22] Raise when defining units in string and constructor. --- pandas/_libs/tslibs/timedeltas.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 3d98701603047..84e15b645c90a 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1021,6 +1021,8 @@ class Timedelta(_Timedelta): try: value = float(value) except ValueError: + if unit is not None: + raise ValueError("Unit cannot be defined for strings other than pure integer/floats.") value = parse_timedelta_string(value) value = np.timedelta64(value) else: From 654a0ea5ea7ebdffd4ebc715361f2d082137e8da Mon Sep 17 00:00:00 2001 From: victor Date: Sun, 30 Sep 2018 21:36:07 +0200 Subject: [PATCH 19/22] Simplified and added to_timedelta to test. --- .../scalar/timedelta/test_construction.py | 22 ++++++++----------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/pandas/tests/scalar/timedelta/test_construction.py b/pandas/tests/scalar/timedelta/test_construction.py index 7ff869487003f..d3800705e9895 100644 --- a/pandas/tests/scalar/timedelta/test_construction.py +++ b/pandas/tests/scalar/timedelta/test_construction.py @@ -212,17 +212,13 @@ def test_td_constructor_value_error(): Timedelta(nanoseconds='abc') -@pytest.mark.parametrize("redundant_unit, expectation", [ - ("", tm.do_not_raise), - ("d", pytest.raises(ValueError)), - ("us", pytest.raises(ValueError))]) -@pytest.mark.parametrize("unit", [ - "d", "m", "s", "us"]) -@pytest.mark.parametrize("sign", [ - +1, -1]) -@pytest.mark.parametrize("num", [ - 0.001, 1, 10]) -def test_string_with_unit(num, sign, unit, redundant_unit, expectation): +@pytest.mark.parametrize("str_unit, unit, expectation", [ + ("", "s", tm.do_not_raise), + ("s", "s", pytest.raises(ValueError)), + ("", None, pytest.raises(ValueError)), + ("s", "d", pytest.raises(ValueError)),]) +def test_string_with_unit(str_unit, unit, expectation): with expectation: - assert Timedelta(str(sign * num) + redundant_unit, unit=unit) \ - == Timedelta(sign * num, unit=unit) + val_str = "10{}".format(str_unit) + assert Timedelta(val_str, unit=unit) == Timedelta(10, unit=unit) + assert pd.to_timedelta(val_str, unit=unit) == Timedelta(10, unit=unit) From f38b4cbfea75b385b2301b33b14f8d095a692a73 Mon Sep 17 00:00:00 2001 From: victor Date: Mon, 1 Oct 2018 01:11:46 +0200 Subject: [PATCH 20/22] Added debug info to exceptions. --- pandas/_libs/tslibs/timedeltas.pyx | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 84e15b645c90a..6727a07b6e14c 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1019,15 +1019,18 @@ class Timedelta(_Timedelta): value = parse_iso_format_string(value) else: try: + orig_value = value value = float(value) except ValueError: if unit is not None: - raise ValueError("Unit cannot be defined for strings other than pure integer/floats.") + raise ValueError("Unit cannot be defined for strings other than pure integer/floats." + " Value: {} Unit: {}".format(value, unit)) value = parse_timedelta_string(value) value = np.timedelta64(value) else: if unit is None: - raise ValueError("Cannot convert float string without unit.") + raise ValueError("Cannot convert float string without unit." + " Value: {} Type: {}".format(orig_value, type(orig_value))) value = convert_to_timedelta64(value, unit) elif PyDelta_Check(value): value = convert_to_timedelta64(value, 'ns') From e3226305ece191c506c7a1afc97b50609b3d7b62 Mon Sep 17 00:00:00 2001 From: victor Date: Mon, 1 Oct 2018 01:12:14 +0200 Subject: [PATCH 21/22] Null context manager implementation. --- pandas/util/testing.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 79949ad5a34ea..a03827a361a8e 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -48,13 +48,17 @@ from pandas._libs import testing as _testing from pandas.io.common import urlopen -if sys.version_info >= (3, 3): - from contextlib import ExitStack as nullcontext -else: - from contextlib2 import ExitStack as nullcontext + +class NullContextManager(object): + def __init__(self, dummy_resource=None): + self.dummy_resource = dummy_resource + def __enter__(self): + return self.dummy_resource + def __exit__(self, *args): + pass -do_not_raise = nullcontext() +do_not_raise = NullContextManager() N = 30 From 85f04c9c44cc20b5dace8fd0301858fcb9bc57ce Mon Sep 17 00:00:00 2001 From: victor Date: Mon, 1 Oct 2018 01:48:22 +0200 Subject: [PATCH 22/22] Updated test. --- pandas/tests/scalar/timedelta/test_construction.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pandas/tests/scalar/timedelta/test_construction.py b/pandas/tests/scalar/timedelta/test_construction.py index d3800705e9895..a167ceed7059a 100644 --- a/pandas/tests/scalar/timedelta/test_construction.py +++ b/pandas/tests/scalar/timedelta/test_construction.py @@ -89,6 +89,9 @@ def test_construction(): with pytest.raises(ValueError): Timedelta('3.1415') + with pytest.raises(ValueError): + Timedelta('2000') + # invalid construction tm.assert_raises_regex(ValueError, "cannot construct a Timedelta", lambda: Timedelta()) @@ -213,10 +216,11 @@ def test_td_constructor_value_error(): @pytest.mark.parametrize("str_unit, unit, expectation", [ - ("", "s", tm.do_not_raise), - ("s", "s", pytest.raises(ValueError)), - ("", None, pytest.raises(ValueError)), - ("s", "d", pytest.raises(ValueError)),]) + ("", "s", tm.do_not_raise), # Expected case + ("s", "s", pytest.raises(ValueError)), # Units doubly defined + ("s", "d", pytest.raises(ValueError)), + ("", None, pytest.raises(ValueError)), # No units +]) def test_string_with_unit(str_unit, unit, expectation): with expectation: val_str = "10{}".format(str_unit)