diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index b71edcf1f6f51..5bbdddaf60fbe 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -665,8 +665,7 @@ Timedelta - Bug in :class:`Index` with numeric dtype when multiplying or dividing an array with dtype ``timedelta64`` (:issue:`22390`) - Bug in :class:`TimedeltaIndex` incorrectly allowing indexing with ``Timestamp`` object (:issue:`20464`) - Fixed bug where subtracting :class:`Timedelta` from an object-dtyped array would raise ``TypeError`` (:issue:`21980`) -- -- +- Bug in :class:`Timedelta`: where passing a string of a pure number would not take unit into account. Also raises for ambiguous/duplicate unit specification (:issue: `12136`) Timezones ^^^^^^^^^ diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 9c8be1901d1dc..6727a07b6e14c 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +# cython: profile=False import collections import textwrap import warnings @@ -6,13 +7,13 @@ import warnings import sys cdef bint PY3 = (sys.version_info[0] >= 3) -from cython import Py_ssize_t +from cython cimport Py_ssize_t -from cpython cimport Py_NE, Py_EQ, PyObject_RichCompare +from cpython cimport PyUnicode_Check, Py_NE, Py_EQ, PyObject_RichCompare import numpy as np cimport numpy as cnp -from numpy cimport int64_t +from numpy cimport int64_t, ndarray cnp.import_array() from cpython.datetime cimport (datetime, timedelta, @@ -32,7 +33,6 @@ from np_datetime cimport (cmp_scalar, reverse_ops, td64_to_tdstruct, from nattype import nat_strings, NaT from nattype cimport checknull_with_nat, NPY_NAT -from offsets cimport to_offset # ---------------------------------------------------------------------- # Constants @@ -78,44 +78,6 @@ cdef dict timedelta_abbrevs = { 'D': 'd', _no_input = object() - -# ---------------------------------------------------------------------- -# API - -def ints_to_pytimedelta(int64_t[:] arr, box=False): - """ - convert an i8 repr to an ndarray of timedelta or Timedelta (if box == - True) - - Parameters - ---------- - arr : ndarray[int64_t] - box : bool, default False - - Returns - ------- - result : ndarray[object] - array of Timedelta or timedeltas objects - """ - cdef: - Py_ssize_t i, n = len(arr) - int64_t value - object[:] result = np.empty(n, dtype=object) - - for i in range(n): - - value = arr[i] - if value == NPY_NAT: - result[i] = NaT - else: - if box: - result[i] = Timedelta(value) - else: - result[i] = timedelta(microseconds=int(value) / 1000) - - return result.base # .base to access underlying np.ndarray - - # ---------------------------------------------------------------------- cpdef int64_t delta_to_nanoseconds(delta) except? -1: @@ -161,7 +123,7 @@ cpdef convert_to_timedelta64(object ts, object unit): if ts.astype('int64') == NPY_NAT: return np.timedelta64(NPY_NAT) elif is_timedelta64_object(ts): - ts = ts.astype("m8[{unit}]".format(unit=unit.lower())) + ts = ts.astype("m8[{0}]".format(unit.lower())) elif is_integer_object(ts): if ts == NPY_NAT: return np.timedelta64(NPY_NAT) @@ -182,11 +144,7 @@ cpdef convert_to_timedelta64(object ts, object unit): ts = cast_from_unit(ts, unit) ts = np.timedelta64(ts) elif is_string_object(ts): - if len(ts) > 0 and ts[0] == 'P': - ts = parse_iso_format_string(ts) - else: - ts = parse_timedelta_string(ts) - ts = np.timedelta64(ts) + ts = np.timedelta64(parse_timedelta_string(ts)) elif hasattr(ts, 'delta'): ts = np.timedelta64(delta_to_nanoseconds(ts), 'ns') @@ -198,7 +156,7 @@ cpdef convert_to_timedelta64(object ts, object unit): return ts.astype('timedelta64[ns]') -cpdef array_to_timedelta64(object[:] values, unit='ns', errors='raise'): +cpdef array_to_timedelta64(ndarray[object] values, unit='ns', errors='raise'): """ Convert an ndarray to an array of timedeltas. If errors == 'coerce', coerce non-convertible objects to NaT. Otherwise, raise. @@ -206,7 +164,7 @@ cpdef array_to_timedelta64(object[:] values, unit='ns', errors='raise'): cdef: Py_ssize_t i, n - int64_t[:] iresult + ndarray[int64_t] iresult if errors not in ('ignore', 'raise', 'coerce'): raise ValueError("errors must be one of 'ignore', " @@ -232,7 +190,7 @@ cpdef array_to_timedelta64(object[:] values, unit='ns', errors='raise'): else: raise - return iresult.base # .base to access underlying np.ndarray + return iresult cpdef inline int64_t cast_from_unit(object ts, object unit) except? -1: @@ -264,7 +222,7 @@ cpdef inline int64_t cast_from_unit(object ts, object unit) except? -1: m = 1L p = 0 else: - raise ValueError("cannot cast unit {unit}".format(unit=unit)) + raise ValueError("cannot cast unit {0}".format(unit)) # just give me the unit back if ts is None: @@ -272,22 +230,22 @@ cpdef inline int64_t cast_from_unit(object ts, object unit) except? -1: # cast the unit, multiply base/frace separately # to avoid precision issues from float -> int - base = ts + base = ts frac = ts - base if p: frac = round(frac, p) - return (base * m) + (frac * m) + return (base * m) + (frac * m) cdef inline _decode_if_necessary(object ts): # decode ts if necessary - if not isinstance(ts, unicode) and not PY3: + if not PyUnicode_Check(ts) and not PY3: ts = str(ts).decode('utf-8') return ts -cdef inline parse_timedelta_string(object ts): +cdef inline parse_timedelta_string(object ts, specified_unit=None): """ Parse a regular format timedelta string. Return an int64_t (in ns) or raise a ValueError on an invalid parse. @@ -295,10 +253,11 @@ cdef inline parse_timedelta_string(object ts): cdef: unicode c - bint neg = 0, have_dot = 0, have_value = 0, have_hhmmss = 0 - object current_unit = None - int64_t result = 0, m = 0, r - list number = [], frac = [], unit = [] + bint neg=0, have_dot=0, have_value=0, have_hhmmss=0 + object current_unit=None + object fallback_unit=None + int64_t result=0, m=0, r + list number=[], frac=[], unit=[] # neg : tracks if we have a leading negative for the value # have_dot : tracks if we are processing a dot (either post hhmmss or @@ -373,7 +332,7 @@ cdef inline parse_timedelta_string(object ts): have_hhmmss = 1 else: raise ValueError("expecting hh:mm:ss format, " - "received: {ts}".format(ts=ts)) + "received: {0}".format(ts)) unit, number = [], [] @@ -448,9 +407,13 @@ cdef inline parse_timedelta_string(object ts): if have_value: raise ValueError("have leftover units") if len(number): - r = timedelta_from_spec(number, frac, 'ns') + fallback_unit = 'ns' if specified_unit is None else specified_unit + r = timedelta_from_spec(number, frac, fallback_unit) result += timedelta_as_neg(r, neg) + if (specified_unit is not None) and (fallback_unit is None): + raise ValueError("unit was specified but is redundant/ambiguous") + return result @@ -482,7 +445,7 @@ cdef inline timedelta_from_spec(object number, object frac, object unit): unit = ''.join(unit) unit = timedelta_abbrevs[unit.lower()] except KeyError: - raise ValueError("invalid abbreviation: {unit}".format(unit=unit)) + raise ValueError("invalid abbreviation: {0}".format(unit)) n = ''.join(number) + '.' + ''.join(frac) return cast_from_unit(float(n), unit) @@ -541,12 +504,10 @@ def _binary_op_method_timedeltalike(op, name): elif hasattr(other, 'dtype'): # nd-array like - if other.dtype.kind in ['m', 'M']: - return op(self.to_timedelta64(), other) - elif other.dtype.kind == 'O': - return np.array([op(self, x) for x in other]) - else: + if other.dtype.kind not in ['m', 'M']: + # raise rathering than letting numpy return wrong answer return NotImplemented + return op(self.to_timedelta64(), other) elif not _validate_ops_compat(other): return NotImplemented @@ -593,10 +554,10 @@ cdef inline int64_t parse_iso_format_string(object ts) except? -1: cdef: unicode c int64_t result = 0, r - int p = 0 + int p=0 object dec_unit = 'ms', err_msg - bint have_dot = 0, have_value = 0, neg = 0 - list number = [], unit = [] + bint have_dot=0, have_value=0, neg=0 + list number=[], unit=[] ts = _decode_if_necessary(ts) @@ -683,8 +644,8 @@ cdef _to_py_int_float(v): return int(v) elif is_float_object(v): return float(v) - raise TypeError("Invalid type {typ}. Must be int or " - "float.".format(typ=type(v))) + raise TypeError("Invalid type {0}. Must be int or " + "float.".format(type(v))) # Similar to Timestamp/datetime, this is a construction requirement for @@ -730,10 +691,9 @@ cdef class _Timedelta(timedelta): return True # only allow ==, != ops - raise TypeError('Cannot compare type {cls} with ' - 'type {other}' - .format(cls=type(self).__name__, - other=type(other).__name__)) + raise TypeError('Cannot compare type {!r} with type ' \ + '{!r}'.format(type(self).__name__, + type(other).__name__)) if util.is_array(other): return PyObject_RichCompare(np.array([self]), other, op) return PyObject_RichCompare(other, self, reverse_ops[op]) @@ -742,9 +702,9 @@ cdef class _Timedelta(timedelta): return False elif op == Py_NE: return True - raise TypeError('Cannot compare type {cls} with type {other}' - .format(cls=type(self).__name__, - other=type(other).__name__)) + raise TypeError('Cannot compare type {!r} with type ' \ + '{!r}'.format(type(self).__name__, + type(other).__name__)) return cmp_scalar(self.value, ots.value, op) @@ -835,81 +795,12 @@ cdef class _Timedelta(timedelta): @property def asm8(self): - """ - Return a numpy timedelta64 array scalar view. - - Provides access to the array scalar view (i.e. a combination of the - value and the units) associated with the numpy.timedelta64().view(), - including a 64-bit integer representation of the timedelta in - nanoseconds (Python int compatible). - - Returns - ------- - numpy timedelta64 array scalar view - Array scalar view of the timedelta in nanoseconds. - - Examples - -------- - >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns') - >>> td.asm8 - numpy.timedelta64(86520000003042,'ns') - - >>> td = pd.Timedelta('2 min 3 s') - >>> td.asm8 - numpy.timedelta64(123000000000,'ns') - - >>> td = pd.Timedelta('3 ms 5 us') - >>> td.asm8 - numpy.timedelta64(3005000,'ns') - - >>> td = pd.Timedelta(42, unit='ns') - >>> td.asm8 - numpy.timedelta64(42,'ns') - """ + """ return a numpy timedelta64 array view of myself """ return np.int64(self.value).view('m8[ns]') @property def resolution(self): - """ - Return a string representing the lowest timedelta resolution. - - Each timedelta has a defined resolution that represents the lowest OR - most granular level of precision. Each level of resolution is - represented by a short string as defined below: - - Resolution: Return value - - * Days: 'D' - * Hours: 'H' - * Minutes: 'T' - * Seconds: 'S' - * Milliseconds: 'L' - * Microseconds: 'U' - * Nanoseconds: 'N' - - Returns - ------- - str - Timedelta resolution. - - Examples - -------- - >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns') - >>> td.resolution - 'N' - - >>> td = pd.Timedelta('1 days 2 min 3 us') - >>> td.resolution - 'U' - - >>> td = pd.Timedelta('2 min 3 s') - >>> td.resolution - 'S' - - >>> td = pd.Timedelta(36, unit='us') - >>> td.resolution - 'U' - """ + """ return a string representing the lowest resolution that we have """ self._ensure_components() if self._ns: @@ -931,7 +822,7 @@ cdef class _Timedelta(timedelta): def nanoseconds(self): """ Return the number of nanoseconds (n), where 0 <= n < 1 microsecond. - + Returns ------- int @@ -982,8 +873,8 @@ cdef class _Timedelta(timedelta): sign = " " if format == 'all': - fmt = ("{days} days{sign}{hours:02}:{minutes:02}:{seconds:02}." - "{milliseconds:03}{microseconds:03}{nanoseconds:03}") + fmt = "{days} days{sign}{hours:02}:{minutes:02}:{seconds:02}." \ + "{milliseconds:03}{microseconds:03}{nanoseconds:03}" else: # if we have a partial day subs = (self._h or self._m or self._s or @@ -1008,14 +899,11 @@ cdef class _Timedelta(timedelta): return fmt.format(**comp_dict) def __repr__(self): - return "Timedelta('{val}')".format(val=self._repr_base(format='long')) + return "Timedelta('{0}')".format(self._repr_base(format='long')) def __str__(self): return self._repr_base(format='long') - def __bool__(self): - return self.value != 0 - def isoformat(self): """ Format Timedelta as ISO 8601 Duration like @@ -1062,8 +950,8 @@ cdef class _Timedelta(timedelta): components.nanoseconds) # Trim unnecessary 0s, 1.000000000 -> 1 seconds = seconds.rstrip('0').rstrip('.') - tpl = ('P{td.days}DT{td.hours}H{td.minutes}M{seconds}S' - .format(td=components, seconds=seconds)) + tpl = 'P{td.days}DT{td.hours}H{td.minutes}M{seconds}S'.format( + td=components, seconds=seconds) return tpl @@ -1081,7 +969,7 @@ class Timedelta(_Timedelta): ---------- value : Timedelta, timedelta, np.timedelta64, string, or integer unit : string, {'ns', 'us', 'ms', 's', 'm', 'h', 'D'}, optional - Denote the unit of the input, if input is an integer. Default 'ns'. + Denote the unit of the input, if input is an integer/float. Default 'ns'. days, seconds, microseconds, milliseconds, minutes, hours, weeks : numeric, optional Values for construction in compat with datetime.timedelta. @@ -1116,11 +1004,34 @@ class Timedelta(_Timedelta): if isinstance(value, Timedelta): value = value.value elif is_string_object(value): - if len(value) > 0 and value[0] == 'P': + # Check if it is just a number in a string + try: + value = int(value) + except (ValueError, TypeError): + try: + value = float(value) + except (ValueError, TypeError): + pass + + if is_integer_object(value) or is_float_object(value): + value = convert_to_timedelta64(value, unit) + elif len(value) > 0 and value[0] == 'P': value = parse_iso_format_string(value) else: - value = parse_timedelta_string(value) - value = np.timedelta64(value) + try: + orig_value = value + value = float(value) + except ValueError: + if unit is not None: + raise ValueError("Unit cannot be defined for strings other than pure integer/floats." + " Value: {} Unit: {}".format(value, unit)) + value = parse_timedelta_string(value) + value = np.timedelta64(value) + else: + if unit is None: + raise ValueError("Cannot convert float string without unit." + " Value: {} Type: {}".format(orig_value, type(orig_value))) + value = convert_to_timedelta64(value, unit) elif PyDelta_Check(value): value = convert_to_timedelta64(value, 'ns') elif is_timedelta64_object(value): @@ -1164,6 +1075,7 @@ class Timedelta(_Timedelta): cdef: int64_t result, unit + from pandas.tseries.frequencies import to_offset unit = to_offset(freq).nanos result = unit * rounder(self.value / float(unit)) return Timedelta(result, unit='ns') @@ -1370,7 +1282,7 @@ class Timedelta(_Timedelta): '{op}'.format(dtype=other.dtype, op='__floordiv__')) - elif is_float_object(other) and util.is_nan(other): + elif is_float_object(other) and util._checknull(other): # i.e. np.nan return NotImplemented diff --git a/pandas/tests/scalar/timedelta/test_construction.py b/pandas/tests/scalar/timedelta/test_construction.py index d648140aa7347..a167ceed7059a 100644 --- a/pandas/tests/scalar/timedelta/test_construction.py +++ b/pandas/tests/scalar/timedelta/test_construction.py @@ -89,6 +89,9 @@ def test_construction(): with pytest.raises(ValueError): Timedelta('3.1415') + with pytest.raises(ValueError): + Timedelta('2000') + # invalid construction tm.assert_raises_regex(ValueError, "cannot construct a Timedelta", lambda: Timedelta()) @@ -210,3 +213,16 @@ def test_td_constructor_on_nanoseconds(constructed_td, conversion): def test_td_constructor_value_error(): with pytest.raises(TypeError): Timedelta(nanoseconds='abc') + + +@pytest.mark.parametrize("str_unit, unit, expectation", [ + ("", "s", tm.do_not_raise), # Expected case + ("s", "s", pytest.raises(ValueError)), # Units doubly defined + ("s", "d", pytest.raises(ValueError)), + ("", None, pytest.raises(ValueError)), # No units +]) +def test_string_with_unit(str_unit, unit, expectation): + with expectation: + val_str = "10{}".format(str_unit) + assert Timedelta(val_str, unit=unit) == Timedelta(10, unit=unit) + assert pd.to_timedelta(val_str, unit=unit) == Timedelta(10, unit=unit) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 3db251e89842d..a03827a361a8e 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -49,6 +49,18 @@ from pandas.io.common import urlopen +class NullContextManager(object): + def __init__(self, dummy_resource=None): + self.dummy_resource = dummy_resource + def __enter__(self): + return self.dummy_resource + def __exit__(self, *args): + pass + + +do_not_raise = NullContextManager() + + N = 30 K = 4 _RAISE_NETWORK_ERROR_DEFAULT = False