diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 4bc50695e1ecd..0c74551e9d2c1 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1303,6 +1303,7 @@ Deprecations - :func:`pandas.api.types.is_period` is deprecated in favor of `pandas.api.types.is_period_dtype` (:issue:`23917`) - :func:`pandas.api.types.is_datetimetz` is deprecated in favor of `pandas.api.types.is_datetime64tz` (:issue:`23917`) - Creating a :class:`TimedeltaIndex`, :class:`DatetimeIndex`, or :class:`PeriodIndex` by passing range arguments `start`, `end`, and `periods` is deprecated in favor of :func:`timedelta_range`, :func:`date_range`, or :func:`period_range` (:issue:`23919`) +- A timedelta passed a number string without a defined unit is deprecated (:issue:`12136`) - Passing a string alias like ``'datetime64[ns, UTC]'`` as the ``unit`` parameter to :class:`DatetimeTZDtype` is deprecated. Use :class:`DatetimeTZDtype.construct_from_string` instead (:issue:`23990`). - The ``skipna`` parameter of :meth:`~pandas.api.types.infer_dtype` will switch to ``True`` by default in a future version of pandas (:issue:`17066`, :issue:`24050`) - In :meth:`Series.where` with Categorical data, providing an ``other`` that is not present in the categories is deprecated. Convert the categorical to a different dtype or add the ``other`` to the categories first (:issue:`24077`). @@ -1562,6 +1563,7 @@ Timedelta - Fixed bug in adding a :class:`DataFrame` with all-`timedelta64[ns]` dtypes to a :class:`DataFrame` with all-integer dtypes returning incorrect results instead of raising ``TypeError`` (:issue:`22696`) - Bug in :class:`TimedeltaIndex` where adding a timezone-aware datetime scalar incorrectly returned a timezone-naive :class:`DatetimeIndex` (:issue:`23215`) - Bug in :class:`TimedeltaIndex` where adding ``np.timedelta64('NaT')`` incorrectly returned an all-``NaT`` :class:`DatetimeIndex` instead of an all-``NaT`` :class:`TimedeltaIndex` (:issue:`23215`) +- Bug in :class:`Timedelta` (and :func: `to_timedelta`) where passing a string of a pure number would not take the unit into account. Now raises for an ambiguous or duplicate unit specification.(:issue:`12136`) - Bug in :class:`Timedelta` and :func:`to_timedelta()` have inconsistencies in supported unit string (:issue:`21762`) - Bug in :class:`TimedeltaIndex` division where dividing by another :class:`TimedeltaIndex` raised ``TypeError`` instead of returning a :class:`Float64Index` (:issue:`23829`, :issue:`22631`) - Bug in :class:`TimedeltaIndex` comparison operations where comparing against non-``Timedelta``-like objects would raise ``TypeError`` instead of returning all-``False`` for ``__eq__`` and all-``True`` for ``__ne__`` (:issue:`24056`) diff --git a/pandas/_libs/tslibs/timedeltas.pxd b/pandas/_libs/tslibs/timedeltas.pxd index c02a840281266..b5732ee670ee8 100644 --- a/pandas/_libs/tslibs/timedeltas.pxd +++ b/pandas/_libs/tslibs/timedeltas.pxd @@ -3,6 +3,7 @@ from numpy cimport int64_t # Exposed for tslib, not intended for outside use. +cpdef parse_timedelta_string(object ts, object specified_unit=*) cdef int64_t cast_from_unit(object ts, object unit) except? -1 cpdef int64_t delta_to_nanoseconds(delta) except? -1 -cpdef convert_to_timedelta64(object ts, object unit) +cpdef convert_to_timedelta64(object ts, object unit=*) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 037e7de27adc3..828ea22d7bbf1 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -148,7 +148,7 @@ cpdef int64_t delta_to_nanoseconds(delta) except? -1: raise TypeError(type(delta)) -cpdef convert_to_timedelta64(object ts, object unit): +cpdef convert_to_timedelta64(object ts, object unit=None): """ Convert an incoming object to a timedelta64 if possible. Before calling, unit must be standardized to avoid repeated unit conversion @@ -162,6 +162,8 @@ cpdef convert_to_timedelta64(object ts, object unit): Return an ns based int64 """ + if unit is None: + unit = 'ns' if checknull_with_nat(ts): return np.timedelta64(NPY_NAT) elif isinstance(ts, Timedelta): @@ -211,7 +213,7 @@ cpdef convert_to_timedelta64(object ts, object unit): @cython.boundscheck(False) @cython.wraparound(False) -def array_to_timedelta64(object[:] values, unit='ns', errors='raise'): +def array_to_timedelta64(object[:] values, unit=None, errors='raise'): """ Convert an ndarray to an array of timedeltas. If errors == 'coerce', coerce non-convertible objects to NaT. Otherwise, raise. @@ -234,7 +236,7 @@ def array_to_timedelta64(object[:] values, unit='ns', errors='raise'): # this is where all of the error handling will take place. try: for i in range(n): - result[i] = parse_timedelta_string(values[i]) + result[i] = parse_timedelta_string(values[i], specified_unit=unit) except: unit = parse_timedelta_unit(unit) for i in range(n): @@ -310,7 +312,7 @@ cdef inline _decode_if_necessary(object ts): return ts -cdef inline parse_timedelta_string(object ts): +cpdef inline parse_timedelta_string(object ts, specified_unit=None): """ Parse a regular format timedelta string. Return an int64_t (in ns) or raise a ValueError on an invalid parse. @@ -424,6 +426,17 @@ cdef inline parse_timedelta_string(object ts): have_value = 1 have_dot = 0 + # Consider units from outside + if not unit: + if specified_unit: + unit = [specified_unit] + else: + if specified_unit: + raise ValueError( + "units were doubly specified, both as an argument ({})" + " and inside string ({})".format(specified_unit, unit) + ) + # we had a dot, but we have a fractional # value since we have an unit if have_dot and len(unit): @@ -465,14 +478,17 @@ cdef inline parse_timedelta_string(object ts): else: raise ValueError("unit abbreviation w/o a number") - # treat as nanoseconds - # but only if we don't have anything else + # raise if we just have a number without units else: if have_value: raise ValueError("have leftover units") if len(number): - r = timedelta_from_spec(number, frac, 'ns') - result += timedelta_as_neg(r, neg) + warnings.warn( + "number string without units is deprecated and" + " will raise an exception in future versions. Considering as nanoseconds.", + FutureWarning + ) + result = timedelta_from_spec(number, frac, 'ns') return result @@ -521,10 +537,12 @@ cpdef inline object parse_timedelta_unit(object unit): ---------- unit : an unit string """ - if unit is None: - return 'ns' - elif unit == 'M': + + # Preserve unit if None, will be cast to nanoseconds + # later on at the proper functions + if unit is None or unit == 'M': return unit + try: return timedelta_abbrevs[unit.lower()] except (KeyError, AttributeError): @@ -1167,7 +1185,7 @@ class Timedelta(_Timedelta): if len(value) > 0 and value[0] == 'P': value = parse_iso_format_string(value) else: - value = parse_timedelta_string(value) + value = parse_timedelta_string(value, specified_unit=unit) value = np.timedelta64(value) elif PyDelta_Check(value): value = convert_to_timedelta64(value, 'ns') diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 1ec37c9f228a6..c35b05ce9f9c2 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -874,6 +874,8 @@ def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"): # treat as multiples of the given unit. If after converting to nanos, # there are fractional components left, these are truncated # (i.e. NOT rounded) + if unit is None: + unit = "ns" mask = np.isnan(data) coeff = np.timedelta64(1, unit) / np.timedelta64(1, 'ns') data = (coeff * data).astype(np.int64).view('timedelta64[ns]') diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 6bcf56c306e6a..c21c5b6930911 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -6,7 +6,10 @@ from pandas._libs import tslibs from pandas._libs.tslibs.timedeltas import ( - convert_to_timedelta64, parse_timedelta_unit) + convert_to_timedelta64, + parse_timedelta_string, + parse_timedelta_unit, +) from pandas.core.dtypes.common import is_list_like from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries @@ -15,7 +18,7 @@ from pandas.core.arrays.timedeltas import sequence_to_td64ns -def to_timedelta(arg, unit='ns', box=True, errors='raise'): +def to_timedelta(arg, unit=None, box=True, errors='raise'): """ Convert argument to timedelta. @@ -116,26 +119,30 @@ def to_timedelta(arg, unit='ns', box=True, errors='raise'): box=box, errors=errors) -def _coerce_scalar_to_timedelta_type(r, unit='ns', box=True, errors='raise'): +def _coerce_scalar_to_timedelta_type(r, unit=None, box=True, errors='raise'): """Convert string 'r' to a timedelta object.""" try: - result = convert_to_timedelta64(r, unit) - except ValueError: - if errors == 'raise': - raise - elif errors == 'ignore': - return r - - # coerce - result = pd.NaT + result = parse_timedelta_string(r, unit) + result = np.timedelta64(result) + except (ValueError, TypeError): + try: + result = convert_to_timedelta64(r, unit) + except ValueError: + if errors == 'raise': + raise + elif errors == 'ignore': + return r + + # coerce + result = pd.NaT if box: result = tslibs.Timedelta(result) return result -def _convert_listlike(arg, unit='ns', box=True, errors='raise', name=None): +def _convert_listlike(arg, unit=None, box=True, errors='raise', name=None): """Convert a list of objects to a timedelta index object.""" if isinstance(arg, (list, tuple)) or not hasattr(arg, 'dtype'): diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index f14ecae448723..8ad3362ab1b08 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -298,7 +298,7 @@ class TestFrameFlexArithmetic(object): def test_df_add_td64_columnwise(self): # GH 22534 Check that column-wise addition broadcasts correctly dti = pd.date_range('2016-01-01', periods=10) - tdi = pd.timedelta_range('1', periods=10) + tdi = pd.timedelta_range('1ns', periods=10) tser = pd.Series(tdi) df = pd.DataFrame({0: dti, 1: tdi}) diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 97898dd8942f8..5bac697d5767f 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -234,7 +234,7 @@ def test_drop_duplicates(self): 'T', '2T', 'S', '-3S']) def test_infer_freq(self, freq): # GH#11018 - idx = pd.timedelta_range('1', freq=freq, periods=10) + idx = pd.timedelta_range('1ns', freq=freq, periods=10) result = pd.TimedeltaIndex(idx.asi8, freq='infer') tm.assert_index_equal(idx, result) assert result.freq == freq diff --git a/pandas/tests/indexes/timedeltas/test_partial_slicing.py b/pandas/tests/indexes/timedeltas/test_partial_slicing.py index 62bf2a0b4a1cf..2134a1c9034a5 100644 --- a/pandas/tests/indexes/timedeltas/test_partial_slicing.py +++ b/pandas/tests/indexes/timedeltas/test_partial_slicing.py @@ -51,7 +51,8 @@ def test_partial_slice_high_reso(self): assert result == s.iloc[1001] def test_slice_with_negative_step(self): - ts = Series(np.arange(20), timedelta_range('0', periods=20, freq='H')) + ts = Series(np.arange(20), + timedelta_range('0ns', periods=20, freq='H')) SLC = pd.IndexSlice def assert_slices_equivalent(l_slc, i_slc): @@ -76,7 +77,8 @@ def assert_slices_equivalent(l_slc, i_slc): assert_slices_equivalent(SLC['7 hours':'15 hours':-1], SLC[:0]) def test_slice_with_zero_step_raises(self): - ts = Series(np.arange(20), timedelta_range('0', periods=20, freq='H')) + ts = Series(np.arange(20), + timedelta_range('0ns', periods=20, freq='H')) with pytest.raises(ValueError, match='slice step cannot be zero'): ts[::0] with pytest.raises(ValueError, match='slice step cannot be zero'): diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index c78ab41d2fae4..430eca276b0ad 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -1400,7 +1400,7 @@ def test_format_timedelta_ticks_narrow(self): '00:00:00.00000000{:d}'.format(2 * i) for i in range(5)] + [''] - rng = timedelta_range('0', periods=10, freq='ns') + rng = timedelta_range('0ns', periods=10, freq='ns') df = DataFrame(np.random.randn(len(rng), 3), rng) fig, ax = self.plt.subplots() df.plot(fontsize=2, ax=ax) @@ -1431,7 +1431,7 @@ def test_format_timedelta_ticks_wide(self): expected_labels = expected_labels[1:-1] expected_labels[-1] = '' - rng = timedelta_range('0', periods=10, freq='1 d') + rng = timedelta_range('0ns', periods=10, freq='1 d') df = DataFrame(np.random.randn(len(rng), 3), rng) fig, ax = self.plt.subplots() ax = df.plot(fontsize=2, ax=ax) diff --git a/pandas/tests/scalar/timedelta/test_construction.py b/pandas/tests/scalar/timedelta/test_construction.py index 880eca914749b..71e17344f02de 100644 --- a/pandas/tests/scalar/timedelta/test_construction.py +++ b/pandas/tests/scalar/timedelta/test_construction.py @@ -5,6 +5,7 @@ import pytest from pandas import Timedelta, offsets, to_timedelta +from pandas.util.testing import do_not_raise def test_construction(): @@ -83,10 +84,6 @@ def test_construction(): with pytest.raises(ValueError): Timedelta('10 days -1 h 1.5m 1s 3us') - # no units specified - with pytest.raises(ValueError): - Timedelta('3.1415') - # invalid construction with pytest.raises(ValueError, match="cannot construct a Timedelta"): Timedelta() @@ -208,3 +205,47 @@ def test_td_constructor_on_nanoseconds(constructed_td, conversion): def test_td_constructor_value_error(): with pytest.raises(TypeError): Timedelta(nanoseconds='abc') + + +@pytest.mark.parametrize("value", [ + 3.1415, # Number with decimals (original test) + 10, # Integer number, did not raise before +]) +@pytest.mark.parametrize("str_unit, unit, expectation", [ + # Expected case + ("", + "s", + do_not_raise), + + # Units doubly defined + ("s", + "d", + pytest.raises(ValueError, + message="units were doubly specified, " + "both as an argument (d) and inside string (s)") + ), + + # Units doubly defined (same) + ("s", + "s", + pytest.raises(ValueError, + message="units were doubly specified, " + "both as an argument (s) and inside string (s)")), + + # No units + ("", + None, + pytest.warns(DeprecationWarning, + message="number string without units is deprecated and " + " will raise an exception in future versions. " + "Considering as nanoseconds.")), +]) +def test_string_with_unit(value, str_unit, unit, expectation): + with expectation: + val_str = "{}{}".format(value, str_unit) + expected_td = Timedelta(value, unit=unit) + + assert Timedelta(val_str, unit=unit) == expected_td + assert to_timedelta(val_str, unit=unit) == expected_td + assert all(to_timedelta([val_str, val_str], unit=unit) == + to_timedelta([expected_td, expected_td])) diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index db0c848eaeb4b..9c49668363c0e 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -472,12 +472,9 @@ def test_short_format_converters(self): def conv(v): return v.astype('m8[ns]') - assert ct('10') == np.timedelta64(10, 'ns') assert ct('10ns') == np.timedelta64(10, 'ns') - assert ct('100') == np.timedelta64(100, 'ns') assert ct('100ns') == np.timedelta64(100, 'ns') - assert ct('1000') == np.timedelta64(1000, 'ns') assert ct('1000ns') == np.timedelta64(1000, 'ns') assert ct('1000NS') == np.timedelta64(1000, 'ns') @@ -614,7 +611,7 @@ def test_implementation_limits(self): def test_total_seconds_precision(self): # GH 19458 assert Timedelta('30S').total_seconds() == 30.0 - assert Timedelta('0').total_seconds() == 0.0 + assert Timedelta('0ns').total_seconds() == 0.0 assert Timedelta('-2S').total_seconds() == -2.0 assert Timedelta('5.324S').total_seconds() == 5.324 assert (Timedelta('30S').total_seconds() - 30.0) < 1e-20 diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py index d36de931e2610..a9df403b0176d 100644 --- a/pandas/tests/util/test_hashing.py +++ b/pandas/tests/util/test_hashing.py @@ -18,7 +18,7 @@ Series([True, False, True] * 3), Series(pd.date_range("20130101", periods=9)), Series(pd.date_range("20130101", periods=9, tz="US/Eastern")), - Series(pd.timedelta_range("2000", periods=9))]) + Series(pd.timedelta_range("2000ns", periods=9))]) def series(request): return request.param diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 2df43cd678764..ab4543a718590 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -43,6 +43,42 @@ from pandas.io.common import urlopen from pandas.io.formats.printing import pprint_thing + +class NullContextManager(object): + """No-op context manager (does nothing). + + Mainly used for defining ``do_not_raise`` context manager, + for pytest tests where we are required to parametrize on + whether we should raise something or not: + + Example + ------- + + >>> import pytest + >>> from pytest import raises + >>> from pandas.util.testing import do_not_raise + >>> @pytest.mark.parametrize("input, expectation", [ + ... (1, do_not_raise), + ... ("a", raises(ValueError)) + ... ]) + >>> def test_convert_number(input, expectation): + ... with expectation: + ... float(input) + + """ + def __init__(self, dummy_resource=None): + self.dummy_resource = dummy_resource + + def __enter__(self): + return self.dummy_resource + + def __exit__(self, *args): + pass + + +do_not_raise = NullContextManager() + + N = 30 K = 4 _RAISE_NETWORK_ERROR_DEFAULT = False