diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 88bf0e005a221..81795c5d531fe 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -701,6 +701,8 @@ Deprecations instead (:issue:`34191`). - The ``squeeze`` keyword in the ``groupby`` function is deprecated and will be removed in a future version (:issue:`32380`) +- Passing a number string without a defined unit to :class:`Timedelta` is deprecated (:issue:`12136`) + .. --------------------------------------------------------------------------- @@ -777,6 +779,7 @@ Timedelta - Bug in :func:`timedelta_range` that produced an extra point on a edge case (:issue:`30353`, :issue:`33498`) - Bug in :meth:`DataFrame.resample` that produced an extra point on a edge case (:issue:`30353`, :issue:`13022`, :issue:`33498`) - Bug in :meth:`DataFrame.resample` that ignored the ``loffset`` argument when dealing with timedelta (:issue:`7687`, :issue:`33498`) +- Bug in :class:`Timedelta` (and :func:`to_timedelta`) where passing a string of a pure number would not take the unit into account. Now raises for an ambiguous or duplicate unit specification (:issue:`12136`) Timezones ^^^^^^^^^ diff --git a/pandas/_libs/tslibs/timedeltas.pxd b/pandas/_libs/tslibs/timedeltas.pxd index 95ddf8840e65d..c09a9af4bb858 100644 --- a/pandas/_libs/tslibs/timedeltas.pxd +++ b/pandas/_libs/tslibs/timedeltas.pxd @@ -1,6 +1,7 @@ from numpy cimport int64_t # Exposed for tslib, not intended for outside use. +cpdef parse_timedelta_string(object ts, object specified_unit=*) cpdef int64_t delta_to_nanoseconds(delta) except? 
-1 -cdef convert_to_timedelta64(object ts, object unit) +cpdef convert_to_timedelta64(object ts, object unit=*) cdef bint is_any_td_scalar(object obj) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index f7f8b86359732..06f8bd7ea15b8 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1,4 +1,5 @@ import collections +import warnings import cython @@ -160,7 +161,7 @@ cpdef int64_t delta_to_nanoseconds(delta) except? -1: raise TypeError(type(delta)) -cdef convert_to_timedelta64(object ts, object unit): +cpdef convert_to_timedelta64(object ts, object unit=None): """ Convert an incoming object to a timedelta64 if possible. Before calling, unit must be standardized to avoid repeated unit conversion @@ -174,6 +175,8 @@ cdef convert_to_timedelta64(object ts, object unit): Return an ns based int64 """ + if unit is None: + unit = 'ns' if checknull_with_nat(ts): return np.timedelta64(NPY_NAT) elif isinstance(ts, _Timedelta): @@ -218,7 +221,7 @@ cdef convert_to_timedelta64(object ts, object unit): @cython.boundscheck(False) @cython.wraparound(False) -def array_to_timedelta64(object[:] values, unit='ns', errors='raise'): +def array_to_timedelta64(object[:] values, unit=None, errors='raise'): """ Convert an ndarray to an array of timedeltas. If errors == 'coerce', coerce non-convertible objects to NaT. Otherwise, raise. @@ -240,13 +243,8 @@ def array_to_timedelta64(object[:] values, unit='ns', errors='raise'): # this is where all of the error handling will take place. 
try: for i in range(n): - if values[i] is NaT: - # we allow this check in the fast-path because NaT is a C-object - # so this is an inexpensive check - iresult[i] = NPY_NAT - else: - result[i] = parse_timedelta_string(values[i]) - except (TypeError, ValueError): + result[i] = parse_timedelta_string(values[i], specified_unit=unit) + except: unit = parse_timedelta_unit(unit) for i in range(n): try: @@ -260,7 +258,7 @@ def array_to_timedelta64(object[:] values, unit='ns', errors='raise'): return iresult.base # .base to access underlying np.ndarray -cdef inline int64_t parse_timedelta_string(str ts) except? -1: +cpdef inline parse_timedelta_string(object ts, specified_unit=None): """ Parse a regular format timedelta string. Return an int64_t (in ns) or raise a ValueError on an invalid parse. @@ -371,6 +369,17 @@ cdef inline int64_t parse_timedelta_string(str ts) except? -1: have_value = 1 have_dot = 0 + # Consider units from outside + if not unit: + if specified_unit: + unit = [specified_unit] + else: + if specified_unit: + raise ValueError( + "units were doubly specified, both as an argument ({}) " + "and inside string ({})".format(specified_unit, unit) + ) + # we had a dot, but we have a fractional # value since we have an unit if have_dot and len(unit): @@ -412,14 +421,17 @@ cdef inline int64_t parse_timedelta_string(str ts) except? -1: else: raise ValueError("unit abbreviation w/o a number") - # treat as nanoseconds - # but only if we don't have anything else + # raise if we just have a number without units else: if have_value: raise ValueError("have leftover units") if len(number): - r = timedelta_from_spec(number, frac, 'ns') - result += timedelta_as_neg(r, neg) + warnings.warn( + "number string without units is deprecated and " + "will raise an exception in future versions. 
Considering as nanoseconds.", + FutureWarning + ) + result = timedelta_from_spec(number, frac, 'ns') return result @@ -478,10 +490,12 @@ cpdef inline str parse_timedelta_unit(object unit): ------ ValueError : on non-parseable input """ - if unit is None: - return "ns" - elif unit == "M": + + # Preserve unit if None, will be cast to nanoseconds + # later on at the proper functions + if unit is None or unit == 'M': return unit + try: return timedelta_abbrevs[unit.lower()] except (KeyError, AttributeError): @@ -1158,7 +1172,7 @@ class Timedelta(_Timedelta): if len(value) > 0 and value[0] == 'P': value = parse_iso_format_string(value) else: - value = parse_timedelta_string(value) + value = parse_timedelta_string(value, specified_unit=unit) value = np.timedelta64(value) elif PyDelta_Check(value): value = convert_to_timedelta64(value, 'ns') diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 15d9987310f18..b185ddf244191 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -200,7 +200,7 @@ def stringify(value): v = v.tz_convert("UTC") return TermValue(v, v.value, kind) elif kind == "timedelta64" or kind == "timedelta": - v = Timedelta(v, unit="s").value + v = Timedelta(v).value return TermValue(int(v), v, kind) elif meta == "category": metadata = extract_array(self.metadata, extract_numpy=True) diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 51b404b46f321..77f0570f8ce5b 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -13,7 +13,7 @@ from pandas.core.arrays.timedeltas import sequence_to_td64ns -def to_timedelta(arg, unit="ns", errors="raise"): +def to_timedelta(arg, unit=None, box=True, errors="raise"): """ Convert argument to timedelta. 
@@ -108,7 +108,7 @@ def to_timedelta(arg, unit="ns", errors="raise"): return _coerce_scalar_to_timedelta_type(arg, unit=unit, errors=errors) -def _coerce_scalar_to_timedelta_type(r, unit="ns", errors="raise"): +def _coerce_scalar_to_timedelta_type(r, unit=None, box=True, errors="raise"): """Convert string 'r' to a timedelta object.""" try: result = Timedelta(r, unit) @@ -124,7 +124,7 @@ def _coerce_scalar_to_timedelta_type(r, unit="ns", errors="raise"): return result -def _convert_listlike(arg, unit="ns", errors="raise", name=None): +def _convert_listlike(arg, unit=None, box=True, errors="raise", name=None): """Convert a list of objects to a timedelta index object.""" if isinstance(arg, (list, tuple)) or not hasattr(arg, "dtype"): # This is needed only to ensure that in the case where we end up diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 1a61b379de943..b5c64b5ae13da 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -946,7 +946,7 @@ def test_invalid_nat_setitem_array(array, non_casting_nats): "array", [ pd.date_range("2000", periods=4).array, - pd.timedelta_range("2000", periods=4).array, + pd.timedelta_range("2000ns", periods=4).array, ], ) def test_to_numpy_extra(array): diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index e7b7f3e524d44..dbd4d19348a3d 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -396,7 +396,7 @@ def test_floordiv_axis0_numexpr_path(self, opname): def test_df_add_td64_columnwise(self): # GH 22534 Check that column-wise addition broadcasts correctly dti = pd.date_range("2016-01-01", periods=10) - tdi = pd.timedelta_range("1", periods=10) + tdi = pd.timedelta_range("1ns", periods=10) tser = pd.Series(tdi) df = pd.DataFrame({0: dti, 1: tdi}) diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py 
index 3e452e7e2841d..260188be5d00f 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -180,7 +180,7 @@ def test_drop_duplicates(self, freq_sample, keep, expected, index): def test_infer_freq(self, freq_sample): # GH#11018 - idx = pd.timedelta_range("1", freq=freq_sample, periods=10) + idx = pd.timedelta_range("1ns", freq=freq_sample, periods=10) result = pd.TimedeltaIndex(idx.asi8, freq="infer") tm.assert_index_equal(idx, result) assert result.freq == freq_sample diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 7dcb692e29337..7f843350147e6 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -1360,7 +1360,7 @@ def test_format_timedelta_ticks_narrow(self): expected_labels = [f"00:00:00.0000000{i:0>2d}" for i in np.arange(10)] - rng = timedelta_range("0", periods=10, freq="ns") + rng = timedelta_range("0ns", periods=10, freq="ns") df = DataFrame(np.random.randn(len(rng), 3), rng) fig, ax = self.plt.subplots() df.plot(fontsize=2, ax=ax) @@ -1384,7 +1384,7 @@ def test_format_timedelta_ticks_wide(self): "9 days 06:13:20", ] - rng = timedelta_range("0", periods=10, freq="1 d") + rng = timedelta_range("0ns", periods=10, freq="1 d") df = DataFrame(np.random.randn(len(rng), 3), rng) fig, ax = self.plt.subplots() ax = df.plot(fontsize=2, ax=ax) diff --git a/pandas/tests/resample/test_timedelta.py b/pandas/tests/resample/test_timedelta.py index 0fbb60c176b30..22fb78c5513a8 100644 --- a/pandas/tests/resample/test_timedelta.py +++ b/pandas/tests/resample/test_timedelta.py @@ -73,7 +73,7 @@ def test_resample_single_period_timedelta(): def test_resample_timedelta_idempotency(): # GH 12072 - index = pd.timedelta_range("0", periods=9, freq="10L") + index = pd.timedelta_range("0ns", periods=9, freq="10L") series = Series(range(9), index=index) result = series.resample("10L").mean() expected = series diff --git 
a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index c58994d738562..80a0ad7d07375 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -289,3 +289,28 @@ def test_timedelta_constructor_identity(): expected = Timedelta(np.timedelta64(1, "s")) result = Timedelta(expected) assert result is expected + + +@pytest.mark.parametrize( + "value, str_unit, unit, expectation", + [ + # Units doubly defined + (10, "s", "d", (ValueError, "units were doubly specified")), + # Units doubly defined (same) + (10, "s", "s", (ValueError, "units were doubly specified")), + # No units, decimal string + (3.1415, "", None, (ValueError, "no units specified")), + ], +) +def test_string_with_unit(value, str_unit, unit, expectation): + exp, match = expectation + with pytest.raises(exp, match=match): + val_str = "{}{}".format(value, str_unit) + expected_td = Timedelta(value, unit=unit) + + assert Timedelta(val_str, unit=unit) == expected_td + assert to_timedelta(val_str, unit=unit) == expected_td + assert all( + to_timedelta([val_str, val_str], unit=unit) + == to_timedelta([expected_td, expected_td]) + ) diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 38e77321418d1..86acf976fb1a1 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -9,6 +9,7 @@ import pandas as pd from pandas import Timedelta, TimedeltaIndex, offsets, to_timedelta import pandas._testing as tm +from pandas.core.tools.timedeltas import _coerce_scalar_to_timedelta_type as ct class TestTimedeltaUnaryOps: @@ -369,39 +370,36 @@ def test_short_format_converters(self): def conv(v): return v.astype("m8[ns]") - assert Timedelta("10") == np.timedelta64(10, "ns") - assert Timedelta("10ns") == np.timedelta64(10, "ns") - assert Timedelta("100") == np.timedelta64(100, "ns") - assert 
Timedelta("100ns") == np.timedelta64(100, "ns") - - assert Timedelta("1000") == np.timedelta64(1000, "ns") - assert Timedelta("1000ns") == np.timedelta64(1000, "ns") - assert Timedelta("1000NS") == np.timedelta64(1000, "ns") - - assert Timedelta("10us") == np.timedelta64(10000, "ns") - assert Timedelta("100us") == np.timedelta64(100000, "ns") - assert Timedelta("1000us") == np.timedelta64(1000000, "ns") - assert Timedelta("1000Us") == np.timedelta64(1000000, "ns") - assert Timedelta("1000uS") == np.timedelta64(1000000, "ns") - - assert Timedelta("1ms") == np.timedelta64(1000000, "ns") - assert Timedelta("10ms") == np.timedelta64(10000000, "ns") - assert Timedelta("100ms") == np.timedelta64(100000000, "ns") - assert Timedelta("1000ms") == np.timedelta64(1000000000, "ns") - - assert Timedelta("-1s") == -np.timedelta64(1000000000, "ns") - assert Timedelta("1s") == np.timedelta64(1000000000, "ns") - assert Timedelta("10s") == np.timedelta64(10000000000, "ns") - assert Timedelta("100s") == np.timedelta64(100000000000, "ns") - assert Timedelta("1000s") == np.timedelta64(1000000000000, "ns") - - assert Timedelta("1d") == conv(np.timedelta64(1, "D")) - assert Timedelta("-1d") == -conv(np.timedelta64(1, "D")) - assert Timedelta("1D") == conv(np.timedelta64(1, "D")) - assert Timedelta("10D") == conv(np.timedelta64(10, "D")) - assert Timedelta("100D") == conv(np.timedelta64(100, "D")) - assert Timedelta("1000D") == conv(np.timedelta64(1000, "D")) - assert Timedelta("10000D") == conv(np.timedelta64(10000, "D")) + assert ct("10ns") == np.timedelta64(10, "ns") + assert ct("100ns") == np.timedelta64(100, "ns") + + assert ct("1000ns") == np.timedelta64(1000, "ns") + assert ct("1000NS") == np.timedelta64(1000, "ns") + + assert ct("10us") == np.timedelta64(10000, "ns") + assert ct("100us") == np.timedelta64(100000, "ns") + assert ct("1000us") == np.timedelta64(1000000, "ns") + assert ct("1000Us") == np.timedelta64(1000000, "ns") + assert ct("1000uS") == np.timedelta64(1000000, "ns") 
+ + assert ct("1ms") == np.timedelta64(1000000, "ns") + assert ct("10ms") == np.timedelta64(10000000, "ns") + assert ct("100ms") == np.timedelta64(100000000, "ns") + assert ct("1000ms") == np.timedelta64(1000000000, "ns") + + assert ct("-1s") == -np.timedelta64(1000000000, "ns") + assert ct("1s") == np.timedelta64(1000000000, "ns") + assert ct("10s") == np.timedelta64(10000000000, "ns") + assert ct("100s") == np.timedelta64(100000000000, "ns") + assert ct("1000s") == np.timedelta64(1000000000000, "ns") + + assert ct("1d") == conv(np.timedelta64(1, "D")) + assert ct("-1d") == -conv(np.timedelta64(1, "D")) + assert ct("1D") == conv(np.timedelta64(1, "D")) + assert ct("10D") == conv(np.timedelta64(10, "D")) + assert ct("100D") == conv(np.timedelta64(100, "D")) + assert ct("1000D") == conv(np.timedelta64(1000, "D")) + assert ct("10000D") == conv(np.timedelta64(10000, "D")) # space assert Timedelta(" 10000D ") == conv(np.timedelta64(10000, "D")) @@ -499,31 +497,33 @@ def test_implementation_limits(self): def test_total_seconds_precision(self): # GH 19458 assert Timedelta("30S").total_seconds() == 30.0 - assert Timedelta("0").total_seconds() == 0.0 + assert Timedelta("0ns").total_seconds() == 0.0 assert Timedelta("-2S").total_seconds() == -2.0 assert Timedelta("5.324S").total_seconds() == 5.324 assert (Timedelta("30S").total_seconds() - 30.0) < 1e-20 assert (30.0 - Timedelta("30S").total_seconds()) < 1e-20 - def test_resolution_string(self): - assert Timedelta(days=1).resolution_string == "D" - assert Timedelta(days=1, hours=6).resolution_string == "H" - assert Timedelta(days=1, minutes=6).resolution_string == "T" - assert Timedelta(days=1, seconds=6).resolution_string == "S" - assert Timedelta(days=1, milliseconds=6).resolution_string == "L" - assert Timedelta(days=1, microseconds=6).resolution_string == "U" - assert Timedelta(days=1, nanoseconds=6).resolution_string == "N" - - def test_resolution_deprecated(self): - # GH#21344 - td = Timedelta(days=4, hours=3) - result 
= td.resolution - assert result == Timedelta(nanoseconds=1) - - # Check that the attribute is available on the class, mirroring - # the stdlib timedelta behavior - result = Timedelta.resolution - assert result == Timedelta(nanoseconds=1) + def test_timedelta_arithmetic(self): + data = pd.Series(["nat", "32 days"], dtype="timedelta64[ns]") + deltas = [timedelta(days=1), Timedelta(1, unit="D")] + for delta in deltas: + result_method = data.add(delta) + result_operator = data + delta + expected = pd.Series(["nat", "33 days"], dtype="timedelta64[ns]") + tm.assert_series_equal(result_operator, expected) + tm.assert_series_equal(result_method, expected) + + result_method = data.sub(delta) + result_operator = data - delta + expected = pd.Series(["nat", "31 days"], dtype="timedelta64[ns]") + tm.assert_series_equal(result_operator, expected) + tm.assert_series_equal(result_method, expected) + # GH 9396 + result_method = data.div(delta) + result_operator = data / delta + expected = pd.Series([np.nan, 32.0], dtype="float64") + tm.assert_series_equal(result_operator, expected) + tm.assert_series_equal(result_method, expected) @pytest.mark.parametrize( @@ -535,7 +535,7 @@ def test_resolution_deprecated(self): (Timedelta(0, unit="ns"), False), (Timedelta(-10, unit="ns"), True), (Timedelta(None), True), - (NaT, True), + (pd.NaT, True), ], ) def test_truthiness(value, expected): diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 737e21af9242f..db095f8a43ad8 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -896,7 +896,7 @@ def test_getitem_unrecognized_scalar(): [ date_range("2014-01-01", periods=20, freq="MS"), period_range("2014-01", periods=20, freq="M"), - timedelta_range("0", periods=20, freq="H"), + timedelta_range("0ns", periods=20, freq="H"), ], ) def test_slice_with_zero_step_raises(index): @@ -915,7 +915,7 @@ def 
test_slice_with_zero_step_raises(index): [ date_range("2014-01-01", periods=20, freq="MS"), period_range("2014-01", periods=20, freq="M"), - timedelta_range("0", periods=20, freq="H"), + timedelta_range("0ns", periods=20, freq="H"), ], ) def test_slice_with_negative_step(index): diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py index ff29df39e1871..561c890bfa31a 100644 --- a/pandas/tests/util/test_hashing.py +++ b/pandas/tests/util/test_hashing.py @@ -17,7 +17,7 @@ Series([True, False, True] * 3), Series(pd.date_range("20130101", periods=9)), Series(pd.date_range("20130101", periods=9, tz="US/Eastern")), - Series(pd.timedelta_range("2000", periods=9)), + Series(pd.timedelta_range("2000ns", periods=9)), ] ) def series(request):