diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 2243790a663df..416291916890a 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -826,6 +826,7 @@ Timedelta - Bug in :func:`timedelta_range` that produced an extra point on a edge case (:issue:`30353`, :issue:`33498`) - Bug in :meth:`DataFrame.resample` that produced an extra point on a edge case (:issue:`30353`, :issue:`13022`, :issue:`33498`) - Bug in :meth:`DataFrame.resample` that ignored the ``loffset`` argument when dealing with timedelta (:issue:`7687`, :issue:`33498`) +- Bug in :class:`Timedelta` and `pandas.to_timedelta` that ignored `unit`-argument for string input (:issue:`12136`) Timezones ^^^^^^^^^ diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index a239804ea7bc2..a5b502f3f4071 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -218,7 +218,7 @@ cdef convert_to_timedelta64(object ts, object unit): @cython.boundscheck(False) @cython.wraparound(False) -def array_to_timedelta64(object[:] values, unit='ns', errors='raise'): +def array_to_timedelta64(object[:] values, unit=None, errors='raise'): """ Convert an ndarray to an array of timedeltas. If errors == 'coerce', coerce non-convertible objects to NaT. Otherwise, raise. @@ -235,6 +235,13 @@ def array_to_timedelta64(object[:] values, unit='ns', errors='raise'): result = np.empty(n, dtype='m8[ns]') iresult = result.view('i8') + if unit is not None: + for i in range(n): + if isinstance(values[i], str): + raise ValueError( + "unit must not be specified if the input contains a str" + ) + # Usually, we have all strings. If so, we hit the fast path. # If this path fails, we try conversion a different way, and # this is where all of the error handling will take place. @@ -247,10 +254,10 @@ def array_to_timedelta64(object[:] values, unit='ns', errors='raise'): else: result[i] = parse_timedelta_string(values[i]) except (TypeError, ValueError): - unit = parse_timedelta_unit(unit) + parsed_unit = parse_timedelta_unit(unit or 'ns') for i in range(n): try: - result[i] = convert_to_timedelta64(values[i], unit) + result[i] = convert_to_timedelta64(values[i], parsed_unit) except ValueError: if errors == 'coerce': result[i] = NPY_NAT @@ -1155,6 +1162,8 @@ class Timedelta(_Timedelta): elif isinstance(value, _Timedelta): value = value.value elif isinstance(value, str): + if unit is not None: + raise ValueError("unit must not be specified if the value is a str") if len(value) > 0 and value[0] == 'P': value = parse_iso_format_string(value) else: diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index f439f07790274..d0657994dd81c 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -876,7 +876,7 @@ def f(x): # Constructor Helpers -def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"): +def sequence_to_td64ns(data, copy=False, unit=None, errors="raise"): """ Parameters ---------- @@ -884,6 +884,7 @@ def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"): copy : bool, default False unit : str, default "ns" The timedelta unit to treat integers as multiples of. + Must be un-specifed if the data contains a str. errors : {"raise", "coerce", "ignore"}, default "raise" How to handle elements that cannot be converted to timedelta64[ns]. See ``pandas.to_timedelta`` for details. @@ -906,7 +907,8 @@ def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"): higher level. """ inferred_freq = None - unit = parse_timedelta_unit(unit) + if unit is not None: + unit = parse_timedelta_unit(unit) # Unwrap whatever we have into a np.ndarray if not hasattr(data, "dtype"): @@ -936,7 +938,7 @@ def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"): # cast the unit, multiply base/frac separately # to avoid precision issues from float -> int mask = np.isnan(data) - m, p = precision_from_unit(unit) + m, p = precision_from_unit(unit or "ns") base = data.astype(np.int64) frac = data - base if p: @@ -1002,7 +1004,7 @@ def ints_to_td64ns(data, unit="ns"): return data, copy_made -def objects_to_td64ns(data, unit="ns", errors="raise"): +def objects_to_td64ns(data, unit=None, errors="raise"): """ Convert a object-dtyped or string-dtyped array into an timedelta64[ns]-dtyped array. @@ -1012,6 +1014,7 @@ def objects_to_td64ns(data, unit="ns", errors="raise"): data : ndarray or Index unit : str, default "ns" The timedelta unit to treat integers as multiples of. + Must not be specified if the data contains a str. errors : {"raise", "coerce", "ignore"}, default "raise" How to handle elements that cannot be converted to timedelta64[ns]. See ``pandas.to_timedelta`` for details. diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 15d9987310f18..001eb1789007f 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -200,7 +200,10 @@ def stringify(value): v = v.tz_convert("UTC") return TermValue(v, v.value, kind) elif kind == "timedelta64" or kind == "timedelta": - v = Timedelta(v, unit="s").value + if isinstance(v, str): + v = Timedelta(v).value + else: + v = Timedelta(v, unit="s").value return TermValue(int(v), v, kind) elif meta == "category": metadata = extract_array(self.metadata, extract_numpy=True) diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 51b404b46f321..87eac93a6072c 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -13,7 +13,7 @@ from pandas.core.arrays.timedeltas import sequence_to_td64ns -def to_timedelta(arg, unit="ns", errors="raise"): +def to_timedelta(arg, unit=None, errors="raise"): """ Convert argument to timedelta. @@ -27,6 +27,7 @@ def to_timedelta(arg, unit="ns", errors="raise"): arg : str, timedelta, list-like or Series The data to be converted to timedelta. unit : str, default 'ns' + Must not be specified if the arg is/contains a str. Denotes the unit of the arg. Possible values: ('W', 'D', 'days', 'day', 'hours', hour', 'hr', 'h', 'm', 'minute', 'min', 'minutes', 'T', 'S', 'seconds', @@ -76,7 +77,8 @@ def to_timedelta(arg, unit="ns", errors="raise"): TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'], dtype='timedelta64[ns]', freq=None) """ - unit = parse_timedelta_unit(unit) + if unit is not None: + unit = parse_timedelta_unit(unit) if errors not in ("ignore", "raise", "coerce"): raise ValueError("errors must be one of 'ignore', 'raise', or 'coerce'}") @@ -104,6 +106,9 @@ def to_timedelta(arg, unit="ns", errors="raise"): "arg must be a string, timedelta, list, tuple, 1-d array, or Series" ) + if isinstance(arg, str) and unit is not None: + raise ValueError("unit must not be specified if the input is/contains a str") + # ...so it must be a scalar value. Return scalar. return _coerce_scalar_to_timedelta_type(arg, unit=unit, errors=errors) @@ -124,7 +129,7 @@ def _coerce_scalar_to_timedelta_type(r, unit="ns", errors="raise"): return result -def _convert_listlike(arg, unit="ns", errors="raise", name=None): +def _convert_listlike(arg, unit=None, errors="raise", name=None): """Convert a list of objects to a timedelta index object.""" if isinstance(arg, (list, tuple)) or not hasattr(arg, "dtype"): # This is needed only to ensure that in the case where we end up diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index c58994d738562..23fb25b838da6 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -289,3 +289,17 @@ def test_timedelta_constructor_identity(): expected = Timedelta(np.timedelta64(1, "s")) result = Timedelta(expected) assert result is expected + + +@pytest.mark.parametrize( + "constructor, value, unit, expectation", + [ + (Timedelta, "10s", "ms", (ValueError, "unit must not be specified")), + (to_timedelta, "10s", "ms", (ValueError, "unit must not be specified")), + (to_timedelta, ["1", 2, 3], "s", (ValueError, "unit must not be specified")), + ], +) +def test_string_with_unit(constructor, value, unit, expectation): + exp, match = expectation + with pytest.raises(exp, match=match): + _ = constructor(value, unit=unit)