Skip to content

Commit d3f686b

Browse files
authored
BUG/API: Disallow unit if input to Timedelta and to_timedelta is/contains str (#34634)
1 parent 91241ae commit d3f686b

File tree

6 files changed

+46
-11
lines changed

6 files changed

+46
-11
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -827,6 +827,7 @@ Timedelta
827827
- Bug in :func:`timedelta_range` that produced an extra point on an edge case (:issue:`30353`, :issue:`33498`)
828828
- Bug in :meth:`DataFrame.resample` that produced an extra point on an edge case (:issue:`30353`, :issue:`13022`, :issue:`33498`)
829829
- Bug in :meth:`DataFrame.resample` that ignored the ``loffset`` argument when dealing with timedelta (:issue:`7687`, :issue:`33498`)
830+
- Bug in :class:`Timedelta` and :func:`to_timedelta` that ignored the ``unit`` argument for string input (:issue:`12136`)
830831

831832
Timezones
832833
^^^^^^^^^

pandas/_libs/tslibs/timedeltas.pyx

+12-3
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,7 @@ cdef convert_to_timedelta64(object ts, object unit):
218218

219219
@cython.boundscheck(False)
220220
@cython.wraparound(False)
221-
def array_to_timedelta64(object[:] values, unit='ns', errors='raise'):
221+
def array_to_timedelta64(object[:] values, unit=None, errors='raise'):
222222
"""
223223
Convert an ndarray to an array of timedeltas. If errors == 'coerce',
224224
coerce non-convertible objects to NaT. Otherwise, raise.
@@ -235,6 +235,13 @@ def array_to_timedelta64(object[:] values, unit='ns', errors='raise'):
235235
result = np.empty(n, dtype='m8[ns]')
236236
iresult = result.view('i8')
237237

238+
if unit is not None:
239+
for i in range(n):
240+
if isinstance(values[i], str):
241+
raise ValueError(
242+
"unit must not be specified if the input contains a str"
243+
)
244+
238245
# Usually, we have all strings. If so, we hit the fast path.
239246
# If this path fails, we try conversion a different way, and
240247
# this is where all of the error handling will take place.
@@ -247,10 +254,10 @@ def array_to_timedelta64(object[:] values, unit='ns', errors='raise'):
247254
else:
248255
result[i] = parse_timedelta_string(values[i])
249256
except (TypeError, ValueError):
250-
unit = parse_timedelta_unit(unit)
257+
parsed_unit = parse_timedelta_unit(unit or 'ns')
251258
for i in range(n):
252259
try:
253-
result[i] = convert_to_timedelta64(values[i], unit)
260+
result[i] = convert_to_timedelta64(values[i], parsed_unit)
254261
except ValueError:
255262
if errors == 'coerce':
256263
result[i] = NPY_NAT
@@ -1155,6 +1162,8 @@ class Timedelta(_Timedelta):
11551162
elif isinstance(value, _Timedelta):
11561163
value = value.value
11571164
elif isinstance(value, str):
1165+
if unit is not None:
1166+
raise ValueError("unit must not be specified if the value is a str")
11581167
if len(value) > 0 and value[0] == 'P':
11591168
value = parse_iso_format_string(value)
11601169
else:

pandas/core/arrays/timedeltas.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -876,14 +876,15 @@ def f(x):
876876
# Constructor Helpers
877877

878878

879-
def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"):
879+
def sequence_to_td64ns(data, copy=False, unit=None, errors="raise"):
880880
"""
881881
Parameters
882882
----------
883883
data : list-like
884884
copy : bool, default False
885885
unit : str, optional (treated as "ns" for numeric data when not given)
886886
The timedelta unit to treat integers as multiples of.
887+
Must not be specified if the data contains a str.
887888
errors : {"raise", "coerce", "ignore"}, default "raise"
888889
How to handle elements that cannot be converted to timedelta64[ns].
889890
See ``pandas.to_timedelta`` for details.
@@ -906,7 +907,8 @@ def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"):
906907
higher level.
907908
"""
908909
inferred_freq = None
909-
unit = parse_timedelta_unit(unit)
910+
if unit is not None:
911+
unit = parse_timedelta_unit(unit)
910912

911913
# Unwrap whatever we have into a np.ndarray
912914
if not hasattr(data, "dtype"):
@@ -936,7 +938,7 @@ def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"):
936938
# cast the unit, multiply base/frac separately
937939
# to avoid precision issues from float -> int
938940
mask = np.isnan(data)
939-
m, p = precision_from_unit(unit)
941+
m, p = precision_from_unit(unit or "ns")
940942
base = data.astype(np.int64)
941943
frac = data - base
942944
if p:
@@ -1002,7 +1004,7 @@ def ints_to_td64ns(data, unit="ns"):
10021004
return data, copy_made
10031005

10041006

1005-
def objects_to_td64ns(data, unit="ns", errors="raise"):
1007+
def objects_to_td64ns(data, unit=None, errors="raise"):
10061008
"""
10071009
Convert a object-dtyped or string-dtyped array into an
10081010
timedelta64[ns]-dtyped array.
@@ -1012,6 +1014,7 @@ def objects_to_td64ns(data, unit="ns", errors="raise"):
10121014
data : ndarray or Index
10131015
unit : str, optional (treated as "ns" for numeric data when not given)
10141016
The timedelta unit to treat integers as multiples of.
1017+
Must not be specified if the data contains a str.
10151018
errors : {"raise", "coerce", "ignore"}, default "raise"
10161019
How to handle elements that cannot be converted to timedelta64[ns].
10171020
See ``pandas.to_timedelta`` for details.

pandas/core/computation/pytables.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,10 @@ def stringify(value):
200200
v = v.tz_convert("UTC")
201201
return TermValue(v, v.value, kind)
202202
elif kind == "timedelta64" or kind == "timedelta":
203-
v = Timedelta(v, unit="s").value
203+
if isinstance(v, str):
204+
v = Timedelta(v).value
205+
else:
206+
v = Timedelta(v, unit="s").value
204207
return TermValue(int(v), v, kind)
205208
elif meta == "category":
206209
metadata = extract_array(self.metadata, extract_numpy=True)

pandas/core/tools/timedeltas.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from pandas.core.arrays.timedeltas import sequence_to_td64ns
1414

1515

16-
def to_timedelta(arg, unit="ns", errors="raise"):
16+
def to_timedelta(arg, unit=None, errors="raise"):
1717
"""
1818
Convert argument to timedelta.
1919
@@ -27,6 +27,7 @@ def to_timedelta(arg, unit="ns", errors="raise"):
2727
arg : str, timedelta, list-like or Series
2828
The data to be converted to timedelta.
2929
unit : str, optional (treated as 'ns' for numeric input when not given)
30+
Must not be specified if the arg is/contains a str.
3031
Denotes the unit of the arg. Possible values:
3132
('W', 'D', 'days', 'day', 'hours', 'hour', 'hr', 'h',
3233
'm', 'minute', 'min', 'minutes', 'T', 'S', 'seconds',
@@ -76,7 +77,8 @@ def to_timedelta(arg, unit="ns", errors="raise"):
7677
TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'],
7778
dtype='timedelta64[ns]', freq=None)
7879
"""
79-
unit = parse_timedelta_unit(unit)
80+
if unit is not None:
81+
unit = parse_timedelta_unit(unit)
8082

8183
if errors not in ("ignore", "raise", "coerce"):
8284
raise ValueError("errors must be one of 'ignore', 'raise', or 'coerce'}")
@@ -104,6 +106,9 @@ def to_timedelta(arg, unit="ns", errors="raise"):
104106
"arg must be a string, timedelta, list, tuple, 1-d array, or Series"
105107
)
106108

109+
if isinstance(arg, str) and unit is not None:
110+
raise ValueError("unit must not be specified if the input is/contains a str")
111+
107112
# ...so it must be a scalar value. Return scalar.
108113
return _coerce_scalar_to_timedelta_type(arg, unit=unit, errors=errors)
109114

@@ -124,7 +129,7 @@ def _coerce_scalar_to_timedelta_type(r, unit="ns", errors="raise"):
124129
return result
125130

126131

127-
def _convert_listlike(arg, unit="ns", errors="raise", name=None):
132+
def _convert_listlike(arg, unit=None, errors="raise", name=None):
128133
"""Convert a list of objects to a timedelta index object."""
129134
if isinstance(arg, (list, tuple)) or not hasattr(arg, "dtype"):
130135
# This is needed only to ensure that in the case where we end up

pandas/tests/scalar/timedelta/test_constructors.py

+14
Original file line numberDiff line numberDiff line change
@@ -289,3 +289,17 @@ def test_timedelta_constructor_identity():
289289
expected = Timedelta(np.timedelta64(1, "s"))
290290
result = Timedelta(expected)
291291
assert result is expected
292+
293+
294+
@pytest.mark.parametrize(
295+
"constructor, value, unit, expectation",
296+
[
297+
(Timedelta, "10s", "ms", (ValueError, "unit must not be specified")),
298+
(to_timedelta, "10s", "ms", (ValueError, "unit must not be specified")),
299+
(to_timedelta, ["1", 2, 3], "s", (ValueError, "unit must not be specified")),
300+
],
301+
)
302+
def test_string_with_unit(constructor, value, unit, expectation):
303+
exp, match = expectation
304+
with pytest.raises(exp, match=match):
305+
_ = constructor(value, unit=unit)

0 commit comments

Comments
 (0)