Skip to content

BUG/API: Disallow unit if input to Timedelta and to_timedelta is/contains str #34634

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 8, 2020
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -826,6 +826,7 @@ Timedelta
- Bug in :func:`timedelta_range` that produced an extra point on a edge case (:issue:`30353`, :issue:`33498`)
- Bug in :meth:`DataFrame.resample` that produced an extra point on a edge case (:issue:`30353`, :issue:`13022`, :issue:`33498`)
- Bug in :meth:`DataFrame.resample` that ignored the ``loffset`` argument when dealing with timedelta (:issue:`7687`, :issue:`33498`)
- Bug in :class:`Timedelta` and `pandas.to_timedelta` that ignored `unit`-argument for string input (:issue:`12136`)

Timezones
^^^^^^^^^
Expand Down
14 changes: 11 additions & 3 deletions pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ cdef convert_to_timedelta64(object ts, object unit):

@cython.boundscheck(False)
@cython.wraparound(False)
def array_to_timedelta64(object[:] values, unit='ns', errors='raise'):
def array_to_timedelta64(object[:] values, unit=None, errors='raise'):
"""
Convert an ndarray to an array of timedeltas. If errors == 'coerce',
coerce non-convertible objects to NaT. Otherwise, raise.
Expand All @@ -234,29 +234,35 @@ def array_to_timedelta64(object[:] values, unit='ns', errors='raise'):
n = values.shape[0]
result = np.empty(n, dtype='m8[ns]')
iresult = result.view('i8')
has_string = False

# Usually, we have all strings. If so, we hit the fast path.
# If this path fails, we try conversion a different way, and
# this is where all of the error handling will take place.
try:
for i in range(n):
if isinstance(values[i], str):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this should be in the clause on L251

has_string = True
if values[i] is NaT:
# we allow this check in the fast-path because NaT is a C-object
# so this is an inexpensive check
iresult[i] = NPY_NAT
else:
result[i] = parse_timedelta_string(values[i])
except (TypeError, ValueError):
unit = parse_timedelta_unit(unit)
parsed_unit = parse_timedelta_unit(unit or 'ns')
for i in range(n):
try:
result[i] = convert_to_timedelta64(values[i], unit)
result[i] = convert_to_timedelta64(values[i], parsed_unit)
except ValueError:
if errors == 'coerce':
result[i] = NPY_NAT
else:
raise

if has_string and unit is not None:
raise ValueError("unit must be un-specified if the input contains a str")

return iresult.base # .base to access underlying np.ndarray


Expand Down Expand Up @@ -1155,6 +1161,8 @@ class Timedelta(_Timedelta):
elif isinstance(value, _Timedelta):
value = value.value
elif isinstance(value, str):
if unit is not None:
raise ValueError("unit must be un-specified if the value is a str")
if len(value) > 0 and value[0] == 'P':
value = parse_iso_format_string(value)
else:
Expand Down
11 changes: 7 additions & 4 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -876,14 +876,15 @@ def f(x):
# Constructor Helpers


def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"):
def sequence_to_td64ns(data, copy=False, unit=None, errors="raise"):
"""
Parameters
----------
data : list-like
copy : bool, default False
unit : str, default "ns"
The timedelta unit to treat integers as multiples of.
Must be un-specifed if the data contains a str.
errors : {"raise", "coerce", "ignore"}, default "raise"
How to handle elements that cannot be converted to timedelta64[ns].
See ``pandas.to_timedelta`` for details.
Expand All @@ -906,7 +907,8 @@ def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"):
higher level.
"""
inferred_freq = None
unit = parse_timedelta_unit(unit)
if unit is not None:
unit = parse_timedelta_unit(unit)

# Unwrap whatever we have into a np.ndarray
if not hasattr(data, "dtype"):
Expand Down Expand Up @@ -936,7 +938,7 @@ def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"):
# cast the unit, multiply base/frac separately
# to avoid precision issues from float -> int
mask = np.isnan(data)
m, p = precision_from_unit(unit)
m, p = precision_from_unit(unit or "ns")
base = data.astype(np.int64)
frac = data - base
if p:
Expand Down Expand Up @@ -1002,7 +1004,7 @@ def ints_to_td64ns(data, unit="ns"):
return data, copy_made


def objects_to_td64ns(data, unit="ns", errors="raise"):
def objects_to_td64ns(data, unit=None, errors="raise"):
"""
Convert a object-dtyped or string-dtyped array into an
timedelta64[ns]-dtyped array.
Expand All @@ -1012,6 +1014,7 @@ def objects_to_td64ns(data, unit="ns", errors="raise"):
data : ndarray or Index
unit : str, default "ns"
The timedelta unit to treat integers as multiples of.
Must be un-specified if the data contains a str.
errors : {"raise", "coerce", "ignore"}, default "raise"
How to handle elements that cannot be converted to timedelta64[ns].
See ``pandas.to_timedelta`` for details.
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/computation/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,10 @@ def stringify(value):
v = v.tz_convert("UTC")
return TermValue(v, v.value, kind)
elif kind == "timedelta64" or kind == "timedelta":
v = Timedelta(v, unit="s").value
if isinstance(v, str):
v = Timedelta(v).value
else:
v = Timedelta(v, unit="s").value
return TermValue(int(v), v, kind)
elif meta == "category":
metadata = extract_array(self.metadata, extract_numpy=True)
Expand Down
11 changes: 8 additions & 3 deletions pandas/core/tools/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from pandas.core.arrays.timedeltas import sequence_to_td64ns


def to_timedelta(arg, unit="ns", errors="raise"):
def to_timedelta(arg, unit=None, errors="raise"):
"""
Convert argument to timedelta.

Expand All @@ -27,6 +27,7 @@ def to_timedelta(arg, unit="ns", errors="raise"):
arg : str, timedelta, list-like or Series
The data to be converted to timedelta.
unit : str, default 'ns'
Must be un-specified if the arg is/contains a str.
Denotes the unit of the arg. Possible values:
('W', 'D', 'days', 'day', 'hours', hour', 'hr', 'h',
'm', 'minute', 'min', 'minutes', 'T', 'S', 'seconds',
Expand Down Expand Up @@ -76,7 +77,8 @@ def to_timedelta(arg, unit="ns", errors="raise"):
TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'],
dtype='timedelta64[ns]', freq=None)
"""
unit = parse_timedelta_unit(unit)
if unit is not None:
unit = parse_timedelta_unit(unit)

if errors not in ("ignore", "raise", "coerce"):
raise ValueError("errors must be one of 'ignore', 'raise', or 'coerce'}")
Expand Down Expand Up @@ -104,6 +106,9 @@ def to_timedelta(arg, unit="ns", errors="raise"):
"arg must be a string, timedelta, list, tuple, 1-d array, or Series"
)

if isinstance(arg, str) and unit is not None:
raise ValueError("unit must be un-specified if the input is/contains a str")

# ...so it must be a scalar value. Return scalar.
return _coerce_scalar_to_timedelta_type(arg, unit=unit, errors=errors)

Expand All @@ -124,7 +129,7 @@ def _coerce_scalar_to_timedelta_type(r, unit="ns", errors="raise"):
return result


def _convert_listlike(arg, unit="ns", errors="raise", name=None):
def _convert_listlike(arg, unit=None, errors="raise", name=None):
"""Convert a list of objects to a timedelta index object."""
if isinstance(arg, (list, tuple)) or not hasattr(arg, "dtype"):
# This is needed only to ensure that in the case where we end up
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/scalar/timedelta/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,3 +289,17 @@ def test_timedelta_constructor_identity():
expected = Timedelta(np.timedelta64(1, "s"))
result = Timedelta(expected)
assert result is expected


@pytest.mark.parametrize(
"constructor, value, unit, expectation",
[
(Timedelta, "10s", "ms", (ValueError, "unit must be un-specified")),
(to_timedelta, "10s", "ms", (ValueError, "unit must be un-specified")),
(to_timedelta, ["1", 2, 3], "s", (ValueError, "unit must be un-specified")),
],
)
def test_string_with_unit(constructor, value, unit, expectation):
exp, match = expectation
with pytest.raises(exp, match=match):
_ = constructor(value, unit=unit)