Skip to content

BUG: Don't error in pd.to_timedelta when errors=ignore #13832

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion asv_bench/benchmarks/timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,19 @@ def setup(self):
self.arr = ['00:00:{0:02d}'.format(i) for i in self.arr]

def time_timedelta_convert_string_seconds(self):
to_timedelta(self.arr)
to_timedelta(self.arr)


class timedelta_convert_bad_parse(object):
goal_time = 0.2

def setup(self):
self.arr = np.random.randint(0, 1000, size=10000)
self.arr = ['{0} days'.format(i) for i in self.arr]
self.arr[-1] = 'apple'

def time_timedelta_convert_coerce(self):
to_timedelta(self.arr, errors='coerce')

def time_timedelta_convert_ignore(self):
to_timedelta(self.arr, errors='ignore')
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.19.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -858,6 +858,7 @@ Bug Fixes
- Bug in ``groupby().cumsum()`` calculating ``cumprod`` when ``axis=1``. (:issue:`13994`)
- Bug in ``pd.read_csv()``, which may cause a segfault or corruption when iterating in large chunks over a stream/file under rare circumstances (:issue:`13703`)
- Bug in ``pd.read_csv()``, which caused BOM files to be incorrectly parsed by not ignoring the BOM (:issue:`4793`)
- Bug in ``pd.to_timedelta()`` in which the ``errors`` parameter was not being respected (:issue:`13613`)
- Bug in ``io.json.json_normalize()``, where non-ascii keys raised an exception (:issue:`13213`)
- Bug when passing a not-default-indexed ``Series`` as ``xerr`` or ``yerr`` in ``.plot()`` (:issue:`11858`)
- Bug in area plot draws legend incorrectly if subplot is enabled or legend is moved after plot (matplotlib 1.5.0 is required to draw area plot legend properly) (issue:`9161`, :issue:`13544`)
Expand Down
2 changes: 1 addition & 1 deletion pandas/src/inference.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -780,7 +780,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
break
elif is_timedelta(val):
if convert_timedelta:
itimedeltas[i] = convert_to_timedelta64(val, 'ns', False)
itimedeltas[i] = convert_to_timedelta64(val, 'ns')
seen_timedelta = 1
else:
seen_object = 1
Expand Down
23 changes: 23 additions & 0 deletions pandas/tseries/tests/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -845,6 +845,11 @@ def testit(unit, transform):

def test_to_timedelta_invalid(self):

# bad value for errors parameter
msg = "errors must be one of"
tm.assertRaisesRegexp(ValueError, msg, to_timedelta,
['foo'], errors='never')

# these will error
self.assertRaises(ValueError, lambda: to_timedelta([1, 2], unit='foo'))
self.assertRaises(ValueError, lambda: to_timedelta(1, unit='foo'))
Expand All @@ -862,6 +867,24 @@ def test_to_timedelta_invalid(self):
to_timedelta(['1 day', 'bar', '1 min'],
errors='coerce'))

# gh-13613: these should not error because errors='ignore'
invalid_data = 'apple'
self.assertEqual(invalid_data, to_timedelta(
invalid_data, errors='ignore'))

invalid_data = ['apple', '1 days']
tm.assert_numpy_array_equal(
np.array(invalid_data, dtype=object),
to_timedelta(invalid_data, errors='ignore'))

invalid_data = pd.Index(['apple', '1 days'])
tm.assert_index_equal(invalid_data, to_timedelta(
invalid_data, errors='ignore'))

invalid_data = Series(['apple', '1 days'])
tm.assert_series_equal(invalid_data, to_timedelta(
invalid_data, errors='ignore'))

def test_to_timedelta_via_apply(self):
# GH 5458
expected = Series([np.timedelta64(1, 's')])
Expand Down
89 changes: 59 additions & 30 deletions pandas/tseries/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
"""

import numpy as np
import pandas as pd
import pandas.tslib as tslib

from pandas.types.common import (_ensure_object,
is_integer_dtype,
is_timedelta64_dtype,
Expand Down Expand Up @@ -64,37 +66,22 @@ def to_timedelta(arg, unit='ns', box=True, errors='raise', coerce=None):
"""
unit = _validate_timedelta_unit(unit)

def _convert_listlike(arg, box, unit, name=None):

if isinstance(arg, (list, tuple)) or not hasattr(arg, 'dtype'):
arg = np.array(list(arg), dtype='O')

# these are shortcutable
if is_timedelta64_dtype(arg):
value = arg.astype('timedelta64[ns]')
elif is_integer_dtype(arg):
value = arg.astype('timedelta64[{0}]'.format(
unit)).astype('timedelta64[ns]', copy=False)
else:
value = tslib.array_to_timedelta64(_ensure_object(arg),
unit=unit, errors=errors)
value = value.astype('timedelta64[ns]', copy=False)

if box:
from pandas import TimedeltaIndex
value = TimedeltaIndex(value, unit='ns', name=name)
return value
if errors not in ('ignore', 'raise', 'coerce'):
raise ValueError("errors must be one of 'ignore', "
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this tested?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Absolutely. That was one of the first tests that I wrote.

"'raise', or 'coerce'}")

if arg is None:
return arg
elif isinstance(arg, ABCSeries):
from pandas import Series
values = _convert_listlike(arg._values, box=False, unit=unit)
return Series(values, index=arg.index, name=arg.name, dtype='m8[ns]')
values = _convert_listlike(arg._values, unit=unit,
box=False, errors=errors)
return Series(values, index=arg.index, name=arg.name)
elif isinstance(arg, ABCIndexClass):
return _convert_listlike(arg, box=box, unit=unit, name=arg.name)
return _convert_listlike(arg, unit=unit, box=box,
errors=errors, name=arg.name)
elif is_list_like(arg) and getattr(arg, 'ndim', 1) == 1:
return _convert_listlike(arg, box=box, unit=unit)
return _convert_listlike(arg, unit=unit, box=box, errors=errors)
elif getattr(arg, 'ndim', 1) > 1:
raise TypeError('arg must be a string, timedelta, list, tuple, '
'1-d array, or Series')
Expand Down Expand Up @@ -142,13 +129,55 @@ def _validate_timedelta_unit(arg):


def _coerce_scalar_to_timedelta_type(r, unit='ns', box=True, errors='raise'):
"""
convert strings to timedelta; coerce to Timedelta (if box), else
np.timedelta64
"""
"""Convert string 'r' to a timedelta object."""

try:
result = tslib.convert_to_timedelta64(r, unit)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tslib.convert_to_timedelta64 already accepts the coerce arg, you are doing extra unecessary things here

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not in my implementation and for reasons that I explain below.

except ValueError:
if errors == 'raise':
raise
elif errors == 'ignore':
return r

# coerce
result = pd.NaT

result = tslib.convert_to_timedelta(r, unit, errors)
if box:
result = tslib.Timedelta(result)

return result


def _convert_listlike(arg, unit='ns', box=True, errors='raise', name=None):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IOW this is correct, this needs to be fixed in cython

Copy link
Member Author

@gfyoung gfyoung Jul 29, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO the current layout is not correct.

The reason why the coerce handling moved to the Python level is because of the box check. In the current framework, if box=True and errors='ignore' but parsing fails, the conversion to Timedelta will raise when it shouldn't. We could move the box check into convert_to_timedelta(64), but that seems to be putting too much into one function, not to mention this is also called on the array_to_timedelta end.

"""Convert a list of objects to a timedelta index object."""

if isinstance(arg, (list, tuple)) or not hasattr(arg, 'dtype'):
arg = np.array(list(arg), dtype='O')

# these are shortcut-able
if is_timedelta64_dtype(arg):
value = arg.astype('timedelta64[ns]')
elif is_integer_dtype(arg):
value = arg.astype('timedelta64[{0}]'.format(
unit)).astype('timedelta64[ns]', copy=False)
else:
try:
value = tslib.array_to_timedelta64(_ensure_object(arg),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this needs to be handled in cython

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same explanation as above.

unit=unit, errors=errors)
value = value.astype('timedelta64[ns]', copy=False)
except ValueError:
if errors == 'ignore':
return arg
else:
# This else-block accounts for the cases when errors='raise'
# and errors='coerce'. If errors == 'raise', these errors
# should be raised. If errors == 'coerce', we shouldn't
# expect any errors to be raised, since all parsing errors
# cause coercion to pd.NaT. However, if an error / bug is
# introduced that causes an Exception to be raised, we would
# like to surface it.
raise

if box:
from pandas import TimedeltaIndex
value = TimedeltaIndex(value, unit='ns', name=name)
return value
2 changes: 1 addition & 1 deletion pandas/tslib.pxd
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from numpy cimport ndarray, int64_t

cdef convert_to_tsobject(object, object, object, bint, bint)
cdef convert_to_timedelta64(object, object, object)
cpdef convert_to_timedelta64(object, object)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NO, don't change this

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had to, otherwise, it wouldn't be accessible in the Python world. This is really inconsequential anyways if you read the docs here.

cpdef object maybe_get_tz(object)
cdef bint _is_utc(object)
cdef bint _is_tzlocal(object)
Expand Down
Loading