Skip to content

Commit 8b50d8c

Browse files
gfyoung authored and jreback committed
BUG: Don't error in pd.to_timedelta when errors=ignore
Title is self-explanatory. Closes pandas-dev#13613. Author: gfyoung <[email protected]> Closes pandas-dev#13832 from gfyoung/to-timedelta-error-bug and squashes the following commits: dc39205 [gfyoung] BUG: Don't error in pd.to_timedelta when errors=ignore
1 parent f93ad1c commit 8b50d8c

File tree

7 files changed

+146
-108
lines changed

7 files changed

+146
-108
lines changed

asv_bench/benchmarks/timedelta.py

+16-1
Original file line numberDiff line numberDiff line change
@@ -31,4 +31,19 @@ def setup(self):
3131
self.arr = ['00:00:{0:02d}'.format(i) for i in self.arr]
3232

3333
def time_timedelta_convert_string_seconds(self):
34-
to_timedelta(self.arr)
34+
to_timedelta(self.arr)
35+
36+
37+
class timedelta_convert_bad_parse(object):
    """Benchmark ``to_timedelta`` on string input containing one bad value.

    ``setup`` stores 10000 strings in ``self.arr``: every entry has the
    form ``'<n> days'`` with ``n`` drawn from ``[0, 1000)``, except the
    last one, which is replaced by the unparseable string ``'apple'``.
    The two timing methods convert that list with ``errors='coerce'`` and
    ``errors='ignore'`` respectively.
    """

    goal_time = 0.2

    def setup(self):
        day_counts = np.random.randint(0, 1000, size=10000)
        values = ['{0} days'.format(count) for count in day_counts]
        # A single invalid trailing entry forces the error-handling path.
        values[-1] = 'apple'
        self.arr = values

    def time_timedelta_convert_coerce(self):
        to_timedelta(self.arr, errors='coerce')

    def time_timedelta_convert_ignore(self):
        to_timedelta(self.arr, errors='ignore')

doc/source/whatsnew/v0.19.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -858,6 +858,7 @@ Bug Fixes
858858
- Bug in ``groupby().cumsum()`` calculating ``cumprod`` when ``axis=1``. (:issue:`13994`)
859859
- Bug in ``pd.read_csv()``, which may cause a segfault or corruption when iterating in large chunks over a stream/file under rare circumstances (:issue:`13703`)
860860
- Bug in ``pd.read_csv()``, which caused BOM files to be incorrectly parsed by not ignoring the BOM (:issue:`4793`)
861+
- Bug in ``pd.to_timedelta()`` in which the ``errors`` parameter was not being respected (:issue:`13613`)
861862
- Bug in ``io.json.json_normalize()``, where non-ascii keys raised an exception (:issue:`13213`)
862863
- Bug when passing a not-default-indexed ``Series`` as ``xerr`` or ``yerr`` in ``.plot()`` (:issue:`11858`)
863864
- Bug in area plot draws legend incorrectly if subplot is enabled or legend is moved after plot (matplotlib 1.5.0 is required to draw area plot legend properly) (:issue:`9161`, :issue:`13544`)

pandas/src/inference.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -780,7 +780,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
780780
break
781781
elif is_timedelta(val):
782782
if convert_timedelta:
783-
itimedeltas[i] = convert_to_timedelta64(val, 'ns', False)
783+
itimedeltas[i] = convert_to_timedelta64(val, 'ns')
784784
seen_timedelta = 1
785785
else:
786786
seen_object = 1

pandas/tseries/tests/test_timedeltas.py

+23
Original file line numberDiff line numberDiff line change
@@ -845,6 +845,11 @@ def testit(unit, transform):
845845

846846
def test_to_timedelta_invalid(self):
847847

848+
# bad value for errors parameter
849+
msg = "errors must be one of"
850+
tm.assertRaisesRegexp(ValueError, msg, to_timedelta,
851+
['foo'], errors='never')
852+
848853
# these will error
849854
self.assertRaises(ValueError, lambda: to_timedelta([1, 2], unit='foo'))
850855
self.assertRaises(ValueError, lambda: to_timedelta(1, unit='foo'))
@@ -862,6 +867,24 @@ def test_to_timedelta_invalid(self):
862867
to_timedelta(['1 day', 'bar', '1 min'],
863868
errors='coerce'))
864869

870+
# gh-13613: these should not error because errors='ignore'
871+
invalid_data = 'apple'
872+
self.assertEqual(invalid_data, to_timedelta(
873+
invalid_data, errors='ignore'))
874+
875+
invalid_data = ['apple', '1 days']
876+
tm.assert_numpy_array_equal(
877+
np.array(invalid_data, dtype=object),
878+
to_timedelta(invalid_data, errors='ignore'))
879+
880+
invalid_data = pd.Index(['apple', '1 days'])
881+
tm.assert_index_equal(invalid_data, to_timedelta(
882+
invalid_data, errors='ignore'))
883+
884+
invalid_data = Series(['apple', '1 days'])
885+
tm.assert_series_equal(invalid_data, to_timedelta(
886+
invalid_data, errors='ignore'))
887+
865888
def test_to_timedelta_via_apply(self):
866889
# GH 5458
867890
expected = Series([np.timedelta64(1, 's')])

pandas/tseries/timedeltas.py

+59-30
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33
"""
44

55
import numpy as np
6+
import pandas as pd
67
import pandas.tslib as tslib
8+
79
from pandas.types.common import (_ensure_object,
810
is_integer_dtype,
911
is_timedelta64_dtype,
@@ -64,37 +66,22 @@ def to_timedelta(arg, unit='ns', box=True, errors='raise', coerce=None):
6466
"""
6567
unit = _validate_timedelta_unit(unit)
6668

67-
def _convert_listlike(arg, box, unit, name=None):
68-
69-
if isinstance(arg, (list, tuple)) or not hasattr(arg, 'dtype'):
70-
arg = np.array(list(arg), dtype='O')
71-
72-
# these are shortcutable
73-
if is_timedelta64_dtype(arg):
74-
value = arg.astype('timedelta64[ns]')
75-
elif is_integer_dtype(arg):
76-
value = arg.astype('timedelta64[{0}]'.format(
77-
unit)).astype('timedelta64[ns]', copy=False)
78-
else:
79-
value = tslib.array_to_timedelta64(_ensure_object(arg),
80-
unit=unit, errors=errors)
81-
value = value.astype('timedelta64[ns]', copy=False)
82-
83-
if box:
84-
from pandas import TimedeltaIndex
85-
value = TimedeltaIndex(value, unit='ns', name=name)
86-
return value
69+
if errors not in ('ignore', 'raise', 'coerce'):
70+
raise ValueError("errors must be one of 'ignore', "
71+
"'raise', or 'coerce'}")
8772

8873
if arg is None:
8974
return arg
9075
elif isinstance(arg, ABCSeries):
9176
from pandas import Series
92-
values = _convert_listlike(arg._values, box=False, unit=unit)
93-
return Series(values, index=arg.index, name=arg.name, dtype='m8[ns]')
77+
values = _convert_listlike(arg._values, unit=unit,
78+
box=False, errors=errors)
79+
return Series(values, index=arg.index, name=arg.name)
9480
elif isinstance(arg, ABCIndexClass):
95-
return _convert_listlike(arg, box=box, unit=unit, name=arg.name)
81+
return _convert_listlike(arg, unit=unit, box=box,
82+
errors=errors, name=arg.name)
9683
elif is_list_like(arg) and getattr(arg, 'ndim', 1) == 1:
97-
return _convert_listlike(arg, box=box, unit=unit)
84+
return _convert_listlike(arg, unit=unit, box=box, errors=errors)
9885
elif getattr(arg, 'ndim', 1) > 1:
9986
raise TypeError('arg must be a string, timedelta, list, tuple, '
10087
'1-d array, or Series')
@@ -142,13 +129,55 @@ def _validate_timedelta_unit(arg):
142129

143130

144131
def _coerce_scalar_to_timedelta_type(r, unit='ns', box=True, errors='raise'):
145-
"""
146-
convert strings to timedelta; coerce to Timedelta (if box), else
147-
np.timedelta64
148-
"""
132+
"""Convert string 'r' to a timedelta object."""
133+
134+
try:
135+
result = tslib.convert_to_timedelta64(r, unit)
136+
except ValueError:
137+
if errors == 'raise':
138+
raise
139+
elif errors == 'ignore':
140+
return r
141+
142+
# coerce
143+
result = pd.NaT
149144

150-
result = tslib.convert_to_timedelta(r, unit, errors)
151145
if box:
152146
result = tslib.Timedelta(result)
153-
154147
return result
148+
149+
150+
def _convert_listlike(arg, unit='ns', box=True, errors='raise', name=None):
    """
    Convert a list-like of objects to ``timedelta64[ns]`` values.

    Parameters
    ----------
    arg : list, tuple, or object with a ``dtype`` attribute
        Lists, tuples and anything lacking ``dtype`` are first copied into
        an object ndarray.
    unit : str, default 'ns'
        Unit applied to integer input and forwarded to the parser.
    box : bool, default True
        If True, wrap the converted values in a ``TimedeltaIndex``.
    errors : str, default 'raise'
        Forwarded to ``tslib.array_to_timedelta64``. With ``'ignore'``, a
        ``ValueError`` from parsing makes this function return ``arg``
        (after the object-array step above) instead of converting.
    name : object, default None
        Name given to the ``TimedeltaIndex`` when ``box`` is True.

    Returns
    -------
    The converted values, boxed in a ``TimedeltaIndex`` if ``box`` is
    True; or the unconverted ``arg`` when parsing fails under
    ``errors='ignore'``.

    Raises
    ------
    ValueError
        Re-raised from parsing whenever ``errors`` is not ``'ignore'``.
    """
    lacks_dtype = not hasattr(arg, 'dtype')
    if isinstance(arg, (list, tuple)) or lacks_dtype:
        arg = np.array(list(arg), dtype='O')

    if is_timedelta64_dtype(arg):
        # shortcut: already timedelta-typed, only normalise to ns
        value = arg.astype('timedelta64[ns]')
    elif is_integer_dtype(arg):
        # shortcut: integers are counts of ``unit``
        unit_dtype = 'timedelta64[{0}]'.format(unit)
        value = arg.astype(unit_dtype).astype('timedelta64[ns]', copy=False)
    else:
        try:
            parsed = tslib.array_to_timedelta64(_ensure_object(arg),
                                                unit=unit, errors=errors)
            value = parsed.astype('timedelta64[ns]', copy=False)
        except ValueError:
            # 'raise' must surface the error. 'coerce' is not expected to
            # get here, since parse failures become NaT; if it does, that
            # signals a bug and is surfaced as well.
            if errors != 'ignore':
                raise
            return arg

    if not box:
        return value

    from pandas import TimedeltaIndex
    return TimedeltaIndex(value, unit='ns', name=name)

pandas/tslib.pxd

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from numpy cimport ndarray, int64_t
22

33
cdef convert_to_tsobject(object, object, object, bint, bint)
4-
cdef convert_to_timedelta64(object, object, object)
4+
cpdef convert_to_timedelta64(object, object)
55
cpdef object maybe_get_tz(object)
66
cdef bint _is_utc(object)
77
cdef bint _is_tzlocal(object)

0 commit comments

Comments
 (0)