Skip to content

Commit f7a8a2e

Browse files
committed
BUG: Bug in to_timedelta that accepted invalid units and misinterpreted m/h (GH7611, GH6423)
1 parent 11da541 commit f7a8a2e

File tree

4 files changed

+79
-19
lines changed

4 files changed

+79
-19
lines changed

doc/source/v0.14.1.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ Bug Fixes
176176
- Bug in groupby ``.nth`` with a Series and integer-like column name (:issue:`7559`)
177177

178178
- Bug in ``value_counts`` where ``NaT`` did not qualify as missing (``NaN``) (:issue:`7423`)
179-
179+
- Bug in ``to_timedelta`` that accepted invalid units and misinterpreted 'm/h' (:issue:`7611`, :issue:`6423`)
180180

181181

182182
- Bug in ``Panel.apply`` with a multi-index as an axis (:issue:`7469`)

pandas/tseries/tests/test_timedeltas.py

+26-6
Original file line numberDiff line numberDiff line change
@@ -199,20 +199,40 @@ def conv(v):
199199
expected = Series([ np.timedelta64(1,'D') ]*5)
200200
tm.assert_series_equal(result, expected)
201201

202+
def testit(unit, transform):
203+
204+
# array
205+
result = to_timedelta(np.arange(5),unit=unit)
206+
expected = Series([ np.timedelta64(i,transform(unit)) for i in np.arange(5).tolist() ])
207+
tm.assert_series_equal(result, expected)
208+
209+
# scalar
210+
result = to_timedelta(2,unit=unit)
211+
expected = np.timedelta64(2,transform(unit)).astype('timedelta64[ns]')
212+
self.assert_numpy_array_equal(result,expected)
213+
202214
# validate all units
203215
# GH 6855
204216
for unit in ['Y','M','W','D','y','w','d']:
205-
result = to_timedelta(np.arange(5),unit=unit)
206-
expected = Series([ np.timedelta64(i,unit.upper()) for i in np.arange(5).tolist() ])
207-
tm.assert_series_equal(result, expected)
217+
testit(unit,lambda x: x.upper())
218+
for unit in ['days','day','Day','Days']:
219+
testit(unit,lambda x: 'D')
208220
for unit in ['h','m','s','ms','us','ns','H','S','MS','US','NS']:
209-
result = to_timedelta(np.arange(5),unit=unit)
210-
expected = Series([ np.timedelta64(i,unit.lower()) for i in np.arange(5).tolist() ])
211-
tm.assert_series_equal(result, expected)
221+
testit(unit,lambda x: x.lower())
222+
223+
# offsets
224+
225+
# m
226+
testit('T',lambda x: 'm')
227+
228+
# ms
229+
testit('L',lambda x: 'ms')
212230

213231
# these will error
214232
self.assertRaises(ValueError, lambda : to_timedelta(['1h']))
215233
self.assertRaises(ValueError, lambda : to_timedelta(['1m']))
234+
self.assertRaises(ValueError, lambda : to_timedelta([1,2],unit='foo'))
235+
self.assertRaises(ValueError, lambda : to_timedelta(1,unit='foo'))
216236

217237
def test_to_timedelta_via_apply(self):
218238
_skip_if_numpy_not_friendly()

pandas/tseries/timedeltas.py

+33-7
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ def to_timedelta(arg, box=True, unit='ns'):
3232
if _np_version_under1p7:
3333
raise ValueError("to_timedelta is not support for numpy < 1.7")
3434

35+
unit = _validate_timedelta_unit(unit)
36+
3537
def _convert_listlike(arg, box, unit):
3638

3739
if isinstance(arg, (list,tuple)):
@@ -40,7 +42,6 @@ def _convert_listlike(arg, box, unit):
4042
if is_timedelta64_dtype(arg):
4143
value = arg.astype('timedelta64[ns]')
4244
elif is_integer_dtype(arg):
43-
unit = _validate_timedelta_unit(unit)
4445

4546
# these are shortcutable
4647
value = arg.astype('timedelta64[{0}]'.format(unit)).astype('timedelta64[ns]')
@@ -67,14 +68,39 @@ def _convert_listlike(arg, box, unit):
6768
# ...so it must be a scalar value. Return scalar.
6869
return _coerce_scalar_to_timedelta_type(arg, unit=unit)
6970

71+
_unit_map = {
72+
'Y' : 'Y',
73+
'y' : 'Y',
74+
'W' : 'W',
75+
'w' : 'W',
76+
'D' : 'D',
77+
'd' : 'D',
78+
'days' : 'D',
79+
'Days' : 'D',
80+
'day' : 'D',
81+
'Day' : 'D',
82+
'M' : 'M',
83+
'H' : 'h',
84+
'h' : 'h',
85+
'm' : 'm',
86+
'T' : 'm',
87+
'S' : 's',
88+
's' : 's',
89+
'L' : 'ms',
90+
'MS' : 'ms',
91+
'ms' : 'ms',
92+
'US' : 'us',
93+
'us' : 'us',
94+
'NS' : 'ns',
95+
'ns' : 'ns',
96+
}
97+
7098
def _validate_timedelta_unit(arg):
7199
""" provide validation / translation for timedelta short units """
72-
73-
if re.search("Y|W|D",arg,re.IGNORECASE) or arg == 'M':
74-
return arg.upper()
75-
elif re.search("h|m|s|ms|us|ns",arg,re.IGNORECASE):
76-
return arg.lower()
77-
raise ValueError("invalid timedelta unit {0} provided".format(arg))
100+
try:
101+
return _unit_map[arg]
102+
except:
103+
raise ValueError("invalid timedelta unit {0} provided".format(arg))
78104

79105
_short_search = re.compile(
80106
"^\s*(?P<neg>-?)\s*(?P<value>\d*\.?\d*)\s*(?P<unit>d|s|ms|us|ns)?\s*$",re.IGNORECASE)

pandas/tslib.pyx

+19-5
Original file line numberDiff line numberDiff line change
@@ -1387,11 +1387,17 @@ cdef inline convert_to_timedelta64(object ts, object unit, object coerce):
13871387
else:
13881388
if util.is_array(ts):
13891389
ts = ts.astype('int64').item()
1390-
ts = cast_from_unit(ts, unit)
1391-
if _np_version_under1p7:
1392-
ts = timedelta(microseconds=ts/1000.0)
1390+
if unit in ['Y','M','W']:
1391+
if _np_version_under1p7:
1392+
raise ValueError("unsupported unit for native timedelta under this numpy {0}".format(unit))
1393+
else:
1394+
ts = np.timedelta64(ts,unit)
13931395
else:
1394-
ts = np.timedelta64(ts)
1396+
ts = cast_from_unit(ts, unit)
1397+
if _np_version_under1p7:
1398+
ts = timedelta(microseconds=ts/1000.0)
1399+
else:
1400+
ts = np.timedelta64(ts)
13951401
elif util.is_string_object(ts):
13961402
if ts in _nat_strings or coerce:
13971403
return np.timedelta64(iNaT)
@@ -1747,6 +1753,12 @@ cpdef inline int64_t cast_from_unit(object ts, object unit) except -1:
17471753
if unit == 'D' or unit == 'd':
17481754
m = 1000000000L * 86400
17491755
p = 6
1756+
elif unit == 'h':
1757+
m = 1000000000L * 3600
1758+
p = 6
1759+
elif unit == 'm':
1760+
m = 1000000000L * 60
1761+
p = 6
17501762
elif unit == 's':
17511763
m = 1000000000L
17521764
p = 6
@@ -1756,9 +1768,11 @@ cpdef inline int64_t cast_from_unit(object ts, object unit) except -1:
17561768
elif unit == 'us':
17571769
m = 1000L
17581770
p = 0
1759-
else:
1771+
elif unit == 'ns' or unit is None:
17601772
m = 1L
17611773
p = 0
1774+
else:
1775+
raise ValueError("cannot cast unit {0}".format(unit))
17621776

17631777
# just give me the unit back
17641778
if ts is None:

0 commit comments

Comments
 (0)