Skip to content

BUG: Bug in to_timedelta that accepted invalid units and misinterpreted m/h (GH7611, GH6423) #7616

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 30, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/v0.14.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ Bug Fixes
- Bug in groupby ``.nth`` with a Series and integer-like column name (:issue:`7559`)

- Bug in ``value_counts`` where ``NaT`` did not qualify as missing (``NaN``) (:issue:`7423`)

- Bug in ``to_timedelta`` that accepted invalid units and misinterpreted 'm/h' (:issue:`7611`, :issue: `6423`)


- Bug in ``Panel.apply`` with a multi-index as an axis (:issue:`7469`)
Expand Down
32 changes: 26 additions & 6 deletions pandas/tseries/tests/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,20 +199,40 @@ def conv(v):
expected = Series([ np.timedelta64(1,'D') ]*5)
tm.assert_series_equal(result, expected)

def testit(unit, transform):

# array
result = to_timedelta(np.arange(5),unit=unit)
expected = Series([ np.timedelta64(i,transform(unit)) for i in np.arange(5).tolist() ])
tm.assert_series_equal(result, expected)

# scalar
result = to_timedelta(2,unit=unit)
expected = np.timedelta64(2,transform(unit)).astype('timedelta64[ns]')
self.assert_numpy_array_equal(result,expected)

# validate all units
# GH 6855
for unit in ['Y','M','W','D','y','w','d']:
result = to_timedelta(np.arange(5),unit=unit)
expected = Series([ np.timedelta64(i,unit.upper()) for i in np.arange(5).tolist() ])
tm.assert_series_equal(result, expected)
testit(unit,lambda x: x.upper())
for unit in ['days','day','Day','Days']:
testit(unit,lambda x: 'D')
for unit in ['h','m','s','ms','us','ns','H','S','MS','US','NS']:
result = to_timedelta(np.arange(5),unit=unit)
expected = Series([ np.timedelta64(i,unit.lower()) for i in np.arange(5).tolist() ])
tm.assert_series_equal(result, expected)
testit(unit,lambda x: x.lower())

# offsets

# m
testit('T',lambda x: 'm')

# ms
testit('L',lambda x: 'ms')

# these will error
self.assertRaises(ValueError, lambda : to_timedelta(['1h']))
self.assertRaises(ValueError, lambda : to_timedelta(['1m']))
self.assertRaises(ValueError, lambda : to_timedelta([1,2],unit='foo'))
self.assertRaises(ValueError, lambda : to_timedelta(1,unit='foo'))

def test_to_timedelta_via_apply(self):
_skip_if_numpy_not_friendly()
Expand Down
40 changes: 33 additions & 7 deletions pandas/tseries/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ def to_timedelta(arg, box=True, unit='ns'):
if _np_version_under1p7:
raise ValueError("to_timedelta is not support for numpy < 1.7")

unit = _validate_timedelta_unit(unit)

def _convert_listlike(arg, box, unit):

if isinstance(arg, (list,tuple)):
Expand All @@ -40,7 +42,6 @@ def _convert_listlike(arg, box, unit):
if is_timedelta64_dtype(arg):
value = arg.astype('timedelta64[ns]')
elif is_integer_dtype(arg):
unit = _validate_timedelta_unit(unit)

# these are shortcutable
value = arg.astype('timedelta64[{0}]'.format(unit)).astype('timedelta64[ns]')
Expand All @@ -67,14 +68,39 @@ def _convert_listlike(arg, box, unit):
# ...so it must be a scalar value. Return scalar.
return _coerce_scalar_to_timedelta_type(arg, unit=unit)

_unit_map = {
'Y' : 'Y',
'y' : 'Y',
'W' : 'W',
'w' : 'W',
'D' : 'D',
'd' : 'D',
'days' : 'D',
'Days' : 'D',
'day' : 'D',
'Day' : 'D',
'M' : 'M',
'H' : 'h',
'h' : 'h',
'm' : 'm',
'T' : 'm',
'S' : 's',
's' : 's',
'L' : 'ms',
'MS' : 'ms',
'ms' : 'ms',
'US' : 'us',
'us' : 'us',
'NS' : 'ns',
'ns' : 'ns',
}

def _validate_timedelta_unit(arg):
""" provide validation / translation for timedelta short units """

if re.search("Y|W|D",arg,re.IGNORECASE) or arg == 'M':
return arg.upper()
elif re.search("h|m|s|ms|us|ns",arg,re.IGNORECASE):
return arg.lower()
raise ValueError("invalid timedelta unit {0} provided".format(arg))
try:
return _unit_map[arg]
except:
raise ValueError("invalid timedelta unit {0} provided".format(arg))

_short_search = re.compile(
"^\s*(?P<neg>-?)\s*(?P<value>\d*\.?\d*)\s*(?P<unit>d|s|ms|us|ns)?\s*$",re.IGNORECASE)
Expand Down
24 changes: 19 additions & 5 deletions pandas/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1387,11 +1387,17 @@ cdef inline convert_to_timedelta64(object ts, object unit, object coerce):
else:
if util.is_array(ts):
ts = ts.astype('int64').item()
ts = cast_from_unit(ts, unit)
if _np_version_under1p7:
ts = timedelta(microseconds=ts/1000.0)
if unit in ['Y','M','W']:
if _np_version_under1p7:
raise ValueError("unsupported unit for native timedelta under this numpy {0}".format(unit))
else:
ts = np.timedelta64(ts,unit)
else:
ts = np.timedelta64(ts)
ts = cast_from_unit(ts, unit)
if _np_version_under1p7:
ts = timedelta(microseconds=ts/1000.0)
else:
ts = np.timedelta64(ts)
elif util.is_string_object(ts):
if ts in _nat_strings or coerce:
return np.timedelta64(iNaT)
Expand Down Expand Up @@ -1747,6 +1753,12 @@ cpdef inline int64_t cast_from_unit(object ts, object unit) except -1:
if unit == 'D' or unit == 'd':
m = 1000000000L * 86400
p = 6
elif unit == 'h':
m = 1000000000L * 3600
p = 6
elif unit == 'm':
m = 1000000000L * 60
p = 6
elif unit == 's':
m = 1000000000L
p = 6
Expand All @@ -1756,9 +1768,11 @@ cpdef inline int64_t cast_from_unit(object ts, object unit) except -1:
elif unit == 'us':
m = 1000L
p = 0
else:
elif unit == 'ns' or unit is None:
m = 1L
p = 0
else:
raise ValueError("cannot cast unit {0}".format(unit))

# just give me the unit back
if ts is None:
Expand Down