Skip to content

BUG: to_numeric should raise if input is more than one dimension #11776 #11780

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 10, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.18.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -196,3 +196,4 @@ Bug Fixes
- Bug in ``pd.rolling_median`` where memory allocation failed even with sufficient memory (:issue:`11696`)

- Bug in ``df.replace`` while replacing value in mixed dtype ``DataFrame`` (:issue:`11698`)
- Bug in ``to_numeric`` where it does not raise if input has more than one dimension (:issue:`11776`)
9 changes: 8 additions & 1 deletion pandas/tools/tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ def test_error(self):
expected = pd.Series([1, -3.14, np.nan])
tm.assert_series_equal(res, expected)


def test_list(self):
s = ['1', '-3.14', '7']
res = to_numeric(s)
Expand All @@ -136,6 +135,14 @@ def test_all_nan(self):
expected = pd.Series([np.nan, np.nan, np.nan])
tm.assert_series_equal(res, expected)

def test_type_check(self):
# GH 11776
df = pd.DataFrame({'a': [1, -3.14, 7], 'b': ['4', '5', '6']})
with tm.assertRaisesRegexp(TypeError, "1-d array"):
to_numeric(df)
for errors in ['ignore', 'raise', 'coerce']:
with tm.assertRaisesRegexp(TypeError, "1-d array"):
to_numeric(df, errors=errors)

if __name__ == '__main__':
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
Expand Down
4 changes: 3 additions & 1 deletion pandas/tools/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def to_numeric(arg, errors='raise'):

Parameters
----------
arg : list, tuple or array of objects, or Series
arg : list, tuple, 1-d array, or Series
errors : {'ignore', 'raise', 'coerce'}, default 'raise'
- If 'raise', then invalid parsing will raise an exception
- If 'coerce', then invalid parsing will be set as NaN
Expand Down Expand Up @@ -84,6 +84,8 @@ def to_numeric(arg, errors='raise'):
index, name = arg.index, arg.name
elif isinstance(arg, (list, tuple)):
arg = np.array(arg, dtype='O')
elif getattr(arg, 'ndim', 1) > 1:
raise TypeError('arg must be a list, tuple, 1-d array, or Series')

conv = arg
arg = com._ensure_object(arg)
Expand Down
10 changes: 10 additions & 0 deletions pandas/tseries/tests/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -2984,6 +2984,16 @@ def test_to_datetime_1703(self):
result = index.to_datetime()
self.assertEqual(result[0], Timestamp('1/1/2012'))

def test_to_datetime_dimensions(self):
# GH 11776
df = DataFrame({'a': ['1/1/2012', '1/2/2012'],
'b': ['12/30/2012', '12/31/2012']})
with tm.assertRaisesRegexp(TypeError, "1-d array"):
to_datetime(df)
for errors in ['ignore', 'raise', 'coerce']:
with tm.assertRaisesRegexp(TypeError, "1-d array"):
to_datetime(df, errors=errors)

def test_get_loc_msg(self):
idx = period_range('2000-1-1', freq='A', periods=10)
bad_period = Period('2012', 'A')
Expand Down
30 changes: 20 additions & 10 deletions pandas/tseries/tests/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,26 +444,36 @@ def check(value):

def test_timedelta_range(self):

expected = to_timedelta(np.arange(5),unit='D')
result = timedelta_range('0 days',periods=5,freq='D')
expected = to_timedelta(np.arange(5), unit='D')
result = timedelta_range('0 days', periods=5, freq='D')
tm.assert_index_equal(result, expected)

expected = to_timedelta(np.arange(11),unit='D')
result = timedelta_range('0 days','10 days',freq='D')
expected = to_timedelta(np.arange(11), unit='D')
result = timedelta_range('0 days', '10 days', freq='D')
tm.assert_index_equal(result, expected)

expected = to_timedelta(np.arange(5),unit='D') + Second(2) + Day()
result = timedelta_range('1 days, 00:00:02','5 days, 00:00:02',freq='D')
expected = to_timedelta(np.arange(5), unit='D') + Second(2) + Day()
result = timedelta_range('1 days, 00:00:02', '5 days, 00:00:02', freq='D')
tm.assert_index_equal(result, expected)

expected = to_timedelta([1,3,5,7,9],unit='D') + Second(2)
result = timedelta_range('1 days, 00:00:02',periods=5,freq='2D')
expected = to_timedelta([1,3,5,7,9], unit='D') + Second(2)
result = timedelta_range('1 days, 00:00:02', periods=5, freq='2D')
tm.assert_index_equal(result, expected)

expected = to_timedelta(np.arange(50),unit='T')*30
result = timedelta_range('0 days',freq='30T',periods=50)
expected = to_timedelta(np.arange(50), unit='T') * 30
result = timedelta_range('0 days', freq='30T', periods=50)
tm.assert_index_equal(result, expected)

# GH 11776
arr = np.arange(10).reshape(2, 5)
df = pd.DataFrame(np.arange(10).reshape(2, 5))
for arg in (arr, df):
with tm.assertRaisesRegexp(TypeError, "1-d array"):
to_timedelta(arg)
for errors in ['ignore', 'raise', 'coerce']:
with tm.assertRaisesRegexp(TypeError, "1-d array"):
to_timedelta(arg, errors=errors)

# issue10583
df = pd.DataFrame(np.random.normal(size=(10,4)))
df.index = pd.timedelta_range(start='0s', periods=10, freq='s')
Expand Down
8 changes: 5 additions & 3 deletions pandas/tseries/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def to_timedelta(arg, unit='ns', box=True, errors='raise', coerce=None):

Parameters
----------
arg : string, timedelta, array of strings (with possible NAs)
arg : string, timedelta, list, tuple, 1-d array, or Series
unit : unit of the arg (D,h,m,s,ms,us,ns) when the arg is an integer/float number
box : boolean, default True
- If True returns a Timedelta/TimedeltaIndex of the results
Expand All @@ -37,7 +37,7 @@ def to_timedelta(arg, unit='ns', box=True, errors='raise', coerce=None):

def _convert_listlike(arg, box, unit, name=None):

if isinstance(arg, (list,tuple)) or ((hasattr(arg,'__iter__') and not hasattr(arg,'dtype'))):
if isinstance(arg, (list, tuple)) or not hasattr(arg, 'dtype'):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

note that there's no need to check hasattr(arg,'__iter__') because is_list_like already checks for that

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

np

arg = np.array(list(arg), dtype='O')

# these are shortcutable
Expand All @@ -62,8 +62,10 @@ def _convert_listlike(arg, box, unit, name=None):
return Series(values, index=arg.index, name=arg.name, dtype='m8[ns]')
elif isinstance(arg, ABCIndexClass):
return _convert_listlike(arg, box=box, unit=unit, name=arg.name)
elif is_list_like(arg):
elif is_list_like(arg) and getattr(arg, 'ndim', 1) == 1:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this extra check for ndim is needed because is_list_like(DataFrame) is True

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thxs

return _convert_listlike(arg, box=box, unit=unit)
elif getattr(arg, 'ndim', 1) > 1:
raise TypeError('arg must be a string, timedelta, list, tuple, 1-d array, or Series')

# ...so it must be a scalar value. Return scalar.
return _coerce_scalar_to_timedelta_type(arg, unit=unit, box=box, errors=errors)
Expand Down
7 changes: 4 additions & 3 deletions pandas/tseries/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,

Parameters
----------
arg : string, datetime, array of strings (with possible NAs)
arg : string, datetime, list, tuple, 1-d array, or Series
errors : {'ignore', 'raise', 'coerce'}, default 'raise'
- If 'raise', then invalid parsing will raise an exception
- If 'coerce', then invalid parsing will be set as NaT
Expand Down Expand Up @@ -288,7 +288,7 @@ def _to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,

def _convert_listlike(arg, box, format, name=None):

if isinstance(arg, (list,tuple)):
if isinstance(arg, (list, tuple)):
arg = np.array(arg, dtype='O')

# these are shortcutable
Expand All @@ -312,8 +312,9 @@ def _convert_listlike(arg, box, format, name=None):
result = arg.astype('datetime64[ns]')
if box:
return DatetimeIndex(result, tz='utc' if utc else None, name=name)

return result
elif getattr(arg, 'ndim', 1) > 1:
raise TypeError('arg must be a string, datetime, list, tuple, 1-d array, or Series')

arg = com._ensure_object(arg)
require_iso8601 = False
Expand Down