-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
ERR: improves message raised for bad names in usecols (GH14154) #14163
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -981,8 +981,7 @@ def _validate_usecols_arg(usecols): | |
|
||
if usecols is not None: | ||
usecols_dtype = lib.infer_dtype(usecols) | ||
if usecols_dtype not in ('empty', 'integer', | ||
'string', 'unicode'): | ||
if usecols_dtype not in ('empty', 'integer', 'string', 'unicode'): | ||
raise ValueError(msg) | ||
|
||
return set(usecols) | ||
|
@@ -1424,7 +1423,13 @@ def __init__(self, src, **kwds): | |
if (i in self.usecols or n in self.usecols)] | ||
|
||
if len(self.names) < len(self.usecols): | ||
raise ValueError("Usecols do not match names.") | ||
bad_cols = [n for n in self.usecols if n not in self.names] | ||
if len(bad_cols) > 0: | ||
raise ValueError(("%s specified in usecols but not found " | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
use
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
you prob also need a
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
@jreback I'm trying to get the following test to work:
msg = ("c, d specified in usecols but not found in names.")
with tm.assertRaisesRegexp(ValueError, msg):
    self.read_csv(StringIO(data), names=['a', 'b'], usecols=['c', 'd'], header=None)
Currently this test fails because two exceptions are being raised: one by
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
it's possible we need to put this checking earlier |
||
"in names.") % bad_cols) | ||
else: | ||
raise ValueError(("Number of usecols is greater than " | ||
"number of names.")) | ||
|
||
self._set_noconvert_columns() | ||
|
||
|
@@ -2185,16 +2190,21 @@ def _handle_usecols(self, columns, usecols_key): | |
usecols_key is used if there are string usecols. | ||
""" | ||
if self.usecols is not None: | ||
if any([isinstance(u, string_types) for u in self.usecols]): | ||
if any([isinstance(c, string_types) for c in self.usecols]): | ||
if len(columns) > 1: | ||
raise ValueError("If using multiple headers, usecols must " | ||
"be integers.") | ||
bad_cols = [n for n in self.usecols if n not in usecols_key] | ||
if len(bad_cols) > 0: | ||
raise ValueError(("%s specified in usecols but not found " | ||
"in names.") % bad_cols) | ||
|
||
col_indices = [] | ||
for u in self.usecols: | ||
if isinstance(u, string_types): | ||
col_indices.append(usecols_key.index(u)) | ||
for c in self.usecols: | ||
if isinstance(c, string_types): | ||
col_indices.append(usecols_key.index(c)) | ||
else: | ||
col_indices.append(u) | ||
col_indices.append(c) | ||
else: | ||
col_indices = self.usecols | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -83,6 +83,8 @@ def test_usecols(self): | |
# length conflict, passed names and usecols disagree | ||
self.assertRaises(ValueError, self.read_csv, StringIO(data), | ||
names=['a', 'b'], usecols=[1], header=None) | ||
self.assertRaises(ValueError, self.read_csv, StringIO(data), | ||
names=['a', 'b'], usecols=['A'], header=None) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
test with multiple bad ones (it will fail with a formatting error currently) |
||
|
||
def test_usecols_index_col_False(self): | ||
# see gh-9082 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
read -> pd.read_csv()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you also move this to 0.20.0.txt?