Skip to content

Commit cce97ad

Browse files
Refactoring into _validate_usecols_arg as suggested
1 parent ba93833 commit cce97ad

File tree

1 file changed

+29
-46
lines changed

1 file changed

+29
-46
lines changed

pandas/io/parsers.py

+29-46
Original file line numberDiff line numberDiff line change
@@ -1065,38 +1065,6 @@ def _evaluate_usecols(usecols, names):
10651065
return usecols
10661066

10671067

1068-
def _validate_usecols(usecols, names):
1069-
"""
1070-
Validates that all usecols are present in a given
1071-
list of names. If not, raise a ValueError that
1072-
shows what usecols are missing.
1073-
1074-
Parameters
1075-
----------
1076-
usecols : iterable of usecols
1077-
The columns to validate are present in names.
1078-
names : iterable of names
1079-
The column names to check against.
1080-
1081-
Returns
1082-
-------
1083-
usecols : iterable of usecols
1084-
The `usecols` parameter if the validation succeeds.
1085-
1086-
Raises
1087-
------
1088-
ValueError : Columns were missing. Error message will list them.
1089-
"""
1090-
missing = [c for c in usecols if c not in names]
1091-
if len(missing) > 0:
1092-
raise ValueError(
1093-
"Usecols do not match columns, "
1094-
"columns expected but not found: {missing}".format(missing=missing)
1095-
)
1096-
1097-
return usecols
1098-
1099-
11001068
def _validate_skipfooter_arg(skipfooter):
11011069
"""
11021070
Validate the 'skipfooter' parameter.
@@ -1128,24 +1096,31 @@ def _validate_skipfooter_arg(skipfooter):
11281096
return skipfooter
11291097

11301098

1131-
def _validate_usecols_arg(usecols):
1099+
def _validate_usecols_arg(usecols, names=None):
11321100
"""
11331101
Validate the 'usecols' parameter.
11341102
11351103
Checks whether or not the 'usecols' parameter contains all integers
11361104
(column selection by index), strings (column by name) or is a callable.
11371105
Raises a ValueError if that is not the case.
11381106
1107+
If 'names' is passed, validates that all usecols are present
1108+
in a given list of names. If not, raise a ValueError that
1109+
shows what usecols are missing.
1110+
11391111
Parameters
11401112
----------
11411113
usecols : array-like, callable, or None
11421114
List of columns to use when parsing or a callable that can be used
11431115
to filter a list of table columns.
1116+
names : iterable, default None
1117+
Iterable of names to check usecols against.
1118+
11441119
11451120
Returns
11461121
-------
11471122
usecols_tuple : tuple
1148-
A tuple of (verified_usecols, usecols_dtype).
1123+
If names is None, a tuple of (verified_usecols, usecols_dtype).
11491124
11501125
'verified_usecols' is either a set if an array-like is passed in or
11511126
'usecols' if a callable or None is passed in.
@@ -1156,16 +1131,24 @@ def _validate_usecols_arg(usecols):
11561131
msg = ("'usecols' must either be all strings, all unicode, "
11571132
"all integers or a callable")
11581133

1159-
if usecols is not None:
1160-
if callable(usecols):
1161-
return usecols, None
1162-
usecols_dtype = lib.infer_dtype(usecols)
1163-
if usecols_dtype not in ('empty', 'integer',
1164-
'string', 'unicode'):
1165-
raise ValueError(msg)
1134+
if names is None:
1135+
if usecols is not None:
1136+
if callable(usecols):
1137+
return usecols, None
1138+
usecols_dtype = lib.infer_dtype(usecols)
1139+
if usecols_dtype not in ('empty', 'integer',
1140+
'string', 'unicode'):
1141+
raise ValueError(msg)
11661142

1167-
return set(usecols), usecols_dtype
1168-
return usecols, None
1143+
return set(usecols), usecols_dtype
1144+
return usecols, None
1145+
else:
1146+
missing = [c for c in usecols if c not in names]
1147+
if len(missing) > 0:
1148+
raise ValueError(
1149+
"Usecols do not match columns, columns expected "
1150+
"but not found: {missing}".format(missing=missing)
1151+
)
11691152

11701153

11711154
def _validate_parse_dates_arg(parse_dates):
@@ -1694,14 +1677,14 @@ def __init__(self, src, **kwds):
16941677
# GH 14671
16951678
if (self.usecols_dtype == 'string' and
16961679
not set(usecols).issubset(self.orig_names)):
1697-
_validate_usecols(usecols, self.orig_names)
1680+
_validate_usecols_arg(usecols, self.orig_names)
16981681

16991682
if len(self.names) > len(usecols):
17001683
self.names = [n for i, n in enumerate(self.names)
17011684
if (i in usecols or n in usecols)]
17021685

17031686
if len(self.names) < len(usecols):
1704-
_validate_usecols(usecols, self.names)
1687+
_validate_usecols_arg(usecols, self.names)
17051688

17061689
self._set_noconvert_columns()
17071690

@@ -2480,7 +2463,7 @@ def _handle_usecols(self, columns, usecols_key):
24802463
try:
24812464
col_indices.append(usecols_key.index(col))
24822465
except ValueError:
2483-
_validate_usecols(self.usecols, usecols_key)
2466+
_validate_usecols_arg(self.usecols, usecols_key)
24842467
else:
24852468
col_indices.append(col)
24862469
else:

0 commit comments

Comments
 (0)