@@ -987,24 +987,42 @@ def _evaluate_usecols(usecols, names):
987
987
988
988
def _validate_usecols_arg (usecols ):
989
989
"""
990
- Check whether or not the 'usecols' parameter
991
- contains all integers (column selection by index),
992
- strings (column by name) or is a callable. Raises
993
- a ValueError if that is not the case.
990
+ Validate the 'usecols' parameter.
991
+
992
+ Checks whether or not the 'usecols' parameter contains all integers
993
+ (column selection by index), strings (column by name) or is a callable.
994
+ Raises a ValueError if that is not the case.
995
+
996
+ Parameters
997
+ ----------
998
+ usecols : array-like, callable, or None
999
+ List of columns to use when parsing or a callable that can be used
1000
+ to filter a list of table columns.
1001
+
1002
+ Returns
1003
+ -------
1004
+ usecols_tuple : tuple
1005
+ A tuple of (verified_usecols, usecols_dtype).
1006
+
1007
+ 'verified_usecols' is either a set if an array-like is passed in or
1008
+ 'usecols' if a callable or None is passed in.
1009
+
1010
+ 'usecols_dtype' is the inferred dtype of 'usecols' if an array-like
1011
+ is passed in or None if a callable or None is passed in.
994
1012
"""
995
1013
msg = ("'usecols' must either be all strings, all unicode, "
996
1014
"all integers or a callable" )
997
1015
998
1016
if usecols is not None :
999
1017
if callable (usecols ):
1000
- return usecols
1018
+ return usecols , None
1001
1019
usecols_dtype = lib .infer_dtype (usecols )
1002
1020
if usecols_dtype not in ('empty' , 'integer' ,
1003
1021
'string' , 'unicode' ):
1004
1022
raise ValueError (msg )
1005
1023
1006
- return set (usecols )
1007
- return usecols
1024
+ return set (usecols ), usecols_dtype
1025
+ return usecols , None
1008
1026
1009
1027
1010
1028
def _validate_parse_dates_arg (parse_dates ):
@@ -1473,7 +1491,8 @@ def __init__(self, src, **kwds):
1473
1491
self ._reader = _parser .TextReader (src , ** kwds )
1474
1492
1475
1493
# XXX
1476
- self .usecols = _validate_usecols_arg (self ._reader .usecols )
1494
+ self .usecols , self .usecols_dtype = _validate_usecols_arg (
1495
+ self ._reader .usecols )
1477
1496
1478
1497
passed_names = self .names is None
1479
1498
@@ -1549,12 +1568,29 @@ def close(self):
1549
1568
pass
1550
1569
1551
1570
def _set_noconvert_columns (self ):
1571
+ """
1572
+ Set the columns that should not undergo dtype conversions.
1573
+
1574
+ Currently, any column that is involved with date parsing will not
1575
+ undergo such conversions.
1576
+ """
1552
1577
names = self .orig_names
1553
- usecols = self .usecols
1578
+ if self .usecols_dtype == 'integer' :
1579
+ # Materialize the integer set as a list for positional lookup.
1580
+ # NOTE(review): set order is not guaranteed sorted; consider sorted().
1581
+ usecols = list (self .usecols )
1582
+ elif (callable (self .usecols ) or
1583
+ self .usecols_dtype not in ('empty' , None )):
1584
+ # The names attribute should have the correct columns
1585
+ # in the proper order for indexing with parse_dates.
1586
+ usecols = self .names [:]
1587
+ else :
1588
+ # Usecols is empty.
1589
+ usecols = None
1554
1590
1555
1591
def _set (x ):
1556
- if usecols and is_integer (x ):
1557
- x = list ( usecols ) [x ]
1592
+ if usecols is not None and is_integer (x ):
1593
+ x = usecols [x ]
1558
1594
1559
1595
if not is_integer (x ):
1560
1596
x = names .index (x )
@@ -1792,7 +1828,7 @@ def __init__(self, f, **kwds):
1792
1828
self .skipinitialspace = kwds ['skipinitialspace' ]
1793
1829
self .lineterminator = kwds ['lineterminator' ]
1794
1830
self .quoting = kwds ['quoting' ]
1795
- self .usecols = _validate_usecols_arg (kwds ['usecols' ])
1831
+ self .usecols , _ = _validate_usecols_arg (kwds ['usecols' ])
1796
1832
self .skip_blank_lines = kwds ['skip_blank_lines' ]
1797
1833
1798
1834
self .names_passed = kwds ['names' ] or None
0 commit comments