pandas-dev · GGordonGordon · Nov 17, 2016 · jorisvandenbossche · Jan 2, 2017 · gfyoung
diff --git a/doc/source/whatsnew/v0.19.2.txt b/doc/source/whatsnew/v0.19.2.txt
@@ -32,6 +32,7 @@ Bug Fixes
 
 
 
+- Bug in pd.read_csv - catch missing columns if usecols and header lengths match (:issue:`14671`)
 
 
 

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -1425,12 +1425,14 @@ def __init__(self, src, **kwds):
         self.orig_names = self.names[:]
 
         if self.usecols:
-            if len(self.names) > len(self.usecols):
-                self.names = [n for i, n in enumerate(self.names)
-                              if (i in self.usecols or n in self.usecols)]
-
-            if len(self.names) < len(self.usecols):
-                raise ValueError("Usecols do not match names.")
+            if self._reader.file_header is not None:
+                h = self._reader.file_header[0]
+                usecol_len = len(set(self.usecols) - set(h))
+                usecoli_len = len(set(self.usecols) - set(range(0, len(h))))
+                if usecol_len > 0 and usecoli_len > 0:
+                    raise ValueError("Usecols do not match names.")
+
+            self.names = self._filter_usecols(self.names)
 
         self._set_noconvert_columns()
 

diff --git a/pandas/io/tests/parser/usecols.py b/pandas/io/tests/parser/usecols.py
@@ -54,6 +54,10 @@ def test_usecols(self):
         expected.columns = ['foo', 'bar']
         tm.assert_frame_equal(result, expected)
 
+        # same length but usecols column doesn't exist - see gh-14671
+        self.assertRaises(ValueError, self.read_csv, StringIO(data),
+                          usecols=['a', 'b', 'z'])
+
         data = """\
 1,2,3
 4,5,6

diff --git a/pandas/parser.pyx b/pandas/parser.pyx
@@ -290,7 +290,7 @@ cdef class TextReader:
         object na_values
         object memory_map
         object as_recarray
-        object header, orig_header, names, header_start, header_end
+        object header, orig_header, names, header_start, header_end, file_header
         object index_col
         object low_memory
         object skiprows
@@ -775,6 +775,12 @@ cdef class TextReader:
                 data_line = hr + 1
                 header.append(this_header)
 
+            self.file_header = header[:]
+
+            #if self.usecols is not None:
+            #    if len(set(self.usecols) - set(header[0])) > 0 and len(set(self.usecols) - set(range(0,field_count))) > 0:
+            #        raise ValueError("Usecols do not match names.")
+
             if self.names is not None:
                 header = [ self.names ]
Original file line number	Diff line number	Diff line change
Expand Up		@@ -32,6 +32,7 @@ Bug Fixes



		- Bug in pd.read_csv - catch missing columns if usecols and header lengths match (:issue:`14671`)
jorisvandenbossche (Member, Jan 2, 2017): Can you move this to 0.20.0.txt?
gfyoung (Member, Jan 4, 2017, edited): Use `pd.read_csv()` instead of `pd.read_csv`. Let's generalize this a little. This PR is not actually about handling the case when the `header` and `usecols` lengths match. It's about properly handling situations when `usecols` provides non-existent columns.



Expand Down