From 26e965a2f57b5ff3d465659e2109b7b75f444b0d Mon Sep 17 00:00:00 2001 From: Weston Renoud Date: Mon, 23 Dec 2013 16:04:04 -0700 Subject: [PATCH] Fixes #5766 This is an issue in read_csv/read_table where there is no header and both usecols and names are assigned but the last column is not included. This caused an IndexError after reaching the last column specified in usecols. --- pandas/io/tests/test_parsers.py | 9 ++++----- pandas/parser.pyx | 5 ++++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index 93a26b70a019e..7f3b9ecb2ca5b 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -2037,16 +2037,16 @@ def test_usecols(self): 4,5,6 7,8,9 10,11,12""" - result = self.read_csv(StringIO(data), names=['b', 'c'], - header=None, usecols=[1, 2]) + result = self.read_csv(StringIO(data), names=['a', 'b'], + header=None, usecols=[0, 1]) expected = self.read_csv(StringIO(data), names=['a', 'b', 'c'], header=None) - expected = expected[['b', 'c']] + expected = expected[['a', 'b']] tm.assert_frame_equal(result, expected) result2 = self.read_csv(StringIO(data), names=['a', 'b', 'c'], - header=None, usecols=['b', 'c']) + header=None, usecols=['a', 'b']) tm.assert_frame_equal(result2, result) # length conflict, passed names and usecols disagree @@ -2072,7 +2072,6 @@ def test_catch_too_many_names(self): 10,11,12\n""" tm.assertRaises(Exception, read_csv, StringIO(data), header=0, names=['a', 'b', 'c', 'd']) - class TestPythonParser(ParserTests, tm.TestCase): def test_negative_skipfooter_raises(self): text = """#foo,a,b,c diff --git a/pandas/parser.pyx b/pandas/parser.pyx index 36b4b91023a73..4a93499f880c9 100644 --- a/pandas/parser.pyx +++ b/pandas/parser.pyx @@ -1135,8 +1135,11 @@ cdef class TextReader: cdef _get_column_name(self, Py_ssize_t i, Py_ssize_t nused): if self.has_usecols and self.names is not None: - if len(self.names) == len(self.usecols): + if 
len(self.names) == len(self.usecols) and nused < len(self.names): return self.names[nused] + # addresses Issue #5766 + elif nused >= len(self.names): + return None else: return self.names[i - self.leading_cols] else: