From 1509a22dec879d0ef3a74dc80b86ecced7c65eaa Mon Sep 17 00:00:00 2001 From: Weston Renoud Date: Wed, 25 Dec 2013 02:52:28 +0200 Subject: [PATCH 1/2] TST: corner_case for read_csv with usecols GH5766 --- pandas/io/tests/test_parsers.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index 93a26b70a019e..484c0c89fe72d 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -2049,6 +2049,16 @@ def test_usecols(self): header=None, usecols=['b', 'c']) tm.assert_frame_equal(result2, result) + + # 5766 + result = self.read_csv(StringIO(data), names=['a', 'b'], + header=None, usecols=[0, 1]) + + expected = self.read_csv(StringIO(data), names=['a', 'b', 'c'], + header=None) + expected = expected[['a', 'b']] + tm.assert_frame_equal(result, expected) + # length conflict, passed names and usecols disagree self.assertRaises(ValueError, self.read_csv, StringIO(data), names=['a', 'b'], usecols=[1], header=None) From 5e6e590e622bcf079421d013f0be1830fec20ca4 Mon Sep 17 00:00:00 2001 From: y-p Date: Wed, 25 Dec 2013 03:38:47 +0200 Subject: [PATCH 2/2] BUG: regression in read_csv parser handling of usecols GH5766 GH4406 fixed one bug and introduced (exposed?) this corner case where the loop over columns continues even after all usecols have been fulfilled, causing an out-of-bound lookup in names. --- pandas/parser.pyx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/parser.pyx b/pandas/parser.pyx index 36b4b91023a73..bb93097debf71 100644 --- a/pandas/parser.pyx +++ b/pandas/parser.pyx @@ -898,6 +898,9 @@ cdef class TextReader: if i < self.leading_cols: # Pass through leading columns always name = i + elif self.usecols and nused == len(self.usecols): + # Once we've gathered all requested columns, stop. GH5766 + break else: name = self._get_column_name(i, nused) if self.has_usecols and not (i in self.usecols or