pandas-dev · jreback · Sep 27, 2013 · Sep 27, 2013
diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -480,6 +480,7 @@ Bug Fixes
   - Fixed wrong check for overlapping in ``DatetimeIndex.union`` (:issue:`4564`)
   - Fixed conflict between thousands separator and date parser in csv_parser (:issue:`4678`)
   - Fix appending when dtypes are not the same (error showing mixing float/np.datetime64) (:issue:`4993`)
+  - Fixed wrong index name during ``read_csv`` if using ``usecols``. Applies to the C parser only. (:issue:`4201`)
 
 pandas 0.12.0
 -------------

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -2,7 +2,7 @@
 Module contains tools for processing files into DataFrames or other objects
 """
 from __future__ import print_function
-from pandas.compat import range, lrange, StringIO, lzip, zip
+from pandas.compat import range, lrange, StringIO, lzip, zip, string_types
 from pandas import compat
 import re
 import csv
@@ -15,7 +15,6 @@
 import datetime
 import pandas.core.common as com
 from pandas.core.config import get_option
-from pandas import compat
 from pandas.io.date_converters import generic_parser
 from pandas.io.common import get_filepath_or_buffer
 
@@ -24,7 +23,7 @@
 import pandas.lib as lib
 import pandas.tslib as tslib
 import pandas.parser as _parser
-from pandas.tseries.period import Period
+
 
 _parser_params = """Also supports optionally iterating or breaking of the file
 into chunks.
@@ -982,7 +981,19 @@ def __init__(self, src, **kwds):
             else:
                 self.names = lrange(self._reader.table_width)
 
-        # XXX
+        # If the names were inferred (not passed by user) and usecols is defined,
+        # then ensure names refers to the used columns, not the document's columns.
+        if self.usecols and passed_names:
+            col_indices = []
+            for u in self.usecols:
+                if isinstance(u, string_types):
+                    col_indices.append(self.names.index(u))
+                else:
+                    col_indices.append(u)
+            self.names = [n for i, n in enumerate(self.names) if i in col_indices]
+            if len(self.names) < len(self.usecols):
+                raise ValueError("Usecols do not match names.")
+
         self._set_noconvert_columns()
 
         self.orig_names = self.names

diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
@@ -1865,6 +1865,32 @@ def test_parse_integers_above_fp_precision(self):
 
         self.assertTrue(np.array_equal(result['Numbers'], expected['Numbers']))
 
+    def test_usecols_index_col_conflict(self):
+        # Issue 4201  Test that index_col as integer reflects usecols
+        data = """SecId,Time,Price,P2,P3
+10000,2013-5-11,100,10,1
+500,2013-5-12,101,11,1
+"""
+        expected = DataFrame({'Price': [100, 101]}, index=[datetime(2013, 5, 11), datetime(2013, 5, 12)])
+        expected.index.name = 'Time'
+
+        df = pd.read_csv(StringIO(data), usecols=['Time', 'Price'], parse_dates=True, index_col=0)
+        tm.assert_frame_equal(expected, df)
+
+        df = pd.read_csv(StringIO(data), usecols=['Time', 'Price'], parse_dates=True, index_col='Time')
+        tm.assert_frame_equal(expected, df)
+
+        df = pd.read_csv(StringIO(data), usecols=[1, 2], parse_dates=True, index_col='Time')
+        tm.assert_frame_equal(expected, df)
+
+        df = pd.read_csv(StringIO(data), usecols=[1, 2], parse_dates=True, index_col=0)
+        tm.assert_frame_equal(expected, df)
+
+        expected = DataFrame({'P3': [1, 1], 'Price': (100, 101), 'P2': (10, 11)})
+        expected = expected.set_index(['Price', 'P2'])
+        df = pd.read_csv(StringIO(data), usecols=['Price', 'P2', 'P3'], parse_dates=True, index_col=['Price', 'P2'])
+        tm.assert_frame_equal(expected, df)
+
 
 class TestPythonParser(ParserTests, unittest.TestCase):