BUG: raise more helpful error message in csv parser for duplicate index values (#835)

adamklein · adamklein · commit ae5db23ebad2 · 2012-02-28T14:21:10.000-05:00
diff --git a/RELEASE.rst b/RELEASE.rst
@@ -51,7 +51,7 @@ pandas 0.7.1
   - Fix to_records where columns are non-strings (#822)
   - Fix Index.intersection where indices have incomparable types (#811)
   - Fix ExcelFile throwing an exception for two-line file (#837)
-  - Add ability to suppress index inference in csv parser (related to #835)
+  - Add clearer error message in csv parser (#835)
 
 pandas 0.7.0
 ============
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -91,7 +91,7 @@ def read_csv(filepath_or_buffer, sep=',', header=0, index_col=None, names=None,
              skiprows=None, na_values=None, parse_dates=False,
              date_parser=None, nrows=None, iterator=False, chunksize=None,
              skip_footer=0, converters=None, verbose=False, delimiter=None,
-             encoding=None, infer_index=True):
+             encoding=None):
     if hasattr(filepath_or_buffer, 'read'):
         f = filepath_or_buffer
     else:
@@ -117,8 +117,7 @@ def read_csv(filepath_or_buffer, sep=',', header=0, index_col=None, names=None,
                         skip_footer=skip_footer,
                         converters=converters,
                         verbose=verbose,
-                        encoding=encoding,
-                        infer_index=infer_index)
+                        encoding=encoding)
 
     if nrows is not None:
         return parser.get_chunk(nrows)
@@ -205,8 +204,6 @@ class TextParser(object):
         Number of line at bottom of file to skip
     encoding : string, default None
         Encoding to use for UTF when reading/writing (ex. 'utf-8')
-    infer_index : boolean, default True
-        If index_col is None, will try to infer index unless this is False
     """
 
     # common NA values
@@ -220,7 +217,7 @@ def __init__(self, f, delimiter=None, names=None, header=0,
                  index_col=None, na_values=None, parse_dates=False,
                  date_parser=None, chunksize=None, skiprows=None,
                  skip_footer=0, converters=None, verbose=False,
-                 encoding=None, infer_index=True):
+                 encoding=None):
         """
         Workhorse function for processing nested list into DataFrame
 
@@ -237,7 +234,7 @@ def __init__(self, f, delimiter=None, names=None, header=0,
         self.chunksize = chunksize
         self.passed_names = names is not None
         self.encoding = encoding
-        self.infer_index = infer_index
+
 
         if com.is_integer(skiprows):
             skiprows = range(skiprows)
@@ -404,7 +401,7 @@ def _get_index_name(self):
                     return line
 
         if implicit_first_cols > 0:
-            if self.index_col is None and self.infer_index:
+            if self.index_col is None:
                 if implicit_first_cols == 1:
                     self.index_col = 0
                 else:
@@ -485,7 +482,8 @@ def get_chunk(self, rows=None):
 
         if not index._verify_integrity():
             dups = index.get_duplicates()
-            raise Exception('Index has duplicates: %s' % str(dups))
+            err_msg = 'Tried columns 1-X as index but found duplicates %s'
+            raise Exception(err_msg % str(dups))
 
         if len(self.columns) != len(zipped_content):
             raise Exception('wrong number of columns')