Skip to content

Commit ae5db23

Browse files
committed
BUG: raise more helpful error msg for #835
1 parent cdb5f7d commit ae5db23

File tree

2 files changed

+8
-10
lines changed

2 files changed

+8
-10
lines changed

RELEASE.rst

+1-1
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ pandas 0.7.1
5151
- Fix to_records where columns are non-strings (#822)
5252
- Fix Index.intersection where indices have incomparable types (#811)
5353
- Fix ExcelFile throwing an exception for two-line file (#837)
54-
- Add ability to suppress index inference in csv parser (related to #835)
54+
- Add clearer error message in csv parser (#835)
5555

5656
pandas 0.7.0
5757
============

pandas/io/parsers.py

+7-9
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,7 @@ def read_csv(filepath_or_buffer, sep=',', header=0, index_col=None, names=None,
9191
skiprows=None, na_values=None, parse_dates=False,
9292
date_parser=None, nrows=None, iterator=False, chunksize=None,
9393
skip_footer=0, converters=None, verbose=False, delimiter=None,
94-
encoding=None, infer_index=True):
94+
encoding=None):
9595
if hasattr(filepath_or_buffer, 'read'):
9696
f = filepath_or_buffer
9797
else:
@@ -117,8 +117,7 @@ def read_csv(filepath_or_buffer, sep=',', header=0, index_col=None, names=None,
117117
skip_footer=skip_footer,
118118
converters=converters,
119119
verbose=verbose,
120-
encoding=encoding,
121-
infer_index=infer_index)
120+
encoding=encoding)
122121

123122
if nrows is not None:
124123
return parser.get_chunk(nrows)
@@ -205,8 +204,6 @@ class TextParser(object):
205204
Number of line at bottom of file to skip
206205
encoding : string, default None
207206
Encoding to use for UTF when reading/writing (ex. 'utf-8')
208-
infer_index : boolean, default True
209-
If index_col is None, will try to infer index unless this is False
210207
"""
211208

212209
# common NA values
@@ -220,7 +217,7 @@ def __init__(self, f, delimiter=None, names=None, header=0,
220217
index_col=None, na_values=None, parse_dates=False,
221218
date_parser=None, chunksize=None, skiprows=None,
222219
skip_footer=0, converters=None, verbose=False,
223-
encoding=None, infer_index=True):
220+
encoding=None):
224221
"""
225222
Workhorse function for processing nested list into DataFrame
226223
@@ -237,7 +234,7 @@ def __init__(self, f, delimiter=None, names=None, header=0,
237234
self.chunksize = chunksize
238235
self.passed_names = names is not None
239236
self.encoding = encoding
240-
self.infer_index = infer_index
237+
241238

242239
if com.is_integer(skiprows):
243240
skiprows = range(skiprows)
@@ -404,7 +401,7 @@ def _get_index_name(self):
404401
return line
405402

406403
if implicit_first_cols > 0:
407-
if self.index_col is None and self.infer_index:
404+
if self.index_col is None:
408405
if implicit_first_cols == 1:
409406
self.index_col = 0
410407
else:
@@ -485,7 +482,8 @@ def get_chunk(self, rows=None):
485482

486483
if not index._verify_integrity():
487484
dups = index.get_duplicates()
488-
raise Exception('Index has duplicates: %s' % str(dups))
485+
err_msg = 'Tried columns 1-X as index but found duplicates %s'
486+
raise Exception(err_msg % str(dups))
489487

490488
if len(self.columns) != len(zipped_content):
491489
raise Exception('wrong number of columns')

0 commit comments

Comments
 (0)