Skip to content

Commit 4881213

Browse files
committed
CLN: clean up parser options
Also add a test to make sure that the C parser options validation is actually covered
1 parent 0a71f7a commit 4881213

File tree

3 files changed

+51
-29
lines changed

3 files changed

+51
-29
lines changed

doc/source/release.rst

+2
Original file line number | Diff line number | Diff line change
@@ -564,6 +564,8 @@ Bug Fixes
564564
- Fixed a bug where ``groupby.plot()`` and friends were duplicating figures
565565
multiple times (:issue:`5102`).
566566
- Provide automatic conversion of ``object`` dtypes on fillna, related (:issue:`5103`)
567+
- Fixed a bug where default options were being overwritten in the option
568+
parser cleaning (:issue:`5121`).
567569

568570

569571
pandas 0.12.0

pandas/io/parsers.py

+26-24
Original file line number | Diff line number | Diff line change
@@ -341,7 +341,7 @@ def parser_f(filepath_or_buffer,
341341
squeeze=False,
342342
mangle_dupe_cols=True,
343343
tupleize_cols=False,
344-
):
344+
factorize=True):
345345

346346
# Alias sep -> delimiter.
347347
if delimiter is None:
@@ -401,7 +401,7 @@ def parser_f(filepath_or_buffer,
401401
buffer_lines=buffer_lines,
402402
mangle_dupe_cols=mangle_dupe_cols,
403403
tupleize_cols=tupleize_cols,
404-
)
404+
factorize=factorize)
405405

406406
return _read(filepath_or_buffer, kwds)
407407

@@ -490,35 +490,34 @@ def _get_options_with_defaults(self, engine):
490490
kwds = self.orig_options
491491

492492
options = {}
493-
for argname, default in compat.iteritems(_parser_defaults):
494-
if argname in kwds:
495-
value = kwds[argname]
496-
else:
497-
value = default
498493

499-
options[argname] = value
494+
for argname, default in compat.iteritems(_parser_defaults):
495+
options[argname] = kwds.get(argname, default)
500496

501497
for argname, default in compat.iteritems(_c_parser_defaults):
502498
if argname in kwds:
503499
value = kwds[argname]
500+
504501
if engine != 'c' and value != default:
505-
raise ValueError('%s is not supported with %s parser' %
506-
(argname, engine))
502+
raise ValueError('The %r option is not supported with the'
503+
' %r engine' % (argname, engine))
504+
else:
505+
value = default
507506
options[argname] = value
508507

509508
if engine == 'python-fwf':
510509
for argname, default in compat.iteritems(_fwf_defaults):
511-
if argname in kwds:
512-
value = kwds[argname]
513-
options[argname] = value
510+
options[argname] = kwds.get(argname, default)
514511

515512
return options
516513

517514
def _clean_options(self, options, engine):
518515
result = options.copy()
519516

520517
sep = options['delimiter']
521-
if (sep is None and not options['delim_whitespace']):
518+
delim_whitespace = options['delim_whitespace']
519+
520+
if sep is None and not delim_whitespace:
522521
if engine == 'c':
523522
print('Using Python parser to sniff delimiter')
524523
engine = 'python'
@@ -667,21 +666,24 @@ def __init__(self, kwds):
667666
self.header = kwds.get('header')
668667
if isinstance(self.header,(list,tuple,np.ndarray)):
669668
if kwds.get('as_recarray'):
670-
raise Exception("cannot specify as_recarray when "
671-
"specifying a multi-index header")
669+
raise ValueError("cannot specify as_recarray when "
670+
"specifying a multi-index header")
672671
if kwds.get('usecols'):
673-
raise Exception("cannot specify usecols when "
674-
"specifying a multi-index header")
672+
raise ValueError("cannot specify usecols when "
673+
"specifying a multi-index header")
675674
if kwds.get('names'):
676-
raise Exception("cannot specify names when "
677-
"specifying a multi-index header")
675+
raise ValueError("cannot specify names when "
676+
"specifying a multi-index header")
678677

679678
# validate index_col that only contains integers
680679
if self.index_col is not None:
681-
if not (isinstance(self.index_col,(list,tuple,np.ndarray)) and all(
682-
[ com.is_integer(i) for i in self.index_col ]) or com.is_integer(self.index_col)):
683-
raise Exception("index_col must only contain row numbers "
684-
"when specifying a multi-index header")
680+
is_sequence = isinstance(self.index_col, (list, tuple,
681+
np.ndarray))
682+
if not (is_sequence and
683+
all(com.is_integer(i) for i in self.index_col) or
684+
com.is_integer(self.index_col)):
685+
raise ValueError("index_col must only contain row numbers "
686+
"when specifying a multi-index header")
685687

686688
self._name_processed = False
687689

pandas/io/tests/test_parsers.py

+23-5
Original file line number | Diff line number | Diff line change
@@ -1230,17 +1230,17 @@ def test_header_multi_index(self):
12301230
#### invalid options ####
12311231

12321232
# no as_recarray
1233-
self.assertRaises(Exception, read_csv, StringIO(data), header=[0,1,2,3],
1233+
self.assertRaises(ValueError, read_csv, StringIO(data), header=[0,1,2,3],
12341234
index_col=[0,1], as_recarray=True, tupleize_cols=False)
12351235

12361236
# names
1237-
self.assertRaises(Exception, read_csv, StringIO(data), header=[0,1,2,3],
1237+
self.assertRaises(ValueError, read_csv, StringIO(data), header=[0,1,2,3],
12381238
index_col=[0,1], names=['foo','bar'], tupleize_cols=False)
12391239
# usecols
1240-
self.assertRaises(Exception, read_csv, StringIO(data), header=[0,1,2,3],
1240+
self.assertRaises(ValueError, read_csv, StringIO(data), header=[0,1,2,3],
12411241
index_col=[0,1], usecols=['foo','bar'], tupleize_cols=False)
12421242
# non-numeric index_col
1243-
self.assertRaises(Exception, read_csv, StringIO(data), header=[0,1,2,3],
1243+
self.assertRaises(ValueError, read_csv, StringIO(data), header=[0,1,2,3],
12441244
index_col=['foo','bar'], tupleize_cols=False)
12451245

12461246
def test_pass_names_with_index(self):
@@ -2715,6 +2715,24 @@ def test_warn_if_chunks_have_mismatched_type(self):
27152715
df = self.read_csv(StringIO(data))
27162716
self.assertEqual(df.a.dtype, np.object)
27172717

2718+
def test_invalid_c_parser_opts_with_not_c_parser(self):
2719+
from pandas.io.parsers import _c_parser_defaults as c_defaults
2720+
2721+
data = """1,2,3,,
2722+
1,2,3,4,
2723+
1,2,3,4,5
2724+
1,2,,,
2725+
1,2,3,4,"""
2726+
2727+
engines = 'python', 'python-fwf'
2728+
for default in c_defaults:
2729+
for engine in engines:
2730+
kwargs = {default: object()}
2731+
with tm.assertRaisesRegexp(ValueError,
2732+
'The %r option is not supported '
2733+
'with the %r engine' % (default,
2734+
engine)):
2735+
read_csv(StringIO(data), engine=engine, **kwargs)
27182736

27192737
class TestParseSQL(unittest.TestCase):
27202738

@@ -2783,7 +2801,7 @@ def test_convert_sql_column_decimals(self):
27832801

27842802

27852803
def assert_same_values_and_dtype(res, exp):
2786-
assert(res.dtype == exp.dtype)
2804+
tm.assert_equal(res.dtype, exp.dtype)
27872805
tm.assert_almost_equal(res, exp)
27882806

27892807

0 commit comments

Comments
 (0)