|
2 | 2 | Module contains tools for processing files into DataFrames or other objects
|
3 | 3 | """
|
4 | 4 | from __future__ import print_function
|
5 |
| -from pandas.compat import range, lrange, StringIO, lzip, zip, string_types |
| 5 | +from pandas.compat import range, lrange, StringIO, lzip, zip, string_types, map |
6 | 6 | from pandas import compat
|
7 | 7 | import re
|
8 | 8 | import csv
|
9 |
| -from warnings import warn |
10 | 9 |
|
11 | 10 | import numpy as np
|
12 | 11 |
|
@@ -266,7 +265,6 @@ def _read(filepath_or_buffer, kwds):
|
266 | 265 | 'buffer_lines': None,
|
267 | 266 | 'error_bad_lines': True,
|
268 | 267 | 'warn_bad_lines': True,
|
269 |
| - 'factorize': True, |
270 | 268 | 'dtype': None,
|
271 | 269 | 'decimal': b'.'
|
272 | 270 | }
|
@@ -340,8 +338,7 @@ def parser_f(filepath_or_buffer,
|
340 | 338 | encoding=None,
|
341 | 339 | squeeze=False,
|
342 | 340 | mangle_dupe_cols=True,
|
343 |
| - tupleize_cols=False, |
344 |
| - ): |
| 341 | + tupleize_cols=False): |
345 | 342 |
|
346 | 343 | # Alias sep -> delimiter.
|
347 | 344 | if delimiter is None:
|
@@ -400,8 +397,7 @@ def parser_f(filepath_or_buffer,
|
400 | 397 | low_memory=low_memory,
|
401 | 398 | buffer_lines=buffer_lines,
|
402 | 399 | mangle_dupe_cols=mangle_dupe_cols,
|
403 |
| - tupleize_cols=tupleize_cols, |
404 |
| - ) |
| 400 | + tupleize_cols=tupleize_cols) |
405 | 401 |
|
406 | 402 | return _read(filepath_or_buffer, kwds)
|
407 | 403 |
|
@@ -490,35 +486,34 @@ def _get_options_with_defaults(self, engine):
|
490 | 486 | kwds = self.orig_options
|
491 | 487 |
|
492 | 488 | options = {}
|
493 |
| - for argname, default in compat.iteritems(_parser_defaults): |
494 |
| - if argname in kwds: |
495 |
| - value = kwds[argname] |
496 |
| - else: |
497 |
| - value = default |
498 | 489 |
|
499 |
| - options[argname] = value |
| 490 | + for argname, default in compat.iteritems(_parser_defaults): |
| 491 | + options[argname] = kwds.get(argname, default) |
500 | 492 |
|
501 | 493 | for argname, default in compat.iteritems(_c_parser_defaults):
|
502 | 494 | if argname in kwds:
|
503 | 495 | value = kwds[argname]
|
| 496 | + |
504 | 497 | if engine != 'c' and value != default:
|
505 |
| - raise ValueError('%s is not supported with %s parser' % |
506 |
| - (argname, engine)) |
| 498 | + raise ValueError('The %r option is not supported with the' |
| 499 | + ' %r engine' % (argname, engine)) |
| 500 | + else: |
| 501 | + value = default |
507 | 502 | options[argname] = value
|
508 | 503 |
|
509 | 504 | if engine == 'python-fwf':
|
510 | 505 | for argname, default in compat.iteritems(_fwf_defaults):
|
511 |
| - if argname in kwds: |
512 |
| - value = kwds[argname] |
513 |
| - options[argname] = value |
| 506 | + options[argname] = kwds.get(argname, default) |
514 | 507 |
|
515 | 508 | return options
|
516 | 509 |
|
517 | 510 | def _clean_options(self, options, engine):
|
518 | 511 | result = options.copy()
|
519 | 512 |
|
520 | 513 | sep = options['delimiter']
|
521 |
| - if (sep is None and not options['delim_whitespace']): |
| 514 | + delim_whitespace = options['delim_whitespace'] |
| 515 | + |
| 516 | + if sep is None and not delim_whitespace: |
522 | 517 | if engine == 'c':
|
523 | 518 | print('Using Python parser to sniff delimiter')
|
524 | 519 | engine = 'python'
|
@@ -667,21 +662,24 @@ def __init__(self, kwds):
|
667 | 662 | self.header = kwds.get('header')
|
668 | 663 | if isinstance(self.header,(list,tuple,np.ndarray)):
|
669 | 664 | if kwds.get('as_recarray'):
|
670 |
| - raise Exception("cannot specify as_recarray when " |
671 |
| - "specifying a multi-index header") |
| 665 | + raise ValueError("cannot specify as_recarray when " |
| 666 | + "specifying a multi-index header") |
672 | 667 | if kwds.get('usecols'):
|
673 |
| - raise Exception("cannot specify usecols when " |
674 |
| - "specifying a multi-index header") |
| 668 | + raise ValueError("cannot specify usecols when " |
| 669 | + "specifying a multi-index header") |
675 | 670 | if kwds.get('names'):
|
676 |
| - raise Exception("cannot specify names when " |
677 |
| - "specifying a multi-index header") |
| 671 | + raise ValueError("cannot specify names when " |
| 672 | + "specifying a multi-index header") |
678 | 673 |
|
679 | 674 | # validate index_col that only contains integers
|
680 | 675 | if self.index_col is not None:
|
681 |
| - if not (isinstance(self.index_col,(list,tuple,np.ndarray)) and all( |
682 |
| - [ com.is_integer(i) for i in self.index_col ]) or com.is_integer(self.index_col)): |
683 |
| - raise Exception("index_col must only contain row numbers " |
684 |
| - "when specifying a multi-index header") |
| 676 | + is_sequence = isinstance(self.index_col, (list, tuple, |
| 677 | + np.ndarray)) |
| 678 | + if not (is_sequence and |
| 679 | + all(map(com.is_integer, self.index_col)) or |
| 680 | + com.is_integer(self.index_col)): |
| 681 | + raise ValueError("index_col must only contain row numbers " |
| 682 | + "when specifying a multi-index header") |
685 | 683 |
|
686 | 684 | self._name_processed = False
|
687 | 685 |
|
|
0 commit comments