DOC: Updating the docstring of read_csv and related functions #23517


Merged: 32 commits, Nov 21, 2018

Changes from 4 commits

Commits (32):
e7d3b09
initial docstring fix at parsers.py
thoo Nov 5, 2018
692e67a
fix pd.read_csv|read_table|read_fwf
thoo Nov 5, 2018
d0c2b40
Merge remote-tracking branch 'upstream/master' into io_csv_docstring_…
thoo Nov 5, 2018
18f5552
Fix flake8 3.6 W650 Error
thoo Nov 5, 2018
c0b7210
Add See Also and Example
thoo Nov 10, 2018
d84ffb2
Merge remote-tracking branch 'upstream/master' into io_csv_docstring_…
thoo Nov 10, 2018
5e85114
Add more cross ref at see also section
thoo Nov 11, 2018
d0600f9
Merge remote-tracking branch 'upstream/master' into io_csv_docstring_…
thoo Nov 11, 2018
3f5fbcd
Switch to .format from %s
thoo Nov 12, 2018
4e6f3a0
Merge remote-tracking branch 'upstream/master' into io_csv_docstring_…
thoo Nov 12, 2018
d2be9b9
Remove comments
thoo Nov 12, 2018
237a024
remove intermediate variables
thoo Nov 12, 2018
63c6d84
Merge remote-tracking branch 'upstream/master' into io_csv_docstring_…
thoo Nov 12, 2018
5d7ff54
remove variable which is not used
thoo Nov 12, 2018
3a0a82a
Retrigger circleci which had build error
thoo Nov 12, 2018
bffda55
Fix read_fwf missing parameters in docstring
thoo Nov 13, 2018
15aa9d1
Merge remote-tracking branch 'upstream/master' into io_csv_docstring_…
thoo Nov 13, 2018
84b8db9
Add doc for **kwds and clean up docstrings
thoo Nov 15, 2018
97db816
Merge remote-tracking branch 'upstream/master' into io_csv_docstring_…
thoo Nov 15, 2018
5a95500
Change docstrings for read_fwf
thoo Nov 19, 2018
7732fbd
Merge remote-tracking branch 'upstream/master' into io_csv_docstring_…
thoo Nov 19, 2018
e4a2bdd
Fix pytest error
thoo Nov 19, 2018
689a395
Retrigger travis:gw1 crashed
thoo Nov 19, 2018
233e4ef
modify _parser_params
thoo Nov 19, 2018
0720c8b
Retrigger travis:gw1 crashed
thoo Nov 20, 2018
b19002b
change var name from _parser_params
thoo Nov 20, 2018
0b0b072
Merge remote-tracking branch 'upstream/master' into io_csv_docstring_…
thoo Nov 20, 2018
5c8a3aa
Fix docstrings
thoo Nov 20, 2018
b51c58a
Merge remote-tracking branch 'upstream/master' into io_csv_docstring_…
thoo Nov 20, 2018
ef3f38a
Retrigger travis:timeout
thoo Nov 20, 2018
2bfa6ab
Retrigger circleci:Falsified on the first call
thoo Nov 21, 2018
766d73f
Merge remote-tracking branch 'upstream/master' into io_csv_docstring_…
thoo Nov 21, 2018
146 changes: 73 additions & 73 deletions pandas/io/parsers.py
@@ -1,6 +1,7 @@
"""
Module contains tools for processing files into DataFrames or other objects
"""

from __future__ import print_function

from collections import defaultdict
@@ -71,14 +72,6 @@
By file-like object, we refer to objects with a ``read()`` method, such as
a file handler (e.g. via builtin ``open`` function) or ``StringIO``.
%s
Member: I think it'd be better to name all the %s. Having them positional makes things a bit difficult to follow, at least for me.

delim_whitespace : boolean, default False
Specifies whether or not whitespace (e.g. ``' '`` or ``'\t'``) will be
used as the sep. Equivalent to setting ``sep='\s+'``. If this option
is set to True, nothing should be passed in for the ``delimiter``
parameter.

.. versionadded:: 0.18.1 support for the Python parser.

header : int or list of ints, default 'infer'
Member suggested change:
- header : int or list of ints, default 'infer'
+ header : int, str or list of int, default 'infer'
Row number(s) to use as the column names, and the start of the
data. Default behavior is to infer the column names: if no names
@@ -101,7 +94,7 @@
Column to use as the row labels of the DataFrame. If a sequence is given, a
MultiIndex is used. If you have a malformed file with delimiters at the end
of each line, you might consider index_col=False to force pandas to _not_
use the first column as the index (row names)
use the first column as the index (row names).
usecols : list-like or callable, default None
Return a subset of the columns. If list-like, all elements must either
be positional (i.e. integer indices into the document columns) or strings
Expand All @@ -120,11 +113,11 @@
example of a valid callable argument would be ``lambda x: x.upper() in
['AAA', 'BBB', 'DDD']``. Using this parameter results in much faster
parsing time and lower memory usage.
squeeze : boolean, default False
If the parsed data only contains one column then return a Series
squeeze : bool, default False
If the parsed data only contains one column then return a Series.
prefix : str, default None
Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ...
mangle_dupe_cols : boolean, default True
mangle_dupe_cols : bool, default True
Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than
'X'...'X'. Passing in False will cause data to be overwritten if there
are duplicate names in the columns.
Expand All @@ -137,24 +130,24 @@
%s
converters : dict, default None
Member suggested change:
- converters : dict, default None
+ converters : dict, optional
Dict of functions for converting values in certain columns. Keys can either
be integers or column labels
be integers or column labels.
true_values : list, default None
Values to consider as True
Values to consider as True.
false_values : list, default None
Values to consider as False
skipinitialspace : boolean, default False
Values to consider as False.
skipinitialspace : bool, default False
Skip spaces after delimiter.
skiprows : list-like or integer or callable, default None
skiprows : list-like or int or callable, default None
Member suggested change:
- skiprows : list-like or int or callable, default None
+ skiprows : list-like, int or callable, optional

Line numbers to skip (0-indexed) or number of lines to skip (int)
at the start of the file.

If callable, the callable function will be evaluated against the row
indices, returning True if the row should be skipped and False otherwise.
An example of a valid callable argument would be ``lambda x: x in [0, 2]``.
skipfooter : int, default 0
Number of lines at bottom of file to skip (Unsupported with engine='c')
Number of lines at bottom of file to skip (Unsupported with engine='c').
nrows : int, default None
Number of rows of file to read. Useful for reading pieces of large files
Number of rows of file to read. Useful for reading pieces of large files.
na_values : scalar, str, list-like, or dict, default None
Additional strings to recognize as NA/NaN. If dict passed, specific
per-column NA values. By default the following values are interpreted as
Expand All @@ -175,16 +168,17 @@

Note that if `na_filter` is passed in as False, the `keep_default_na` and
`na_values` parameters will be ignored.
na_filter : boolean, default True
na_filter : bool, default True
Detect missing value markers (empty strings and the value of na_values). In
data without any NAs, passing na_filter=False can improve the performance
of reading a large file
verbose : boolean, default False
Indicate number of NA values placed in non-numeric columns
skip_blank_lines : boolean, default True
If True, skip over blank lines rather than interpreting as NaN values
parse_dates : boolean or list of ints or names or list of lists or dict, \
of reading a large file.
verbose : bool, default False
Indicate number of NA values placed in non-numeric columns.
skip_blank_lines : bool, default True
If True, skip over blank lines rather than interpreting as NaN values.
parse_dates : bool or list of ints or names or list of lists or dict, \
default False
The behavior is as follows:

* boolean. If True -> try parsing the index.
* list of ints or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3
@@ -199,12 +193,12 @@
datetime parsing, use ``pd.to_datetime`` after ``pd.read_csv``

Note: A fast-path exists for iso8601-formatted dates.
infer_datetime_format : boolean, default False
infer_datetime_format : bool, default False
If True and `parse_dates` is enabled, pandas will attempt to infer the
format of the datetime strings in the columns, and if it can be inferred,
switch to a faster method of parsing them. In some cases this can increase
the parsing speed by 5-10x.
keep_date_col : boolean, default False
keep_date_col : bool, default False
If True and `parse_dates` specifies combining multiple columns then
keep the original columns.
date_parser : function, default None
@@ -217,9 +211,9 @@
and pass that; and 3) call `date_parser` once for each row using one or
more strings (corresponding to the columns defined by `parse_dates`) as
arguments.
dayfirst : boolean, default False
DD/MM format dates, international and European format
iterator : boolean, default False
dayfirst : bool, default False
DD/MM format dates, international and European format.
iterator : bool, default False
Return TextFileReader object for iteration or getting chunks with
``get_chunk()``.
chunksize : int, default None
Member suggested change:
- chunksize : int, default None
+ chunksize : int, optional

@@ -237,14 +231,9 @@
.. versionadded:: 0.18.1 support for 'zip' and 'xz' compression.

thousands : str, default None
Member suggested change:
- thousands : str, default None
+ thousands : str, optional

Thousands separator
Thousands separator.
decimal : str, default '.'
Character to recognize as decimal point (e.g. use ',' for European data).
float_precision : string, default None
Specifies which converter the C engine should use for floating-point
values. The options are `None` for the ordinary converter,
`high` for the high-precision converter, and `round_trip` for the
round-trip converter.
lineterminator : str (length 1), default None
Character to break file into lines. Only valid with C parser.
quotechar : str (length 1), optional
@@ -253,7 +242,7 @@
quoting : int or csv.QUOTE_* instance, default 0
Control field quoting behavior per ``csv.QUOTE_*`` constants. Use one of
QUOTE_MINIMAL (0), QUOTE_ALL (1), QUOTE_NONNUMERIC (2) or QUOTE_NONE (3).
doublequote : boolean, default ``True``
doublequote : bool, default ``True``
When quotechar is specified and quoting is not ``QUOTE_NONE``, indicate
whether or not to interpret two consecutive quotechar elements INSIDE a
field as a single ``quotechar`` element.
@@ -270,43 +259,55 @@
encoding : str, default None
Encoding to use for UTF when reading/writing (ex. 'utf-8'). `List of Python
standard encodings
<https://docs.python.org/3/library/codecs.html#standard-encodings>`_
<https://docs.python.org/3/library/codecs.html#standard-encodings>`_ .
dialect : str or csv.Dialect instance, default None
Member suggested change:
- dialect : str or csv.Dialect instance, default None
+ dialect : str or csv.Dialect, optional

If provided, this parameter will override values (default or not) for the
following parameters: `delimiter`, `doublequote`, `escapechar`,
`skipinitialspace`, `quotechar`, and `quoting`. If it is necessary to
override values, a ParserWarning will be issued. See csv.Dialect
documentation for more details.
tupleize_cols : boolean, default False
tupleize_cols : bool, default False
Leave a list of tuples on columns as is (default is to convert to
a MultiIndex on the columns).
.. deprecated:: 0.21.0
This argument will be removed and will always convert to MultiIndex

Leave a list of tuples on columns as is (default is to convert to
a MultiIndex on the columns)
error_bad_lines : boolean, default True
error_bad_lines : bool, default True
Lines with too many fields (e.g. a csv line with too many commas) will by
default cause an exception to be raised, and no DataFrame will be returned.
If False, then these "bad lines" will dropped from the DataFrame that is
returned.
warn_bad_lines : boolean, default True
warn_bad_lines : bool, default True
If error_bad_lines is False, and warn_bad_lines is True, a warning for each
"bad line" will be output.
low_memory : boolean, default True
delim_whitespace : bool, default False
Specifies whether or not whitespace (e.g. ``' '`` or ``'\t'``) will be
used as the sep. Equivalent to setting ``sep='\\s+'``. If this option
is set to True, nothing should be passed in for the ``delimiter``
parameter.

.. versionadded:: 0.18.1 support for the Python parser.

low_memory : bool, default True
Internally process the file in chunks, resulting in lower memory use
while parsing, but possibly mixed type inference. To ensure no mixed
types either set False, or specify the type with the `dtype` parameter.
Note that the entire file is read into a single DataFrame regardless,
use the `chunksize` or `iterator` parameter to return the data in chunks.
(Only valid with C parser)
memory_map : boolean, default False
(Only valid with C parser).
memory_map : bool, default False
If a filepath is provided for `filepath_or_buffer`, map the file object
directly onto memory and access the data directly from there. Using this
option can improve performance because there is no longer any I/O overhead.
float_precision : str, default None
Member suggested change:
- float_precision : str, default None
+ float_precision : str, optional

Contributor (author): @datapythonista Should I write a file-like object to memory using io.StringIO() for the examples?

Member: The best option we found for showing functions that save to disk is df.to_csv('/tmp/data.csv')  # doctest: +SKIP.

I'm not sure when reading; I think there is a directory with some files that are used for that. Can you take a look? Or maybe @TomAugspurger can help.

Contributor: Not sure what's best here. We could make an HTTP request to https://github.com/pandas-dev/pandas/blob/master/doc/data/tips.csv, but I'd rather avoid that on every test run. I'm fine with just skipping.
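
For illustration, a minimal sketch of the skipped-doctest pattern discussed above, assuming a hypothetical data.csv (the +SKIP directive means the example is rendered in the docs but never executed during test runs):

    Examples
    --------
    >>> import pandas as pd
    >>> pd.read_csv('data.csv')  # doctest: +SKIP

The same directive covers the write side, e.g. df.to_csv('data.csv')  # doctest: +SKIP, so the test suite touches no temporary files.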

Specifies which converter the C engine should use for floating-point
values. The options are `None` for the ordinary converter,
`high` for the high-precision converter, and `round_trip` for the
round-trip converter.

Returns
-------
result : DataFrame or TextParser
"""
result : DataFrame or TextParser"""
Member suggested change:
- result : DataFrame or TextParser"""
+ DataFrame or TextParser"""


# engine is not used in read_fwf() so is factored out of the shared docstring
_engine_doc = """engine : {'c', 'python'}, optional
@@ -320,23 +321,23 @@
tool, ``csv.Sniffer``. In addition, separators longer than 1 character and
different from ``'\s+'`` will be interpreted as regular expressions and
will also force the use of the Python parsing engine. Note that regex
delimiters are prone to ignoring quoted data. Regex example: ``'\r\t'``
delimiters are prone to ignoring quoted data. Regex example: ``'\r\t'``.
delimiter : str, default ``None``
Alternative argument name for sep."""
Alternative argument name for sep.
Member suggested change:
- Alternative argument name for sep.
+ Alias for sep.
"""

_read_csv_doc = """
Read CSV (comma-separated) file into DataFrame
Read CSV (comma-separated) file into DataFrame.

%s
""" % (_parser_params % (_sep_doc.format(default="','"), _engine_doc))

_read_table_doc = """
Read general delimited file into DataFrame.

.. deprecated:: 0.24.0
Use :func:`pandas.read_csv` instead, passing ``sep='\t'`` if necessary.

Read general delimited file into DataFrame

%s
""" % (_parser_params % (_sep_doc.format(default="\\t (tab-stop)"),
_engine_doc))
Contributor: Yeah, changing these to use named parameters with .format() would be good.

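To make the suggestion concrete, a small sketch of the two templating styles (the variable names here are illustrative, not the exact ones used in parsers.py):

    sep_doc = "sep : str, default ','"
    engine_doc = "engine : {'c', 'python'}, optional"

    # Positional %s interpolation: the reader has to count placeholders
    # to know which fragment lands where.
    doc = "Read CSV.\n%s\n%s" % (sep_doc, engine_doc)

    # Named .format() fields: each substitution is self-describing.
    doc = "Read CSV.\n{sep_doc}\n{engine_doc}".format(
        sep_doc=sep_doc, engine_doc=engine_doc)
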
@@ -358,7 +359,7 @@
"""

_read_fwf_doc = """
Read a table of fixed-width formatted lines into DataFrame
Read a table of fixed-width formatted lines into DataFrame.

%s
""" % (_parser_params % (_fwf_widths, ''))
@@ -468,10 +469,10 @@ def _read(filepath_or_buffer, kwds):
_parser_defaults = {
'delimiter': None,

'doublequote': True,
'escapechar': None,
'quotechar': '"',
'quoting': csv.QUOTE_MINIMAL,
'doublequote': True,
'skipinitialspace': False,
'lineterminator': None,

Expand All @@ -480,14 +481,16 @@ def _read(filepath_or_buffer, kwds):
'names': None,
'prefix': None,
'skiprows': None,
'skipfooter': 0,
'nrows': None,
'na_values': None,
'keep_default_na': True,

'true_values': None,
'false_values': None,
'converters': None,
'dtype': None,
'skipfooter': 0,

'keep_default_na': True,
'thousands': None,
'comment': None,
'decimal': b'.',
Expand All @@ -497,10 +500,8 @@ def _read(filepath_or_buffer, kwds):
'keep_date_col': False,
'dayfirst': False,
'date_parser': None,

'usecols': None,

'nrows': None,
# 'iterator': False,
'chunksize': None,
'verbose': False,
@@ -573,6 +574,7 @@ def parser_f(filepath_or_buffer,
false_values=None,
skipinitialspace=False,
skiprows=None,
skipfooter=0,
nrows=None,

# NA and Missing Data Handling
@@ -600,6 +602,7 @@ def parser_f(filepath_or_buffer,
lineterminator=None,
quotechar='"',
quoting=csv.QUOTE_MINIMAL,
doublequote=True,
escapechar=None,
comment=None,
encoding=None,
Expand All @@ -610,10 +613,7 @@ def parser_f(filepath_or_buffer,
error_bad_lines=True,
warn_bad_lines=True,

skipfooter=0,

# Internal
doublequote=True,
delim_whitespace=False,
low_memory=_c_parser_defaults['low_memory'],
memory_map=False,
@@ -665,6 +665,7 @@ def parser_f(filepath_or_buffer,
names=names,
prefix=prefix,
skiprows=skiprows,
skipfooter=skipfooter,
na_values=na_values,
true_values=true_values,
false_values=false_values,
Expand All @@ -681,7 +682,6 @@ def parser_f(filepath_or_buffer,
nrows=nrows,
iterator=iterator,
chunksize=chunksize,
skipfooter=skipfooter,
converters=converters,
dtype=dtype,
usecols=usecols,
@@ -1994,18 +1994,18 @@ def TextParser(*args, **kwds):
rows will be discarded
index_col : int or list, default None
Column or columns to use as the (possibly hierarchical) index
has_index_names: boolean, default False
has_index_names: bool, default False
True if the cols defined in index_col have an index name and are
not in the header
not in the header.
na_values : scalar, str, list-like, or dict, default None
Additional strings to recognize as NA/NaN.
keep_default_na : bool, default True
thousands : str, default None
Thousands separator
comment : str, default None
Comment out remainder of line
parse_dates : boolean, default False
keep_date_col : boolean, default False
parse_dates : bool, default False
keep_date_col : bool, default False
date_parser : function, default None
skiprows : list of integers
Row numbers to skip
@@ -2016,15 +2016,15 @@ def TextParser(*args, **kwds):
either be integers or column labels, values are functions that take one
input argument, the cell (not column) content, and return the
transformed content.
encoding : string, default None
encoding : str, default None
Encoding to use for UTF when reading/writing (ex. 'utf-8')
squeeze : boolean, default False
returns Series if only one column
infer_datetime_format: boolean, default False
squeeze : bool, default False
returns Series if only one column.
infer_datetime_format: bool, default False
If True and `parse_dates` is True for a column, try to infer the
datetime format based on the first datetime string. If the format
can be inferred, there often will be a large parsing speed-up.
float_precision : string, default None
float_precision : str, default None
Specifies which converter the C engine should use for floating-point
values. The options are None for the ordinary converter,
'high' for the high-precision converter, and 'round_trip' for the