diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index e53c828fe30cb..497cf261fcece 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -38,7 +38,7 @@
 )
 from pandas.io.json._normalize import convert_to_line_delimits
 from pandas.io.json._table_schema import build_table_schema, parse_table_schema
-from pandas.io.parsers import validate_integer
+from pandas.io.parsers.readers import validate_integer
 
 loads = json.loads
 dumps = json.dumps
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
deleted file mode 100644
index e58e59a722b7a..0000000000000
--- a/pandas/io/parsers.py
+++ /dev/null
@@ -1,3997 +0,0 @@
-"""
-Module contains tools for processing files into DataFrames or other objects
-"""
-
-from collections import abc, defaultdict
-import csv
-import datetime
-from io import StringIO
-import itertools
-import re
-import sys
-from textwrap import fill
-from typing import (
-    Any,
-    Dict,
-    Iterable,
-    Iterator,
-    List,
-    Optional,
-    Sequence,
-    Set,
-    Type,
-    cast,
-)
-import warnings
-
-import numpy as np
-
-import pandas._libs.lib as lib
-import pandas._libs.ops as libops
-import pandas._libs.parsers as parsers
-from pandas._libs.parsers import STR_NA_VALUES
-from pandas._libs.tslibs import parsing
-from pandas._typing import DtypeArg, FilePathOrBuffer, StorageOptions, Union
-from pandas.errors import (
-    AbstractMethodError,
-    EmptyDataError,
-    ParserError,
-    ParserWarning,
-)
-from pandas.util._decorators import Appender
-
-from pandas.core.dtypes.cast import astype_nansafe
-from pandas.core.dtypes.common import (
-    ensure_object,
-    ensure_str,
-    is_bool_dtype,
-    is_categorical_dtype,
-    is_dict_like,
-    is_dtype_equal,
-    is_extension_array_dtype,
-    is_file_like,
-    is_float,
-    is_integer,
-    is_integer_dtype,
-    is_list_like,
-    is_object_dtype,
-    is_scalar,
-    is_string_dtype,
-    pandas_dtype,
-)
-from pandas.core.dtypes.dtypes import CategoricalDtype
-from pandas.core.dtypes.missing import isna
-
-from pandas.core import algorithms, generic
-from pandas.core.arrays import Categorical
-from pandas.core.frame import DataFrame
-from pandas.core.indexes.api import (
-    Index,
-    MultiIndex,
-    RangeIndex,
-    ensure_index_from_sequences,
-)
-from pandas.core.series import Series
-from pandas.core.tools import datetimes as tools
-
-from pandas.io.common import IOHandles, get_handle, validate_header_arg
-from pandas.io.date_converters import generic_parser
-
-# BOM character (byte order mark)
-# This exists at the beginning of a file to indicate endianness
-# of a file (stream). Unfortunately, this marker screws up parsing,
-# so we need to remove it if we see it.
-_BOM = "\ufeff"
-
-_doc_read_csv_and_table = (
-    r"""
-{summary}
-
-Also supports optionally iterating or breaking of the file
-into chunks.
-
-Additional help can be found in the online docs for
-`IO Tools `_.
-
-Parameters
-----------
-filepath_or_buffer : str, path object or file-like object
-    Any valid string path is acceptable. The string could be a URL. Valid
-    URL schemes include http, ftp, s3, gs, and file. For file URLs, a host is
-    expected. A local file could be: file://localhost/path/to/table.csv.
-
-    If you want to pass in a path object, pandas accepts any ``os.PathLike``.
-
-    By file-like object, we refer to objects with a ``read()`` method, such as
-    a file handle (e.g. via builtin ``open`` function) or ``StringIO``.
-sep : str, default {_default_sep}
-    Delimiter to use.
If sep is None, the C engine cannot automatically detect - the separator, but the Python parsing engine can, meaning the latter will - be used and automatically detect the separator by Python's builtin sniffer - tool, ``csv.Sniffer``. In addition, separators longer than 1 character and - different from ``'\s+'`` will be interpreted as regular expressions and - will also force the use of the Python parsing engine. Note that regex - delimiters are prone to ignoring quoted data. Regex example: ``'\r\t'``. -delimiter : str, default ``None`` - Alias for sep. -header : int, list of int, default 'infer' - Row number(s) to use as the column names, and the start of the - data. Default behavior is to infer the column names: if no names - are passed the behavior is identical to ``header=0`` and column - names are inferred from the first line of the file, if column - names are passed explicitly then the behavior is identical to - ``header=None``. Explicitly pass ``header=0`` to be able to - replace existing names. The header can be a list of integers that - specify row locations for a multi-index on the columns - e.g. [0,1,3]. Intervening rows that are not specified will be - skipped (e.g. 2 in this example is skipped). Note that this - parameter ignores commented lines and empty lines if - ``skip_blank_lines=True``, so ``header=0`` denotes the first line of - data rather than the first line of the file. -names : array-like, optional - List of column names to use. If the file contains a header row, - then you should explicitly pass ``header=0`` to override the column names. - Duplicates in this list are not allowed. -index_col : int, str, sequence of int / str, or False, default ``None`` - Column(s) to use as the row labels of the ``DataFrame``, either given as - string name or column index. If a sequence of int / str is given, a - MultiIndex is used. - - Note: ``index_col=False`` can be used to force pandas to *not* use the first - column as the index, e.g. when you have a malformed file with delimiters at - the end of each line. -usecols : list-like or callable, optional - Return a subset of the columns. If list-like, all elements must either - be positional (i.e. integer indices into the document columns) or strings - that correspond to column names provided either by the user in `names` or - inferred from the document header row(s). For example, a valid list-like - `usecols` parameter would be ``[0, 1, 2]`` or ``['foo', 'bar', 'baz']``. - Element order is ignored, so ``usecols=[0, 1]`` is the same as ``[1, 0]``. - To instantiate a DataFrame from ``data`` with element order preserved use - ``pd.read_csv(data, usecols=['foo', 'bar'])[['foo', 'bar']]`` for columns - in ``['foo', 'bar']`` order or - ``pd.read_csv(data, usecols=['foo', 'bar'])[['bar', 'foo']]`` - for ``['bar', 'foo']`` order. - - If callable, the callable function will be evaluated against the column - names, returning names where the callable function evaluates to True. An - example of a valid callable argument would be ``lambda x: x.upper() in - ['AAA', 'BBB', 'DDD']``. Using this parameter results in much faster - parsing time and lower memory usage. -squeeze : bool, default False - If the parsed data only contains one column then return a Series. -prefix : str, optional - Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ... -mangle_dupe_cols : bool, default True - Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than - 'X'...'X'. 
Passing in False will cause data to be overwritten if there - are duplicate names in the columns. -dtype : Type name or dict of column -> type, optional - Data type for data or columns. E.g. {{'a': np.float64, 'b': np.int32, - 'c': 'Int64'}} - Use `str` or `object` together with suitable `na_values` settings - to preserve and not interpret dtype. - If converters are specified, they will be applied INSTEAD - of dtype conversion. -engine : {{'c', 'python'}}, optional - Parser engine to use. The C engine is faster while the python engine is - currently more feature-complete. -converters : dict, optional - Dict of functions for converting values in certain columns. Keys can either - be integers or column labels. -true_values : list, optional - Values to consider as True. -false_values : list, optional - Values to consider as False. -skipinitialspace : bool, default False - Skip spaces after delimiter. -skiprows : list-like, int or callable, optional - Line numbers to skip (0-indexed) or number of lines to skip (int) - at the start of the file. - - If callable, the callable function will be evaluated against the row - indices, returning True if the row should be skipped and False otherwise. - An example of a valid callable argument would be ``lambda x: x in [0, 2]``. -skipfooter : int, default 0 - Number of lines at bottom of file to skip (Unsupported with engine='c'). -nrows : int, optional - Number of rows of file to read. Useful for reading pieces of large files. -na_values : scalar, str, list-like, or dict, optional - Additional strings to recognize as NA/NaN. If dict passed, specific - per-column NA values. By default the following values are interpreted as - NaN: '""" - + fill("', '".join(sorted(STR_NA_VALUES)), 70, subsequent_indent=" ") - + """'. -keep_default_na : bool, default True - Whether or not to include the default NaN values when parsing the data. - Depending on whether `na_values` is passed in, the behavior is as follows: - - * If `keep_default_na` is True, and `na_values` are specified, `na_values` - is appended to the default NaN values used for parsing. - * If `keep_default_na` is True, and `na_values` are not specified, only - the default NaN values are used for parsing. - * If `keep_default_na` is False, and `na_values` are specified, only - the NaN values specified `na_values` are used for parsing. - * If `keep_default_na` is False, and `na_values` are not specified, no - strings will be parsed as NaN. - - Note that if `na_filter` is passed in as False, the `keep_default_na` and - `na_values` parameters will be ignored. -na_filter : bool, default True - Detect missing value markers (empty strings and the value of na_values). In - data without any NAs, passing na_filter=False can improve the performance - of reading a large file. -verbose : bool, default False - Indicate number of NA values placed in non-numeric columns. -skip_blank_lines : bool, default True - If True, skip over blank lines rather than interpreting as NaN values. -parse_dates : bool or list of int or names or list of lists or dict, \ -default False - The behavior is as follows: - - * boolean. If True -> try parsing the index. - * list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3 - each as a separate date column. - * list of lists. e.g. If [[1, 3]] -> combine columns 1 and 3 and parse as - a single date column. - * dict, e.g. 
{{'foo' : [1, 3]}} -> parse columns 1, 3 as date and call - result 'foo' - - If a column or index cannot be represented as an array of datetimes, - say because of an unparsable value or a mixture of timezones, the column - or index will be returned unaltered as an object data type. For - non-standard datetime parsing, use ``pd.to_datetime`` after - ``pd.read_csv``. To parse an index or column with a mixture of timezones, - specify ``date_parser`` to be a partially-applied - :func:`pandas.to_datetime` with ``utc=True``. See - :ref:`io.csv.mixed_timezones` for more. - - Note: A fast-path exists for iso8601-formatted dates. -infer_datetime_format : bool, default False - If True and `parse_dates` is enabled, pandas will attempt to infer the - format of the datetime strings in the columns, and if it can be inferred, - switch to a faster method of parsing them. In some cases this can increase - the parsing speed by 5-10x. -keep_date_col : bool, default False - If True and `parse_dates` specifies combining multiple columns then - keep the original columns. -date_parser : function, optional - Function to use for converting a sequence of string columns to an array of - datetime instances. The default uses ``dateutil.parser.parser`` to do the - conversion. Pandas will try to call `date_parser` in three different ways, - advancing to the next if an exception occurs: 1) Pass one or more arrays - (as defined by `parse_dates`) as arguments; 2) concatenate (row-wise) the - string values from the columns defined by `parse_dates` into a single array - and pass that; and 3) call `date_parser` once for each row using one or - more strings (corresponding to the columns defined by `parse_dates`) as - arguments. -dayfirst : bool, default False - DD/MM format dates, international and European format. -cache_dates : bool, default True - If True, use a cache of unique, converted dates to apply the datetime - conversion. May produce significant speed-up when parsing duplicate - date strings, especially ones with timezone offsets. - - .. versionadded:: 0.25.0 -iterator : bool, default False - Return TextFileReader object for iteration or getting chunks with - ``get_chunk()``. - - .. versionchanged:: 1.2 - - ``TextFileReader`` is a context manager. -chunksize : int, optional - Return TextFileReader object for iteration. - See the `IO Tools docs - `_ - for more information on ``iterator`` and ``chunksize``. - - .. versionchanged:: 1.2 - - ``TextFileReader`` is a context manager. -compression : {{'infer', 'gzip', 'bz2', 'zip', 'xz', None}}, default 'infer' - For on-the-fly decompression of on-disk data. If 'infer' and - `filepath_or_buffer` is path-like, then detect compression from the - following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise no - decompression). If using 'zip', the ZIP file must contain only one data - file to be read in. Set to None for no decompression. -thousands : str, optional - Thousands separator. -decimal : str, default '.' - Character to recognize as decimal point (e.g. use ',' for European data). -lineterminator : str (length 1), optional - Character to break file into lines. Only valid with C parser. -quotechar : str (length 1), optional - The character used to denote the start and end of a quoted item. Quoted - items can include the delimiter and it will be ignored. -quoting : int or csv.QUOTE_* instance, default 0 - Control field quoting behavior per ``csv.QUOTE_*`` constants. Use one of - QUOTE_MINIMAL (0), QUOTE_ALL (1), QUOTE_NONNUMERIC (2) or QUOTE_NONE (3). 
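
Note: the `parse_dates` forms listed above are easiest to see side by side. A minimal sketch (not part of the diff); the CSV content and column names are invented for illustration:

    import io

    import pandas as pd

    data = io.StringIO("year,month,day,value\n2020,12,31,1\n2021,1,2,2\n")

    # list-of-lists form: combine columns 0, 1 and 2 into a single parsed
    # date column (named "year_month_day" after the source columns)
    df = pd.read_csv(data, parse_dates=[[0, 1, 2]])

    # dict form: the same combination, but the result is named "date"
    data.seek(0)
    df2 = pd.read_csv(data, parse_dates={"date": ["year", "month", "day"]})

In both calls the component columns are consumed unless `keep_date_col=True` is also passed.
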
-doublequote : bool, default ``True`` - When quotechar is specified and quoting is not ``QUOTE_NONE``, indicate - whether or not to interpret two consecutive quotechar elements INSIDE a - field as a single ``quotechar`` element. -escapechar : str (length 1), optional - One-character string used to escape other characters. -comment : str, optional - Indicates remainder of line should not be parsed. If found at the beginning - of a line, the line will be ignored altogether. This parameter must be a - single character. Like empty lines (as long as ``skip_blank_lines=True``), - fully commented lines are ignored by the parameter `header` but not by - `skiprows`. For example, if ``comment='#'``, parsing - ``#empty\\na,b,c\\n1,2,3`` with ``header=0`` will result in 'a,b,c' being - treated as the header. -encoding : str, optional - Encoding to use for UTF when reading/writing (ex. 'utf-8'). `List of Python - standard encodings - `_ . -dialect : str or csv.Dialect, optional - If provided, this parameter will override values (default or not) for the - following parameters: `delimiter`, `doublequote`, `escapechar`, - `skipinitialspace`, `quotechar`, and `quoting`. If it is necessary to - override values, a ParserWarning will be issued. See csv.Dialect - documentation for more details. -error_bad_lines : bool, default True - Lines with too many fields (e.g. a csv line with too many commas) will by - default cause an exception to be raised, and no DataFrame will be returned. - If False, then these "bad lines" will dropped from the DataFrame that is - returned. -warn_bad_lines : bool, default True - If error_bad_lines is False, and warn_bad_lines is True, a warning for each - "bad line" will be output. -delim_whitespace : bool, default False - Specifies whether or not whitespace (e.g. ``' '`` or ``'\t'``) will be - used as the sep. Equivalent to setting ``sep='\\s+'``. If this option - is set to True, nothing should be passed in for the ``delimiter`` - parameter. -low_memory : bool, default True - Internally process the file in chunks, resulting in lower memory use - while parsing, but possibly mixed type inference. To ensure no mixed - types either set False, or specify the type with the `dtype` parameter. - Note that the entire file is read into a single DataFrame regardless, - use the `chunksize` or `iterator` parameter to return the data in chunks. - (Only valid with C parser). -memory_map : bool, default False - If a filepath is provided for `filepath_or_buffer`, map the file object - directly onto memory and access the data directly from there. Using this - option can improve performance because there is no longer any I/O overhead. -float_precision : str, optional - Specifies which converter the C engine should use for floating-point - values. The options are ``None`` or 'high' for the ordinary converter, - 'legacy' for the original lower precision pandas converter, and - 'round_trip' for the round-trip converter. - - .. versionchanged:: 1.2 - -{storage_options} - - .. versionadded:: 1.2 - -Returns -------- -DataFrame or TextParser - A comma-separated values (csv) file is returned as two-dimensional - data structure with labeled axes. - -See Also --------- -DataFrame.to_csv : Write DataFrame to a comma-separated values (csv) file. -read_csv : Read a comma-separated values (csv) file into DataFrame. -read_fwf : Read a table of fixed-width formatted lines into DataFrame. 
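
Note: the `comment`/`header` interaction described above can be checked directly with the docstring's own example data. A sketch (not part of the diff):

    import io

    import pandas as pd

    buf = io.StringIO("#empty\na,b,c\n1,2,3\n")
    # The fully commented first line is ignored by `header`, so "a,b,c" is
    # treated as header row 0 and "1,2,3" becomes the single data row.
    df = pd.read_csv(buf, comment="#", header=0)
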
- -Examples --------- ->>> pd.{func_name}('data.csv') # doctest: +SKIP -""" -) - - -def validate_integer(name, val, min_val=0): - """ - Checks whether the 'name' parameter for parsing is either - an integer OR float that can SAFELY be cast to an integer - without losing accuracy. Raises a ValueError if that is - not the case. - - Parameters - ---------- - name : string - Parameter name (used for error reporting) - val : int or float - The value to check - min_val : int - Minimum allowed value (val < min_val will result in a ValueError) - """ - msg = f"'{name:s}' must be an integer >={min_val:d}" - - if val is not None: - if is_float(val): - if int(val) != val: - raise ValueError(msg) - val = int(val) - elif not (is_integer(val) and val >= min_val): - raise ValueError(msg) - - return val - - -def _validate_names(names): - """ - Raise ValueError if the `names` parameter contains duplicates or has an - invalid data type. - - Parameters - ---------- - names : array-like or None - An array containing a list of the names used for the output DataFrame. - - Raises - ------ - ValueError - If names are not unique or are not ordered (e.g. set). - """ - if names is not None: - if len(names) != len(set(names)): - raise ValueError("Duplicate names are not allowed.") - if not ( - is_list_like(names, allow_sets=False) or isinstance(names, abc.KeysView) - ): - raise ValueError("Names should be an ordered collection.") - - -def _read(filepath_or_buffer: FilePathOrBuffer, kwds): - """Generic reader of line files.""" - if kwds.get("date_parser", None) is not None: - if isinstance(kwds["parse_dates"], bool): - kwds["parse_dates"] = True - - # Extract some of the arguments (pass chunksize on). - iterator = kwds.get("iterator", False) - chunksize = validate_integer("chunksize", kwds.get("chunksize", None), 1) - nrows = kwds.get("nrows", None) - - # Check for duplicates in names. - _validate_names(kwds.get("names", None)) - - # Create the parser. 
- parser = TextFileReader(filepath_or_buffer, **kwds) - - if chunksize or iterator: - return parser - - with parser: - return parser.read(nrows) - - -_parser_defaults = { - "delimiter": None, - "escapechar": None, - "quotechar": '"', - "quoting": csv.QUOTE_MINIMAL, - "doublequote": True, - "skipinitialspace": False, - "lineterminator": None, - "header": "infer", - "index_col": None, - "names": None, - "prefix": None, - "skiprows": None, - "skipfooter": 0, - "nrows": None, - "na_values": None, - "keep_default_na": True, - "true_values": None, - "false_values": None, - "converters": None, - "dtype": None, - "cache_dates": True, - "thousands": None, - "comment": None, - "decimal": ".", - # 'engine': 'c', - "parse_dates": False, - "keep_date_col": False, - "dayfirst": False, - "date_parser": None, - "usecols": None, - # 'iterator': False, - "chunksize": None, - "verbose": False, - "encoding": None, - "squeeze": False, - "compression": None, - "mangle_dupe_cols": True, - "infer_datetime_format": False, - "skip_blank_lines": True, -} - - -_c_parser_defaults = { - "delim_whitespace": False, - "na_filter": True, - "low_memory": True, - "memory_map": False, - "error_bad_lines": True, - "warn_bad_lines": True, - "float_precision": None, -} - -_fwf_defaults = {"colspecs": "infer", "infer_nrows": 100, "widths": None} - -_c_unsupported = {"skipfooter"} -_python_unsupported = {"low_memory", "float_precision"} - -_deprecated_defaults: Dict[str, Any] = {} -_deprecated_args: Set[str] = set() - - -@Appender( - _doc_read_csv_and_table.format( - func_name="read_csv", - summary="Read a comma-separated values (csv) file into DataFrame.", - _default_sep="','", - storage_options=generic._shared_docs["storage_options"], - ) -) -def read_csv( - filepath_or_buffer: FilePathOrBuffer, - sep=lib.no_default, - delimiter=None, - # Column and Index Locations and Names - header="infer", - names=None, - index_col=None, - usecols=None, - squeeze=False, - prefix=None, - mangle_dupe_cols=True, - # General Parsing Configuration - dtype: Optional[DtypeArg] = None, - engine=None, - converters=None, - true_values=None, - false_values=None, - skipinitialspace=False, - skiprows=None, - skipfooter=0, - nrows=None, - # NA and Missing Data Handling - na_values=None, - keep_default_na=True, - na_filter=True, - verbose=False, - skip_blank_lines=True, - # Datetime Handling - parse_dates=False, - infer_datetime_format=False, - keep_date_col=False, - date_parser=None, - dayfirst=False, - cache_dates=True, - # Iteration - iterator=False, - chunksize=None, - # Quoting, Compression, and File Format - compression="infer", - thousands=None, - decimal: str = ".", - lineterminator=None, - quotechar='"', - quoting=csv.QUOTE_MINIMAL, - doublequote=True, - escapechar=None, - comment=None, - encoding=None, - dialect=None, - # Error Handling - error_bad_lines=True, - warn_bad_lines=True, - # Internal - delim_whitespace=False, - low_memory=_c_parser_defaults["low_memory"], - memory_map=False, - float_precision=None, - storage_options: StorageOptions = None, -): - kwds = locals() - del kwds["filepath_or_buffer"] - del kwds["sep"] - - kwds_defaults = _refine_defaults_read( - dialect, delimiter, delim_whitespace, engine, sep, defaults={"delimiter": ","} - ) - kwds.update(kwds_defaults) - - return _read(filepath_or_buffer, kwds) - - -@Appender( - _doc_read_csv_and_table.format( - func_name="read_table", - summary="Read general delimited file into DataFrame.", - _default_sep=r"'\\t' (tab-stop)", - storage_options=generic._shared_docs["storage_options"], - ) 
-) -def read_table( - filepath_or_buffer: FilePathOrBuffer, - sep=lib.no_default, - delimiter=None, - # Column and Index Locations and Names - header="infer", - names=None, - index_col=None, - usecols=None, - squeeze=False, - prefix=None, - mangle_dupe_cols=True, - # General Parsing Configuration - dtype: Optional[DtypeArg] = None, - engine=None, - converters=None, - true_values=None, - false_values=None, - skipinitialspace=False, - skiprows=None, - skipfooter=0, - nrows=None, - # NA and Missing Data Handling - na_values=None, - keep_default_na=True, - na_filter=True, - verbose=False, - skip_blank_lines=True, - # Datetime Handling - parse_dates=False, - infer_datetime_format=False, - keep_date_col=False, - date_parser=None, - dayfirst=False, - cache_dates=True, - # Iteration - iterator=False, - chunksize=None, - # Quoting, Compression, and File Format - compression="infer", - thousands=None, - decimal: str = ".", - lineterminator=None, - quotechar='"', - quoting=csv.QUOTE_MINIMAL, - doublequote=True, - escapechar=None, - comment=None, - encoding=None, - dialect=None, - # Error Handling - error_bad_lines=True, - warn_bad_lines=True, - # Internal - delim_whitespace=False, - low_memory=_c_parser_defaults["low_memory"], - memory_map=False, - float_precision=None, -): - kwds = locals() - del kwds["filepath_or_buffer"] - del kwds["sep"] - - kwds_defaults = _refine_defaults_read( - dialect, delimiter, delim_whitespace, engine, sep, defaults={"delimiter": "\t"} - ) - kwds.update(kwds_defaults) - - return _read(filepath_or_buffer, kwds) - - -def read_fwf( - filepath_or_buffer: FilePathOrBuffer, - colspecs="infer", - widths=None, - infer_nrows=100, - **kwds, -): - r""" - Read a table of fixed-width formatted lines into DataFrame. - - Also supports optionally iterating or breaking of the file - into chunks. - - Additional help can be found in the `online docs for IO Tools - `_. - - Parameters - ---------- - filepath_or_buffer : str, path object or file-like object - Any valid string path is acceptable. The string could be a URL. Valid - URL schemes include http, ftp, s3, and file. For file URLs, a host is - expected. A local file could be: - ``file://localhost/path/to/table.csv``. - - If you want to pass in a path object, pandas accepts any - ``os.PathLike``. - - By file-like object, we refer to objects with a ``read()`` method, - such as a file handle (e.g. via builtin ``open`` function) - or ``StringIO``. - colspecs : list of tuple (int, int) or 'infer'. optional - A list of tuples giving the extents of the fixed-width - fields of each line as half-open intervals (i.e., [from, to[ ). - String value 'infer' can be used to instruct the parser to try - detecting the column specifications from the first 100 rows of - the data which are not being skipped via skiprows (default='infer'). - widths : list of int, optional - A list of field widths which can be used instead of 'colspecs' if - the intervals are contiguous. - infer_nrows : int, default 100 - The number of rows to consider when letting the parser determine the - `colspecs`. - - .. versionadded:: 0.24.0 - **kwds : optional - Optional keyword arguments can be passed to ``TextFileReader``. - - Returns - ------- - DataFrame or TextParser - A comma-separated values (csv) file is returned as two-dimensional - data structure with labeled axes. - - See Also - -------- - DataFrame.to_csv : Write DataFrame to a comma-separated values (csv) file. - read_csv : Read a comma-separated values (csv) file into DataFrame. 
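
Note: as documented above, `widths` is a convenience spelling of contiguous `colspecs`; the expansion is the small loop near the top of `read_fwf` below. A sketch with invented fixed-width data:

    import io

    import pandas as pd

    fixed = io.StringIO(
        "id    name      score\n"
        "1     alice     10.50\n"
        "2     bob        3.25\n"
    )
    # Explicit half-open column extents...
    df = pd.read_fwf(fixed, colspecs=[(0, 6), (6, 16), (16, 21)])
    # ...or equivalent contiguous widths, which read_fwf expands internally
    # to the same extents [(0, 6), (6, 16), (16, 21)].
    fixed.seek(0)
    df2 = pd.read_fwf(fixed, widths=[6, 10, 5])
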
- - Examples - -------- - >>> pd.read_fwf('data.csv') # doctest: +SKIP - """ - # Check input arguments. - if colspecs is None and widths is None: - raise ValueError("Must specify either colspecs or widths") - elif colspecs not in (None, "infer") and widths is not None: - raise ValueError("You must specify only one of 'widths' and 'colspecs'") - - # Compute 'colspecs' from 'widths', if specified. - if widths is not None: - colspecs, col = [], 0 - for w in widths: - colspecs.append((col, col + w)) - col += w - - kwds["colspecs"] = colspecs - kwds["infer_nrows"] = infer_nrows - kwds["engine"] = "python-fwf" - return _read(filepath_or_buffer, kwds) - - -class TextFileReader(abc.Iterator): - """ - - Passed dialect overrides any of the related parser options - - """ - - def __init__(self, f, engine=None, **kwds): - - self.f = f - - if engine is not None: - engine_specified = True - else: - engine = "python" - engine_specified = False - self.engine = engine - self._engine_specified = kwds.get("engine_specified", engine_specified) - - _validate_skipfooter(kwds) - - dialect = _extract_dialect(kwds) - if dialect is not None: - kwds = _merge_with_dialect_properties(dialect, kwds) - - if kwds.get("header", "infer") == "infer": - kwds["header"] = 0 if kwds.get("names") is None else None - - self.orig_options = kwds - - # miscellanea - self._currow = 0 - - options = self._get_options_with_defaults(engine) - options["storage_options"] = kwds.get("storage_options", None) - - self.chunksize = options.pop("chunksize", None) - self.nrows = options.pop("nrows", None) - self.squeeze = options.pop("squeeze", False) - - self._check_file_or_buffer(f, engine) - self.options, self.engine = self._clean_options(options, engine) - - if "has_index_names" in kwds: - self.options["has_index_names"] = kwds["has_index_names"] - - self._engine = self._make_engine(self.engine) - - def close(self): - self._engine.close() - - def _get_options_with_defaults(self, engine): - kwds = self.orig_options - - options = {} - - for argname, default in _parser_defaults.items(): - value = kwds.get(argname, default) - - # see gh-12935 - if argname == "mangle_dupe_cols" and not value: - raise ValueError("Setting mangle_dupe_cols=False is not supported yet") - else: - options[argname] = value - - for argname, default in _c_parser_defaults.items(): - if argname in kwds: - value = kwds[argname] - - if engine != "c" and value != default: - if "python" in engine and argname not in _python_unsupported: - pass - elif value == _deprecated_defaults.get(argname, default): - pass - else: - raise ValueError( - f"The {repr(argname)} option is not supported with the " - f"{repr(engine)} engine" - ) - else: - value = _deprecated_defaults.get(argname, default) - options[argname] = value - - if engine == "python-fwf": - # pandas\io\parsers.py:907: error: Incompatible types in assignment - # (expression has type "object", variable has type "Union[int, str, - # None]") [assignment] - for argname, default in _fwf_defaults.items(): # type: ignore[assignment] - options[argname] = kwds.get(argname, default) - - return options - - def _check_file_or_buffer(self, f, engine): - # see gh-16530 - if is_file_like(f) and engine != "c" and not hasattr(f, "__next__"): - # The C engine doesn't need the file-like to have the "__next__" - # attribute. 
However, the Python engine explicitly calls - # "__next__(...)" when iterating through such an object, meaning it - # needs to have that attribute - raise ValueError( - "The 'python' engine cannot iterate through this file buffer." - ) - - def _clean_options(self, options, engine): - result = options.copy() - - fallback_reason = None - - # C engine not supported yet - if engine == "c": - if options["skipfooter"] > 0: - fallback_reason = "the 'c' engine does not support skipfooter" - engine = "python" - - sep = options["delimiter"] - delim_whitespace = options["delim_whitespace"] - - if sep is None and not delim_whitespace: - if engine == "c": - fallback_reason = ( - "the 'c' engine does not support " - "sep=None with delim_whitespace=False" - ) - engine = "python" - elif sep is not None and len(sep) > 1: - if engine == "c" and sep == r"\s+": - result["delim_whitespace"] = True - del result["delimiter"] - elif engine not in ("python", "python-fwf"): - # wait until regex engine integrated - fallback_reason = ( - "the 'c' engine does not support " - "regex separators (separators > 1 char and " - r"different from '\s+' are interpreted as regex)" - ) - engine = "python" - elif delim_whitespace: - if "python" in engine: - result["delimiter"] = r"\s+" - elif sep is not None: - encodeable = True - encoding = sys.getfilesystemencoding() or "utf-8" - try: - if len(sep.encode(encoding)) > 1: - encodeable = False - except UnicodeDecodeError: - encodeable = False - if not encodeable and engine not in ("python", "python-fwf"): - fallback_reason = ( - f"the separator encoded in {encoding} " - "is > 1 char long, and the 'c' engine " - "does not support such separators" - ) - engine = "python" - - quotechar = options["quotechar"] - if quotechar is not None and isinstance(quotechar, (str, bytes)): - if ( - len(quotechar) == 1 - and ord(quotechar) > 127 - and engine not in ("python", "python-fwf") - ): - fallback_reason = ( - "ord(quotechar) > 127, meaning the " - "quotechar is larger than one byte, " - "and the 'c' engine does not support such quotechars" - ) - engine = "python" - - if fallback_reason and self._engine_specified: - raise ValueError(fallback_reason) - - if engine == "c": - for arg in _c_unsupported: - del result[arg] - - if "python" in engine: - for arg in _python_unsupported: - if fallback_reason and result[arg] != _c_parser_defaults[arg]: - raise ValueError( - "Falling back to the 'python' engine because " - f"{fallback_reason}, but this causes {repr(arg)} to be " - "ignored as it is not supported by the 'python' engine." - ) - del result[arg] - - if fallback_reason: - warnings.warn( - ( - "Falling back to the 'python' engine because " - f"{fallback_reason}; you can avoid this warning by specifying " - "engine='python'." 
- ), - ParserWarning, - stacklevel=5, - ) - - index_col = options["index_col"] - names = options["names"] - converters = options["converters"] - na_values = options["na_values"] - skiprows = options["skiprows"] - - validate_header_arg(options["header"]) - - for arg in _deprecated_args: - parser_default = _c_parser_defaults[arg] - depr_default = _deprecated_defaults[arg] - if result.get(arg, depr_default) != depr_default: - msg = ( - f"The {arg} argument has been deprecated and will be " - "removed in a future version.\n\n" - ) - warnings.warn(msg, FutureWarning, stacklevel=2) - else: - result[arg] = parser_default - - if index_col is True: - raise ValueError("The value of index_col couldn't be 'True'") - if _is_index_col(index_col): - if not isinstance(index_col, (list, tuple, np.ndarray)): - index_col = [index_col] - result["index_col"] = index_col - - names = list(names) if names is not None else names - - # type conversion-related - if converters is not None: - if not isinstance(converters, dict): - raise TypeError( - "Type converters must be a dict or subclass, " - f"input was a {type(converters).__name__}" - ) - else: - converters = {} - - # Converting values to NA - keep_default_na = options["keep_default_na"] - na_values, na_fvalues = _clean_na_values(na_values, keep_default_na) - - # handle skiprows; this is internally handled by the - # c-engine, so only need for python parsers - if engine != "c": - if is_integer(skiprows): - skiprows = list(range(skiprows)) - if skiprows is None: - skiprows = set() - elif not callable(skiprows): - skiprows = set(skiprows) - - # put stuff back - result["names"] = names - result["converters"] = converters - result["na_values"] = na_values - result["na_fvalues"] = na_fvalues - result["skiprows"] = skiprows - - return result, engine - - def __next__(self): - try: - return self.get_chunk() - except StopIteration: - self.close() - raise - - def _make_engine(self, engine="c"): - mapping: Dict[str, Type[ParserBase]] = { - "c": CParserWrapper, - "python": PythonParser, - "python-fwf": FixedWidthFieldParser, - } - if engine not in mapping: - raise ValueError( - f"Unknown engine: {engine} (valid options are {mapping.keys()})" - ) - # error: Too many arguments for "ParserBase" - return mapping[engine](self.f, **self.options) # type: ignore[call-arg] - - def _failover_to_python(self): - raise AbstractMethodError(self) - - def read(self, nrows=None): - nrows = validate_integer("nrows", nrows) - index, columns, col_dict = self._engine.read(nrows) - - if index is None: - if col_dict: - # Any column is actually fine: - new_rows = len(next(iter(col_dict.values()))) - index = RangeIndex(self._currow, self._currow + new_rows) - else: - new_rows = 0 - else: - new_rows = len(index) - - df = DataFrame(col_dict, columns=columns, index=index) - - self._currow += new_rows - - if self.squeeze and len(df.columns) == 1: - return df[df.columns[0]].copy() - return df - - def get_chunk(self, size=None): - if size is None: - size = self.chunksize - if self.nrows is not None: - if self._currow >= self.nrows: - raise StopIteration - size = min(size, self.nrows - self._currow) - return self.read(nrows=size) - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_value, traceback): - self.close() - - -def _is_index_col(col): - return col is not None and col is not False - - -def _is_potential_multi_index( - columns, index_col: Optional[Union[bool, Sequence[int]]] = None -): - """ - Check whether or not the `columns` parameter - could be converted into a 
MultiIndex. - - Parameters - ---------- - columns : array-like - Object which may or may not be convertible into a MultiIndex - index_col : None, bool or list, optional - Column or columns to use as the (possibly hierarchical) index - - Returns - ------- - boolean : Whether or not columns could become a MultiIndex - """ - if index_col is None or isinstance(index_col, bool): - index_col = [] - - return ( - len(columns) - and not isinstance(columns, MultiIndex) - and all(isinstance(c, tuple) for c in columns if c not in list(index_col)) - ) - - -def _evaluate_usecols(usecols, names): - """ - Check whether or not the 'usecols' parameter - is a callable. If so, enumerates the 'names' - parameter and returns a set of indices for - each entry in 'names' that evaluates to True. - If not a callable, returns 'usecols'. - """ - if callable(usecols): - return {i for i, name in enumerate(names) if usecols(name)} - return usecols - - -def _validate_usecols_names(usecols, names): - """ - Validates that all usecols are present in a given - list of names. If not, raise a ValueError that - shows what usecols are missing. - - Parameters - ---------- - usecols : iterable of usecols - The columns to validate are present in names. - names : iterable of names - The column names to check against. - - Returns - ------- - usecols : iterable of usecols - The `usecols` parameter if the validation succeeds. - - Raises - ------ - ValueError : Columns were missing. Error message will list them. - """ - missing = [c for c in usecols if c not in names] - if len(missing) > 0: - raise ValueError( - f"Usecols do not match columns, columns expected but not found: {missing}" - ) - - return usecols - - -def _validate_skipfooter_arg(skipfooter): - """ - Validate the 'skipfooter' parameter. - - Checks whether 'skipfooter' is a non-negative integer. - Raises a ValueError if that is not the case. - - Parameters - ---------- - skipfooter : non-negative integer - The number of rows to skip at the end of the file. - - Returns - ------- - validated_skipfooter : non-negative integer - The original input if the validation succeeds. - - Raises - ------ - ValueError : 'skipfooter' was not a non-negative integer. - """ - if not is_integer(skipfooter): - raise ValueError("skipfooter must be an integer") - - if skipfooter < 0: - raise ValueError("skipfooter cannot be negative") - - return skipfooter - - -def _validate_usecols_arg(usecols): - """ - Validate the 'usecols' parameter. - - Checks whether or not the 'usecols' parameter contains all integers - (column selection by index), strings (column by name) or is a callable. - Raises a ValueError if that is not the case. - - Parameters - ---------- - usecols : list-like, callable, or None - List of columns to use when parsing or a callable that can be used - to filter a list of table columns. - - Returns - ------- - usecols_tuple : tuple - A tuple of (verified_usecols, usecols_dtype). - - 'verified_usecols' is either a set if an array-like is passed in or - 'usecols' if a callable or None is passed in. - - 'usecols_dtype` is the inferred dtype of 'usecols' if an array-like - is passed in or None if a callable or None is passed in. - """ - msg = ( - "'usecols' must either be list-like of all strings, all unicode, " - "all integers or a callable." - ) - if usecols is not None: - if callable(usecols): - return usecols, None - - if not is_list_like(usecols): - # see gh-20529 - # - # Ensure it is iterable container but not string. 
- raise ValueError(msg) - - usecols_dtype = lib.infer_dtype(usecols, skipna=False) - - if usecols_dtype not in ("empty", "integer", "string"): - raise ValueError(msg) - - usecols = set(usecols) - - return usecols, usecols_dtype - return usecols, None - - -def _validate_parse_dates_arg(parse_dates): - """ - Check whether or not the 'parse_dates' parameter - is a non-boolean scalar. Raises a ValueError if - that is the case. - """ - msg = ( - "Only booleans, lists, and dictionaries are accepted " - "for the 'parse_dates' parameter" - ) - - if parse_dates is not None: - if is_scalar(parse_dates): - if not lib.is_bool(parse_dates): - raise TypeError(msg) - - elif not isinstance(parse_dates, (list, dict)): - raise TypeError(msg) - - return parse_dates - - -class ParserBase: - def __init__(self, kwds): - - self.names = kwds.get("names") - self.orig_names: Optional[List] = None - self.prefix = kwds.pop("prefix", None) - - self.index_col = kwds.get("index_col", None) - self.unnamed_cols: Set = set() - self.index_names: Optional[List] = None - self.col_names = None - - self.parse_dates = _validate_parse_dates_arg(kwds.pop("parse_dates", False)) - self.date_parser = kwds.pop("date_parser", None) - self.dayfirst = kwds.pop("dayfirst", False) - self.keep_date_col = kwds.pop("keep_date_col", False) - - self.na_values = kwds.get("na_values") - self.na_fvalues = kwds.get("na_fvalues") - self.na_filter = kwds.get("na_filter", False) - self.keep_default_na = kwds.get("keep_default_na", True) - - self.true_values = kwds.get("true_values") - self.false_values = kwds.get("false_values") - self.mangle_dupe_cols = kwds.get("mangle_dupe_cols", True) - self.infer_datetime_format = kwds.pop("infer_datetime_format", False) - self.cache_dates = kwds.pop("cache_dates", True) - - self._date_conv = _make_date_converter( - date_parser=self.date_parser, - dayfirst=self.dayfirst, - infer_datetime_format=self.infer_datetime_format, - cache_dates=self.cache_dates, - ) - - # validate header options for mi - self.header = kwds.get("header") - if isinstance(self.header, (list, tuple, np.ndarray)): - if not all(map(is_integer, self.header)): - raise ValueError("header must be integer or list of integers") - if any(i < 0 for i in self.header): - raise ValueError( - "cannot specify multi-index header with negative integers" - ) - if kwds.get("usecols"): - raise ValueError( - "cannot specify usecols when specifying a multi-index header" - ) - if kwds.get("names"): - raise ValueError( - "cannot specify names when specifying a multi-index header" - ) - - # validate index_col that only contains integers - if self.index_col is not None: - is_sequence = isinstance(self.index_col, (list, tuple, np.ndarray)) - if not ( - is_sequence - and all(map(is_integer, self.index_col)) - or is_integer(self.index_col) - ): - raise ValueError( - "index_col must only contain row numbers " - "when specifying a multi-index header" - ) - elif self.header is not None: - # GH 27394 - if self.prefix is not None: - raise ValueError( - "Argument prefix must be None if argument header is not None" - ) - # GH 16338 - elif not is_integer(self.header): - raise ValueError("header must be integer or list of integers") - # GH 27779 - elif self.header < 0: - raise ValueError( - "Passing negative integer to header is invalid. 
" - "For no header, use header=None instead" - ) - - self._name_processed = False - - self._first_chunk = True - - self.handles: Optional[IOHandles] = None - - def _open_handles(self, src: FilePathOrBuffer, kwds: Dict[str, Any]) -> None: - """ - Let the readers open IOHanldes after they are done with their potential raises. - """ - self.handles = get_handle( - src, - "r", - encoding=kwds.get("encoding", None), - compression=kwds.get("compression", None), - memory_map=kwds.get("memory_map", False), - storage_options=kwds.get("storage_options", None), - ) - - def _validate_parse_dates_presence(self, columns: List[str]) -> None: - """ - Check if parse_dates are in columns. - - If user has provided names for parse_dates, check if those columns - are available. - - Parameters - ---------- - columns : list - List of names of the dataframe. - - Raises - ------ - ValueError - If column to parse_date is not in dataframe. - - """ - cols_needed: Iterable - if is_dict_like(self.parse_dates): - cols_needed = itertools.chain(*self.parse_dates.values()) - elif is_list_like(self.parse_dates): - # a column in parse_dates could be represented - # ColReference = Union[int, str] - # DateGroups = List[ColReference] - # ParseDates = Union[DateGroups, List[DateGroups], - # Dict[ColReference, DateGroups]] - cols_needed = itertools.chain.from_iterable( - col if is_list_like(col) else [col] for col in self.parse_dates - ) - else: - cols_needed = [] - - # get only columns that are references using names (str), not by index - missing_cols = ", ".join( - sorted( - { - col - for col in cols_needed - if isinstance(col, str) and col not in columns - } - ) - ) - if missing_cols: - raise ValueError( - f"Missing column provided to 'parse_dates': '{missing_cols}'" - ) - - def close(self): - if self.handles is not None: - self.handles.close() - - @property - def _has_complex_date_col(self): - return isinstance(self.parse_dates, dict) or ( - isinstance(self.parse_dates, list) - and len(self.parse_dates) > 0 - and isinstance(self.parse_dates[0], list) - ) - - def _should_parse_dates(self, i): - if isinstance(self.parse_dates, bool): - return self.parse_dates - else: - if self.index_names is not None: - name = self.index_names[i] - else: - name = None - j = i if self.index_col is None else self.index_col[i] - - if is_scalar(self.parse_dates): - return (j == self.parse_dates) or ( - name is not None and name == self.parse_dates - ) - else: - return (j in self.parse_dates) or ( - name is not None and name in self.parse_dates - ) - - def _extract_multi_indexer_columns( - self, header, index_names, col_names, passed_names=False - ): - """ - extract and return the names, index_names, col_names - header is a list-of-lists returned from the parsers - """ - if len(header) < 2: - return header[0], index_names, col_names, passed_names - - # the names are the tuples of the header that are not the index cols - # 0 is the name of the index, assuming index_col is a list of column - # numbers - ic = self.index_col - if ic is None: - ic = [] - - if not isinstance(ic, (list, tuple, np.ndarray)): - ic = [ic] - sic = set(ic) - - # clean the index_names - index_names = header.pop(-1) - index_names, _, _ = _clean_index_names( - index_names, self.index_col, self.unnamed_cols - ) - - # extract the columns - field_count = len(header[0]) - - def extract(r): - return tuple(r[i] for i in range(field_count) if i not in sic) - - columns = list(zip(*(extract(r) for r in header))) - names = ic + columns - - # If we find unnamed columns all in a single - # 
level, then our header was too long. - for n in range(len(columns[0])): - if all(ensure_str(col[n]) in self.unnamed_cols for col in columns): - header = ",".join(str(x) for x in self.header) - raise ParserError( - f"Passed header=[{header}] are too many rows " - "for this multi_index of columns" - ) - - # Clean the column names (if we have an index_col). - if len(ic): - col_names = [ - r[0] if ((r[0] is not None) and r[0] not in self.unnamed_cols) else None - for r in header - ] - else: - col_names = [None] * len(header) - - passed_names = True - - return names, index_names, col_names, passed_names - - def _maybe_dedup_names(self, names): - # see gh-7160 and gh-9424: this helps to provide - # immediate alleviation of the duplicate names - # issue and appears to be satisfactory to users, - # but ultimately, not needing to butcher the names - # would be nice! - if self.mangle_dupe_cols: - names = list(names) # so we can index - # pandas\io\parsers.py:1559: error: Need type annotation for - # 'counts' [var-annotated] - counts = defaultdict(int) # type: ignore[var-annotated] - is_potential_mi = _is_potential_multi_index(names, self.index_col) - - for i, col in enumerate(names): - cur_count = counts[col] - - while cur_count > 0: - counts[col] = cur_count + 1 - - if is_potential_mi: - col = col[:-1] + (f"{col[-1]}.{cur_count}",) - else: - col = f"{col}.{cur_count}" - cur_count = counts[col] - - names[i] = col - counts[col] = cur_count + 1 - - return names - - def _maybe_make_multi_index_columns(self, columns, col_names=None): - # possibly create a column mi here - if _is_potential_multi_index(columns): - columns = MultiIndex.from_tuples(columns, names=col_names) - return columns - - def _make_index(self, data, alldata, columns, indexnamerow=False): - if not _is_index_col(self.index_col) or not self.index_col: - index = None - - elif not self._has_complex_date_col: - index = self._get_simple_index(alldata, columns) - index = self._agg_index(index) - elif self._has_complex_date_col: - if not self._name_processed: - (self.index_names, _, self.index_col) = _clean_index_names( - list(columns), self.index_col, self.unnamed_cols - ) - self._name_processed = True - index = self._get_complex_date_index(data, columns) - index = self._agg_index(index, try_parse_dates=False) - - # add names for the index - if indexnamerow: - coffset = len(indexnamerow) - len(columns) - # pandas\io\parsers.py:1604: error: Item "None" of "Optional[Any]" - # has no attribute "set_names" [union-attr] - index = index.set_names(indexnamerow[:coffset]) # type: ignore[union-attr] - - # maybe create a mi on the columns - columns = self._maybe_make_multi_index_columns(columns, self.col_names) - - return index, columns - - _implicit_index = False - - def _get_simple_index(self, data, columns): - def ix(col): - if not isinstance(col, str): - return col - raise ValueError(f"Index {col} invalid") - - to_remove = [] - index = [] - for idx in self.index_col: - i = ix(idx) - to_remove.append(i) - index.append(data[i]) - - # remove index items from content and columns, don't pop in - # loop - for i in sorted(to_remove, reverse=True): - data.pop(i) - if not self._implicit_index: - columns.pop(i) - - return index - - def _get_complex_date_index(self, data, col_names): - def _get_name(icol): - if isinstance(icol, str): - return icol - - if col_names is None: - raise ValueError(f"Must supply column order to use {icol!s} as index") - - for i, c in enumerate(col_names): - if i == icol: - return c - - to_remove = [] - index = [] - for idx in 
self.index_col: - name = _get_name(idx) - to_remove.append(name) - index.append(data[name]) - - # remove index items from content and columns, don't pop in - # loop - for c in sorted(to_remove, reverse=True): - data.pop(c) - col_names.remove(c) - - return index - - def _agg_index(self, index, try_parse_dates=True) -> Index: - arrays = [] - - for i, arr in enumerate(index): - - if try_parse_dates and self._should_parse_dates(i): - arr = self._date_conv(arr) - - if self.na_filter: - col_na_values = self.na_values - col_na_fvalues = self.na_fvalues - else: - col_na_values = set() - col_na_fvalues = set() - - if isinstance(self.na_values, dict): - # pandas\io\parsers.py:1678: error: Value of type - # "Optional[Any]" is not indexable [index] - col_name = self.index_names[i] # type: ignore[index] - if col_name is not None: - col_na_values, col_na_fvalues = _get_na_values( - col_name, self.na_values, self.na_fvalues, self.keep_default_na - ) - - arr, _ = self._infer_types(arr, col_na_values | col_na_fvalues) - arrays.append(arr) - - names = self.index_names - index = ensure_index_from_sequences(arrays, names) - - return index - - def _convert_to_ndarrays( - self, dct, na_values, na_fvalues, verbose=False, converters=None, dtypes=None - ): - result = {} - for c, values in dct.items(): - conv_f = None if converters is None else converters.get(c, None) - if isinstance(dtypes, dict): - cast_type = dtypes.get(c, None) - else: - # single dtype or None - cast_type = dtypes - - if self.na_filter: - col_na_values, col_na_fvalues = _get_na_values( - c, na_values, na_fvalues, self.keep_default_na - ) - else: - col_na_values, col_na_fvalues = set(), set() - - if conv_f is not None: - # conv_f applied to data before inference - if cast_type is not None: - warnings.warn( - ( - "Both a converter and dtype were specified " - f"for column {c} - only the converter will be used" - ), - ParserWarning, - stacklevel=7, - ) - - try: - values = lib.map_infer(values, conv_f) - except ValueError: - mask = algorithms.isin(values, list(na_values)).view(np.uint8) - values = lib.map_infer_mask(values, conv_f, mask) - - cvals, na_count = self._infer_types( - values, set(col_na_values) | col_na_fvalues, try_num_bool=False - ) - else: - is_ea = is_extension_array_dtype(cast_type) - is_str_or_ea_dtype = is_ea or is_string_dtype(cast_type) - # skip inference if specified dtype is object - # or casting to an EA - try_num_bool = not (cast_type and is_str_or_ea_dtype) - - # general type inference and conversion - cvals, na_count = self._infer_types( - values, set(col_na_values) | col_na_fvalues, try_num_bool - ) - - # type specified in dtype param or cast_type is an EA - if cast_type and ( - not is_dtype_equal(cvals, cast_type) - or is_extension_array_dtype(cast_type) - ): - if not is_ea and na_count > 0: - try: - if is_bool_dtype(cast_type): - raise ValueError( - f"Bool column has NA values in column {c}" - ) - except (AttributeError, TypeError): - # invalid input to is_bool_dtype - pass - cast_type = pandas_dtype(cast_type) - cvals = self._cast_types(cvals, cast_type, c) - - result[c] = cvals - if verbose and na_count: - print(f"Filled {na_count} NA values in column {c!s}") - return result - - def _infer_types(self, values, na_values, try_num_bool=True): - """ - Infer types of values, possibly casting - - Parameters - ---------- - values : ndarray - na_values : set - try_num_bool : bool, default try - try to cast values to numeric (first preference) or boolean - - Returns - ------- - converted : ndarray - na_count : int - """ - 
na_count = 0 - if issubclass(values.dtype.type, (np.number, np.bool_)): - mask = algorithms.isin(values, list(na_values)) - na_count = mask.sum() - if na_count > 0: - if is_integer_dtype(values): - values = values.astype(np.float64) - np.putmask(values, mask, np.nan) - return values, na_count - - if try_num_bool and is_object_dtype(values.dtype): - # exclude e.g DatetimeIndex here - try: - result = lib.maybe_convert_numeric(values, na_values, False) - except (ValueError, TypeError): - # e.g. encountering datetime string gets ValueError - # TypeError can be raised in floatify - result = values - na_count = parsers.sanitize_objects(result, na_values, False) - else: - na_count = isna(result).sum() - else: - result = values - if values.dtype == np.object_: - na_count = parsers.sanitize_objects(values, na_values, False) - - if result.dtype == np.object_ and try_num_bool: - result = libops.maybe_convert_bool( - np.asarray(values), - true_values=self.true_values, - false_values=self.false_values, - ) - - return result, na_count - - def _cast_types(self, values, cast_type, column): - """ - Cast values to specified type - - Parameters - ---------- - values : ndarray - cast_type : string or np.dtype - dtype to cast values to - column : string - column name - used only for error reporting - - Returns - ------- - converted : ndarray - """ - if is_categorical_dtype(cast_type): - known_cats = ( - isinstance(cast_type, CategoricalDtype) - and cast_type.categories is not None - ) - - if not is_object_dtype(values) and not known_cats: - # TODO: this is for consistency with - # c-parser which parses all categories - # as strings - values = astype_nansafe(values, str) - - cats = Index(values).unique().dropna() - values = Categorical._from_inferred_categories( - cats, cats.get_indexer(values), cast_type, true_values=self.true_values - ) - - # use the EA's implementation of casting - elif is_extension_array_dtype(cast_type): - # ensure cast_type is an actual dtype and not a string - cast_type = pandas_dtype(cast_type) - array_type = cast_type.construct_array_type() - try: - if is_bool_dtype(cast_type): - return array_type._from_sequence_of_strings( - values, - dtype=cast_type, - true_values=self.true_values, - false_values=self.false_values, - ) - else: - return array_type._from_sequence_of_strings(values, dtype=cast_type) - except NotImplementedError as err: - raise NotImplementedError( - f"Extension Array: {array_type} must implement " - "_from_sequence_of_strings in order to be used in parser methods" - ) from err - - else: - try: - values = astype_nansafe(values, cast_type, copy=True, skipna=True) - except ValueError as err: - raise ValueError( - f"Unable to convert column {column} to type {cast_type}" - ) from err - return values - - def _do_date_conversions(self, names, data): - # returns data, columns - - if self.parse_dates is not None: - data, names = _process_date_conversion( - data, - self._date_conv, - self.parse_dates, - self.index_col, - self.index_names, - names, - keep_date_col=self.keep_date_col, - ) - - return names, data - - -class CParserWrapper(ParserBase): - def __init__(self, src: FilePathOrBuffer, **kwds): - self.kwds = kwds - kwds = kwds.copy() - - ParserBase.__init__(self, kwds) - - # #2442 - kwds["allow_leading_cols"] = self.index_col is not False - - # GH20529, validate usecol arg before TextReader - self.usecols, self.usecols_dtype = _validate_usecols_arg(kwds["usecols"]) - kwds["usecols"] = self.usecols - - # open handles - self._open_handles(src, kwds) - assert self.handles is 
not None - for key in ("storage_options", "encoding", "memory_map", "compression"): - kwds.pop(key, None) - if self.handles.is_mmap and hasattr(self.handles.handle, "mmap"): - # pandas\io\parsers.py:1861: error: Item "IO[Any]" of - # "Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase, - # TextIOWrapper, mmap]" has no attribute "mmap" [union-attr] - - # pandas\io\parsers.py:1861: error: Item "RawIOBase" of - # "Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase, - # TextIOWrapper, mmap]" has no attribute "mmap" [union-attr] - - # pandas\io\parsers.py:1861: error: Item "BufferedIOBase" of - # "Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase, - # TextIOWrapper, mmap]" has no attribute "mmap" [union-attr] - - # pandas\io\parsers.py:1861: error: Item "TextIOBase" of - # "Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase, - # TextIOWrapper, mmap]" has no attribute "mmap" [union-attr] - - # pandas\io\parsers.py:1861: error: Item "TextIOWrapper" of - # "Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase, - # TextIOWrapper, mmap]" has no attribute "mmap" [union-attr] - - # pandas\io\parsers.py:1861: error: Item "mmap" of "Union[IO[Any], - # RawIOBase, BufferedIOBase, TextIOBase, TextIOWrapper, mmap]" has - # no attribute "mmap" [union-attr] - self.handles.handle = self.handles.handle.mmap # type: ignore[union-attr] - - try: - self._reader = parsers.TextReader(self.handles.handle, **kwds) - except Exception: - self.handles.close() - raise - self.unnamed_cols = self._reader.unnamed_cols - - passed_names = self.names is None - - if self._reader.header is None: - self.names = None - else: - if len(self._reader.header) > 1: - # we have a multi index in the columns - ( - self.names, - self.index_names, - self.col_names, - passed_names, - ) = self._extract_multi_indexer_columns( - self._reader.header, self.index_names, self.col_names, passed_names - ) - else: - self.names = list(self._reader.header[0]) - - if self.names is None: - if self.prefix: - self.names = [ - f"{self.prefix}{i}" for i in range(self._reader.table_width) - ] - else: - self.names = list(range(self._reader.table_width)) - - # gh-9755 - # - # need to set orig_names here first - # so that proper indexing can be done - # with _set_noconvert_columns - # - # once names has been filtered, we will - # then set orig_names again to names - self.orig_names = self.names[:] - - if self.usecols: - usecols = _evaluate_usecols(self.usecols, self.orig_names) - - # GH 14671 - # assert for mypy, orig_names is List or None, None would error in issubset - assert self.orig_names is not None - if self.usecols_dtype == "string" and not set(usecols).issubset( - self.orig_names - ): - _validate_usecols_names(usecols, self.orig_names) - - if len(self.names) > len(usecols): - self.names = [ - n - for i, n in enumerate(self.names) - if (i in usecols or n in usecols) - ] - - if len(self.names) < len(usecols): - _validate_usecols_names(usecols, self.names) - - self._validate_parse_dates_presence(self.names) - self._set_noconvert_columns() - - self.orig_names = self.names - - if not self._has_complex_date_col: - if self._reader.leading_cols == 0 and _is_index_col(self.index_col): - - self._name_processed = True - (index_names, self.names, self.index_col) = _clean_index_names( - self.names, self.index_col, self.unnamed_cols - ) - - if self.index_names is None: - self.index_names = index_names - - if self._reader.header is None and not passed_names: - # pandas\io\parsers.py:1997: error: Argument 1 to "len" has - # incompatible type "Optional[Any]"; 
expected "Sized" - # [arg-type] - self.index_names = [None] * len( - self.index_names # type: ignore[arg-type] - ) - - self._implicit_index = self._reader.leading_cols > 0 - - def close(self) -> None: - super().close() - - # close additional handles opened by C parser - try: - self._reader.close() - except ValueError: - pass - - def _set_noconvert_columns(self): - """ - Set the columns that should not undergo dtype conversions. - - Currently, any column that is involved with date parsing will not - undergo such conversions. - """ - names = self.orig_names - if self.usecols_dtype == "integer": - # A set of integers will be converted to a list in - # the correct order every single time. - usecols = list(self.usecols) - usecols.sort() - elif callable(self.usecols) or self.usecols_dtype not in ("empty", None): - # The names attribute should have the correct columns - # in the proper order for indexing with parse_dates. - usecols = self.names[:] - else: - # Usecols is empty. - - # pandas\io\parsers.py:2030: error: Incompatible types in - # assignment (expression has type "None", variable has type - # "List[Any]") [assignment] - usecols = None # type: ignore[assignment] - - def _set(x): - if usecols is not None and is_integer(x): - x = usecols[x] - - if not is_integer(x): - # assert for mypy, names is List or None, None would error when calling - # .index() - assert names is not None - x = names.index(x) - - self._reader.set_noconvert(x) - - if isinstance(self.parse_dates, list): - for val in self.parse_dates: - if isinstance(val, list): - for k in val: - _set(k) - else: - _set(val) - - elif isinstance(self.parse_dates, dict): - for val in self.parse_dates.values(): - if isinstance(val, list): - for k in val: - _set(k) - else: - _set(val) - - elif self.parse_dates: - if isinstance(self.index_col, list): - for k in self.index_col: - _set(k) - elif self.index_col is not None: - _set(self.index_col) - - def set_error_bad_lines(self, status): - self._reader.set_error_bad_lines(int(status)) - - def read(self, nrows=None): - try: - data = self._reader.read(nrows) - except StopIteration: - if self._first_chunk: - self._first_chunk = False - names = self._maybe_dedup_names(self.orig_names) - index, columns, col_dict = _get_empty_meta( - names, - self.index_col, - self.index_names, - dtype=self.kwds.get("dtype"), - ) - columns = self._maybe_make_multi_index_columns(columns, self.col_names) - - if self.usecols is not None: - columns = self._filter_usecols(columns) - - col_dict = {k: v for k, v in col_dict.items() if k in columns} - - return index, columns, col_dict - - else: - self.close() - raise - - # Done with first read, next time raise StopIteration - self._first_chunk = False - - names = self.names - - if self._reader.leading_cols: - if self._has_complex_date_col: - raise NotImplementedError("file structure not yet supported") - - # implicit index, no index names - arrays = [] - - for i in range(self._reader.leading_cols): - if self.index_col is None: - values = data.pop(i) - else: - values = data.pop(self.index_col[i]) - - values = self._maybe_parse_dates(values, i, try_parse_dates=True) - arrays.append(values) - - index = ensure_index_from_sequences(arrays) - - if self.usecols is not None: - names = self._filter_usecols(names) - - names = self._maybe_dedup_names(names) - - # rename dict keys - data = sorted(data.items()) - data = {k: v for k, (i, v) in zip(names, data)} - - names, data = self._do_date_conversions(names, data) - - else: - # rename dict keys - data = sorted(data.items()) - - # ugh, 
mutation - - # assert for mypy, orig_names is List or None, None would error in list(...) - assert self.orig_names is not None - names = list(self.orig_names) - names = self._maybe_dedup_names(names) - - if self.usecols is not None: - names = self._filter_usecols(names) - - # columns as list - alldata = [x[1] for x in data] - - data = {k: v for k, (i, v) in zip(names, data)} - - names, data = self._do_date_conversions(names, data) - index, names = self._make_index(data, alldata, names) - - # maybe create a mi on the columns - names = self._maybe_make_multi_index_columns(names, self.col_names) - - return index, names, data - - def _filter_usecols(self, names): - # hackish - usecols = _evaluate_usecols(self.usecols, names) - if usecols is not None and len(names) != len(usecols): - names = [ - name for i, name in enumerate(names) if i in usecols or name in usecols - ] - return names - - def _get_index_names(self): - names = list(self._reader.header[0]) - idx_names = None - - if self._reader.leading_cols == 0 and self.index_col is not None: - (idx_names, names, self.index_col) = _clean_index_names( - names, self.index_col, self.unnamed_cols - ) - - return names, idx_names - - def _maybe_parse_dates(self, values, index, try_parse_dates=True): - if try_parse_dates and self._should_parse_dates(index): - values = self._date_conv(values) - return values - - -def TextParser(*args, **kwds): - """ - Converts lists of lists/tuples into DataFrames with proper type inference - and optional (e.g. string to datetime) conversion. Also enables iterating - lazily over chunks of large files - - Parameters - ---------- - data : file-like object or list - delimiter : separator character to use - dialect : str or csv.Dialect instance, optional - Ignored if delimiter is longer than 1 character - names : sequence, default - header : int, default 0 - Row to use to parse column labels. Defaults to the first row. Prior - rows will be discarded - index_col : int or list, optional - Column or columns to use as the (possibly hierarchical) index - has_index_names: bool, default False - True if the cols defined in index_col have an index name and are - not in the header. - na_values : scalar, str, list-like, or dict, optional - Additional strings to recognize as NA/NaN. - keep_default_na : bool, default True - thousands : str, optional - Thousands separator - comment : str, optional - Comment out remainder of line - parse_dates : bool, default False - keep_date_col : bool, default False - date_parser : function, optional - skiprows : list of integers - Row numbers to skip - skipfooter : int - Number of line at bottom of file to skip - converters : dict, optional - Dict of functions for converting values in certain columns. Keys can - either be integers or column labels, values are functions that take one - input argument, the cell (not column) content, and return the - transformed content. - encoding : str, optional - Encoding to use for UTF when reading/writing (ex. 'utf-8') - squeeze : bool, default False - returns Series if only one column. - infer_datetime_format: bool, default False - If True and `parse_dates` is True for a column, try to infer the - datetime format based on the first datetime string. If the format - can be inferred, there often will be a large parsing speed-up. - float_precision : str, optional - Specifies which converter the C engine should use for floating-point - values. 
The options are `None` or `high` for the ordinary converter, - `legacy` for the original lower precision pandas converter, and - `round_trip` for the round-trip converter. - - .. versionchanged:: 1.2 - """ - kwds["engine"] = "python" - return TextFileReader(*args, **kwds) - - -def count_empty_vals(vals) -> int: - return sum(1 for v in vals if v == "" or v is None) - - -class PythonParser(ParserBase): - def __init__(self, f: Union[FilePathOrBuffer, List], **kwds): - """ - Workhorse function for processing nested list into DataFrame - """ - ParserBase.__init__(self, kwds) - - self.data: Optional[Iterator[str]] = None - self.buf: List = [] - self.pos = 0 - self.line_pos = 0 - - self.skiprows = kwds["skiprows"] - - if callable(self.skiprows): - self.skipfunc = self.skiprows - else: - self.skipfunc = lambda x: x in self.skiprows - - self.skipfooter = _validate_skipfooter_arg(kwds["skipfooter"]) - self.delimiter = kwds["delimiter"] - - self.quotechar = kwds["quotechar"] - if isinstance(self.quotechar, str): - self.quotechar = str(self.quotechar) - - self.escapechar = kwds["escapechar"] - self.doublequote = kwds["doublequote"] - self.skipinitialspace = kwds["skipinitialspace"] - self.lineterminator = kwds["lineterminator"] - self.quoting = kwds["quoting"] - self.usecols, _ = _validate_usecols_arg(kwds["usecols"]) - self.skip_blank_lines = kwds["skip_blank_lines"] - - self.warn_bad_lines = kwds["warn_bad_lines"] - self.error_bad_lines = kwds["error_bad_lines"] - - self.names_passed = kwds["names"] or None - - self.has_index_names = False - if "has_index_names" in kwds: - self.has_index_names = kwds["has_index_names"] - - self.verbose = kwds["verbose"] - self.converters = kwds["converters"] - - self.dtype = kwds["dtype"] - self.thousands = kwds["thousands"] - self.decimal = kwds["decimal"] - - self.comment = kwds["comment"] - - # Set self.data to something that can read lines. - if isinstance(f, list): - # read_excel: f is a list - self.data = cast(Iterator[str], f) - else: - self._open_handles(f, kwds) - assert self.handles is not None - assert hasattr(self.handles.handle, "readline") - try: - self._make_reader(self.handles.handle) - except (csv.Error, UnicodeDecodeError): - self.close() - raise - - # Get columns in two steps: infer from data, then - # infer column indices from self.usecols if it is specified. - self._col_indices: Optional[List[int]] = None - try: - ( - self.columns, - self.num_original_columns, - self.unnamed_cols, - ) = self._infer_columns() - except (TypeError, ValueError): - self.close() - raise - - # Now self.columns has the set of columns that we will process. - # The original set is stored in self.original_columns. - if len(self.columns) > 1: - # we are processing a multi index column - ( - self.columns, - self.index_names, - self.col_names, - _, - ) = self._extract_multi_indexer_columns( - self.columns, self.index_names, self.col_names - ) - # Update list of original names to include all indices. 
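To make the multi-index-columns branch concrete (the deleted code continues below), a hedged sketch against the public API with invented data; the behavior shown is that of recent 1.x releases.

    from io import StringIO

    import pandas as pd

    # Two header rows produce a MultiIndex on the columns, the case handled
    # by _extract_multi_indexer_columns.
    data = StringIO("A,A,B\nx,y,z\n1,2,3\n")
    df = pd.read_csv(data, header=[0, 1], engine="python")
    print(df.columns.tolist())  # [('A', 'x'), ('A', 'y'), ('B', 'z')]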
- self.num_original_columns = len(self.columns) - else: - self.columns = self.columns[0] - - # get popped off for index - self.orig_names = list(self.columns) - - # needs to be cleaned/refactored - # multiple date column thing turning into a real spaghetti factory - - if not self._has_complex_date_col: - (index_names, self.orig_names, self.columns) = self._get_index_name( - self.columns - ) - self._name_processed = True - if self.index_names is None: - self.index_names = index_names - - if self._col_indices is None: - self._col_indices = list(range(len(self.columns))) - - self._validate_parse_dates_presence(self.columns) - if self.parse_dates: - self._no_thousands_columns = self._set_no_thousands_columns() - else: - self._no_thousands_columns = None - - if len(self.decimal) != 1: - raise ValueError("Only length-1 decimal markers supported") - - decimal = re.escape(self.decimal) - if self.thousands is None: - regex = fr"^[\-\+]?[0-9]*({decimal}[0-9]*)?([0-9]?(E|e)\-?[0-9]+)?$" - else: - thousands = re.escape(self.thousands) - regex = ( - fr"^[\-\+]?([0-9]+{thousands}|[0-9])*({decimal}[0-9]*)?" - fr"([0-9]?(E|e)\-?[0-9]+)?$" - ) - self.num = re.compile(regex) - - def _set_no_thousands_columns(self): - # Create a set of column ids that are not to be stripped of thousands - # operators. - noconvert_columns = set() - - def _set(x): - if is_integer(x): - noconvert_columns.add(x) - else: - assert self._col_indices is not None - col_indices = self._col_indices - noconvert_columns.add(col_indices[self.columns.index(x)]) - - if isinstance(self.parse_dates, list): - for val in self.parse_dates: - if isinstance(val, list): - for k in val: - _set(k) - else: - _set(val) - - elif isinstance(self.parse_dates, dict): - for val in self.parse_dates.values(): - if isinstance(val, list): - for k in val: - _set(k) - else: - _set(val) - - elif self.parse_dates: - if isinstance(self.index_col, list): - for k in self.index_col: - _set(k) - elif self.index_col is not None: - _set(self.index_col) - - return noconvert_columns - - def _make_reader(self, f): - sep = self.delimiter - - if sep is None or len(sep) == 1: - if self.lineterminator: - raise ValueError( - "Custom line terminators not supported in python parser (yet)" - ) - - class MyDialect(csv.Dialect): - delimiter = self.delimiter - quotechar = self.quotechar - escapechar = self.escapechar - doublequote = self.doublequote - skipinitialspace = self.skipinitialspace - quoting = self.quoting - lineterminator = "\n" - - dia = MyDialect - - if sep is not None: - dia.delimiter = sep - else: - # attempt to sniff the delimiter from the first valid line, - # i.e. 
no comment line and not in skiprows - line = f.readline() - lines = self._check_comments([[line]])[0] - while self.skipfunc(self.pos) or not lines: - self.pos += 1 - line = f.readline() - lines = self._check_comments([[line]])[0] - - # since `line` was a string, lines will be a list containing - # only a single string - line = lines[0] - - self.pos += 1 - self.line_pos += 1 - sniffed = csv.Sniffer().sniff(line) - dia.delimiter = sniffed.delimiter - - # Note: encoding is irrelevant here - line_rdr = csv.reader(StringIO(line), dialect=dia) - self.buf.extend(list(line_rdr)) - - # Note: encoding is irrelevant here - reader = csv.reader(f, dialect=dia, strict=True) - - else: - - def _read(): - line = f.readline() - pat = re.compile(sep) - - yield pat.split(line.strip()) - - for line in f: - yield pat.split(line.strip()) - - reader = _read() - - # pandas\io\parsers.py:2427: error: Incompatible types in assignment - # (expression has type "_reader", variable has type "Union[IO[Any], - # RawIOBase, BufferedIOBase, TextIOBase, TextIOWrapper, mmap, None]") - # [assignment] - self.data = reader # type: ignore[assignment] - - def read(self, rows=None): - try: - content = self._get_lines(rows) - except StopIteration: - if self._first_chunk: - content = [] - else: - self.close() - raise - - # done with first read, next time raise StopIteration - self._first_chunk = False - - # pandas\io\parsers.py:2480: error: Argument 1 to "list" has - # incompatible type "Optional[Any]"; expected "Iterable[Any]" - # [arg-type] - columns = list(self.orig_names) # type: ignore[arg-type] - if not len(content): # pragma: no cover - # DataFrame with the right metadata, even though it's length 0 - names = self._maybe_dedup_names(self.orig_names) - index, columns, col_dict = _get_empty_meta( - names, self.index_col, self.index_names, self.dtype - ) - columns = self._maybe_make_multi_index_columns(columns, self.col_names) - return index, columns, col_dict - - # handle new style for names in index - count_empty_content_vals = count_empty_vals(content[0]) - indexnamerow = None - if self.has_index_names and count_empty_content_vals == len(columns): - indexnamerow = content[0] - content = content[1:] - - alldata = self._rows_to_cols(content) - data, columns = self._exclude_implicit_index(alldata) - - columns, data = self._do_date_conversions(columns, data) - - data = self._convert_data(data) - index, columns = self._make_index(data, alldata, columns, indexnamerow) - - return index, columns, data - - def _exclude_implicit_index(self, alldata): - names = self._maybe_dedup_names(self.orig_names) - - offset = 0 - if self._implicit_index: - offset = len(self.index_col) - - if self._col_indices is not None and len(names) != len(self._col_indices): - names = [names[i] for i in sorted(self._col_indices)] - - return {name: alldata[i + offset] for i, name in enumerate(names)}, names - - # legacy - def get_chunk(self, size=None): - if size is None: - # pandas\io\parsers.py:2528: error: "PythonParser" has no attribute - # "chunksize" [attr-defined] - size = self.chunksize # type: ignore[attr-defined] - return self.read(rows=size) - - def _convert_data(self, data): - # apply converters - def _clean_mapping(mapping): - """converts col numbers to names""" - clean = {} - for col, v in mapping.items(): - # pandas\io\parsers.py:2537: error: Unsupported right operand - # type for in ("Optional[Any]") [operator] - if ( - isinstance(col, int) - and col not in self.orig_names # type: ignore[operator] - ): - # pandas\io\parsers.py:2538: error: Value of 
type - # "Optional[Any]" is not indexable [index] - col = self.orig_names[col] # type: ignore[index] - clean[col] = v - return clean - - clean_conv = _clean_mapping(self.converters) - if not isinstance(self.dtype, dict): - # handles single dtype applied to all columns - clean_dtypes = self.dtype - else: - clean_dtypes = _clean_mapping(self.dtype) - - # Apply NA values. - clean_na_values = {} - clean_na_fvalues = {} - - if isinstance(self.na_values, dict): - for col in self.na_values: - na_value = self.na_values[col] - na_fvalue = self.na_fvalues[col] - - # pandas\io\parsers.py:2558: error: Unsupported right operand - # type for in ("Optional[Any]") [operator] - if ( - isinstance(col, int) - and col not in self.orig_names # type: ignore[operator] - ): - # pandas\io\parsers.py:2559: error: Value of type - # "Optional[Any]" is not indexable [index] - col = self.orig_names[col] # type: ignore[index] - - clean_na_values[col] = na_value - clean_na_fvalues[col] = na_fvalue - else: - clean_na_values = self.na_values - clean_na_fvalues = self.na_fvalues - - return self._convert_to_ndarrays( - data, - clean_na_values, - clean_na_fvalues, - self.verbose, - clean_conv, - clean_dtypes, - ) - - def _infer_columns(self): - names = self.names - num_original_columns = 0 - clear_buffer = True - # pandas\io\parsers.py:2580: error: Need type annotation for - # 'unnamed_cols' (hint: "unnamed_cols: Set[] = ...") - # [var-annotated] - unnamed_cols = set() # type: ignore[var-annotated] - - if self.header is not None: - header = self.header - - if isinstance(header, (list, tuple, np.ndarray)): - have_mi_columns = len(header) > 1 - # we have a mi columns, so read an extra line - if have_mi_columns: - header = list(header) + [header[-1] + 1] - else: - have_mi_columns = False - header = [header] - - # pandas\io\parsers.py:2594: error: Need type annotation for - # 'columns' (hint: "columns: List[] = ...") [var-annotated] - columns = [] # type: ignore[var-annotated] - for level, hr in enumerate(header): - try: - line = self._buffered_line() - - while self.line_pos <= hr: - line = self._next_line() - - except StopIteration as err: - if self.line_pos < hr: - raise ValueError( - f"Passed header={hr} but only {self.line_pos + 1} lines in " - "file" - ) from err - - # We have an empty file, so check - # if columns are provided. 
That will - # serve as the 'line' for parsing - if have_mi_columns and hr > 0: - if clear_buffer: - self._clear_buffer() - columns.append([None] * len(columns[-1])) - return columns, num_original_columns, unnamed_cols - - if not self.names: - raise EmptyDataError("No columns to parse from file") from err - - line = self.names[:] - - this_columns = [] - this_unnamed_cols = [] - - for i, c in enumerate(line): - if c == "": - if have_mi_columns: - col_name = f"Unnamed: {i}_level_{level}" - else: - col_name = f"Unnamed: {i}" - - this_unnamed_cols.append(i) - this_columns.append(col_name) - else: - this_columns.append(c) - - if not have_mi_columns and self.mangle_dupe_cols: - # pandas\io\parsers.py:2639: error: Need type annotation - # for 'counts' [var-annotated] - counts = defaultdict(int) # type: ignore[var-annotated] - - for i, col in enumerate(this_columns): - cur_count = counts[col] - - while cur_count > 0: - counts[col] = cur_count + 1 - col = f"{col}.{cur_count}" - cur_count = counts[col] - - this_columns[i] = col - counts[col] = cur_count + 1 - elif have_mi_columns: - - # if we have grabbed an extra line, but its not in our - # format so save in the buffer, and create an blank extra - # line for the rest of the parsing code - if hr == header[-1]: - lc = len(this_columns) - ic = len(self.index_col) if self.index_col is not None else 0 - unnamed_count = len(this_unnamed_cols) - - if lc != unnamed_count and lc - ic > unnamed_count: - clear_buffer = False - # pandas\io\parsers.py:2663: error: List item 0 has - # incompatible type "None"; expected "str" - # [list-item] - this_columns = [None] * lc # type: ignore[list-item] - self.buf = [self.buf[-1]] - - # pandas\io\parsers.py:2666: error: Argument 1 to "append" of - # "list" has incompatible type "List[str]"; expected - # "List[None]" [arg-type] - columns.append(this_columns) # type: ignore[arg-type] - unnamed_cols.update({this_columns[i] for i in this_unnamed_cols}) - - if len(columns) == 1: - num_original_columns = len(this_columns) - - if clear_buffer: - self._clear_buffer() - - if names is not None: - if len(names) > len(columns[0]): - raise ValueError( - "Number of passed names did not match " - "number of header fields in the file" - ) - if len(columns) > 1: - raise TypeError("Cannot pass names with multi-index columns") - - if self.usecols is not None: - # Set _use_cols. We don't store columns because they are - # overwritten. 
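The usecols bookkeeping here is observable from the public API; a small sketch with invented data (the deleted call continues below).

    from io import StringIO

    import pandas as pd

    # With header=None the passed names label the file's fields first, then
    # the usecols subset is applied by name or by position.
    data = StringIO("1,2,3\n4,5,6\n")
    df = pd.read_csv(data, header=None, names=["a", "b", "c"], usecols=["a", "c"])
    print(df.columns.tolist())  # ['a', 'c']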
- self._handle_usecols(columns, names) - else: - num_original_columns = len(names) - columns = [names] - else: - columns = self._handle_usecols(columns, columns[0]) - else: - try: - line = self._buffered_line() - - except StopIteration as err: - if not names: - raise EmptyDataError("No columns to parse from file") from err - - line = names[:] - - ncols = len(line) - num_original_columns = ncols - - if not names: - if self.prefix: - # pandas\io\parsers.py:2711: error: List comprehension has - # incompatible type List[str]; expected List[None] [misc] - columns = [ - [ - f"{self.prefix}{i}" # type: ignore[misc] - for i in range(ncols) - ] - ] - else: - # pandas\io\parsers.py:2713: error: Argument 1 to "list" - # has incompatible type "range"; expected "Iterable[None]" - # [arg-type] - columns = [list(range(ncols))] # type: ignore[arg-type] - columns = self._handle_usecols(columns, columns[0]) - else: - if self.usecols is None or len(names) >= num_original_columns: - columns = self._handle_usecols([names], names) - num_original_columns = len(names) - else: - if not callable(self.usecols) and len(names) != len(self.usecols): - raise ValueError( - "Number of passed names did not match number of " - "header fields in the file" - ) - # Ignore output but set used columns. - self._handle_usecols([names], names) - columns = [names] - num_original_columns = ncols - - return columns, num_original_columns, unnamed_cols - - def _handle_usecols(self, columns, usecols_key): - """ - Sets self._col_indices - - usecols_key is used if there are string usecols. - """ - if self.usecols is not None: - if callable(self.usecols): - col_indices = _evaluate_usecols(self.usecols, usecols_key) - elif any(isinstance(u, str) for u in self.usecols): - if len(columns) > 1: - raise ValueError( - "If using multiple headers, usecols must be integers." - ) - col_indices = [] - - for col in self.usecols: - if isinstance(col, str): - try: - col_indices.append(usecols_key.index(col)) - except ValueError: - _validate_usecols_names(self.usecols, usecols_key) - else: - col_indices.append(col) - else: - col_indices = self.usecols - - columns = [ - [n for i, n in enumerate(column) if i in col_indices] - for column in columns - ] - self._col_indices = sorted(col_indices) - return columns - - def _buffered_line(self): - """ - Return a line from buffer, filling buffer if required. - """ - if len(self.buf) > 0: - return self.buf[0] - else: - return self._next_line() - - def _check_for_bom(self, first_row): - """ - Checks whether the file begins with the BOM character. - If it does, remove it. In addition, if there is quoting - in the field subsequent to the BOM, remove it as well - because it technically takes place at the beginning of - the name, not the middle of it. - """ - # first_row will be a list, so we need to check - # that that list is not empty before proceeding. - if not first_row: - return first_row - - # The first element of this row is the one that could have the - # BOM that we want to remove. Check that the first element is a - # string before proceeding. - if not isinstance(first_row[0], str): - return first_row - - # Check that the string is not empty, as that would - # obviously not have a BOM at the start of it. - if not first_row[0]: - return first_row - - # Since the string is non-empty, check that it does - # in fact begin with a BOM. 
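A short sketch of the BOM handling end to end (the check itself follows below); the stream contents are invented.

    from io import StringIO

    import pandas as pd

    # A BOM at the start of the stream is stripped rather than being glued
    # onto the first column name.
    data = StringIO("\ufeffa,b\n1,2\n")
    df = pd.read_csv(data, engine="python")
    print(df.columns.tolist())  # ['a', 'b'], not ['\ufeffa', 'b']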
- first_elt = first_row[0][0] - if first_elt != _BOM: - return first_row - - first_row_bom = first_row[0] - - if len(first_row_bom) > 1 and first_row_bom[1] == self.quotechar: - start = 2 - quote = first_row_bom[1] - end = first_row_bom[2:].index(quote) + 2 - - # Extract the data between the quotation marks - new_row = first_row_bom[start:end] - - # Extract any remaining data after the second - # quotation mark. - if len(first_row_bom) > end + 1: - new_row += first_row_bom[end + 1 :] - - else: - - # No quotation so just remove BOM from first element - new_row = first_row_bom[1:] - return [new_row] + first_row[1:] - - def _is_line_empty(self, line): - """ - Check if a line is empty or not. - - Parameters - ---------- - line : str, array-like - The line of data to check. - - Returns - ------- - boolean : Whether or not the line is empty. - """ - return not line or all(not x for x in line) - - def _next_line(self): - if isinstance(self.data, list): - while self.skipfunc(self.pos): - self.pos += 1 - - while True: - try: - line = self._check_comments([self.data[self.pos]])[0] - self.pos += 1 - # either uncommented or blank to begin with - if not self.skip_blank_lines and ( - self._is_line_empty(self.data[self.pos - 1]) or line - ): - break - elif self.skip_blank_lines: - ret = self._remove_empty_lines([line]) - if ret: - line = ret[0] - break - except IndexError: - raise StopIteration - else: - while self.skipfunc(self.pos): - self.pos += 1 - # assert for mypy, data is Iterator[str] or None, would error in next - assert self.data is not None - next(self.data) - - while True: - orig_line = self._next_iter_line(row_num=self.pos + 1) - self.pos += 1 - - if orig_line is not None: - line = self._check_comments([orig_line])[0] - - if self.skip_blank_lines: - ret = self._remove_empty_lines([line]) - - if ret: - line = ret[0] - break - elif self._is_line_empty(orig_line) or line: - break - - # This was the first line of the file, - # which could contain the BOM at the - # beginning of it. - if self.pos == 1: - line = self._check_for_bom(line) - - self.line_pos += 1 - self.buf.append(line) - return line - - def _alert_malformed(self, msg, row_num): - """ - Alert a user about a malformed row. - - If `self.error_bad_lines` is True, the alert will be `ParserError`. - If `self.warn_bad_lines` is True, the alert will be printed out. - - Parameters - ---------- - msg : The error message to display. - row_num : The row number where the parsing error occurred. - Because this row number is displayed, we 1-index, - even though we 0-index internally. - """ - if self.error_bad_lines: - raise ParserError(msg) - elif self.warn_bad_lines: - base = f"Skipping line {row_num}: " - sys.stderr.write(base + msg + "\n") - - def _next_iter_line(self, row_num): - """ - Wrapper around iterating through `self.data` (CSV source). - - When a CSV error is raised, we check for specific - error messages that allow us to customize the - error message displayed to the user. - - Parameters - ---------- - row_num : The row number of the line being parsed. - """ - try: - # assert for mypy, data is Iterator[str] or None, would error in next - assert self.data is not None - return next(self.data) - except csv.Error as e: - if self.warn_bad_lines or self.error_bad_lines: - msg = str(e) - - if "NULL byte" in msg or "line contains NUL" in msg: - msg = ( - "NULL byte detected. 
This byte " - "cannot be processed in Python's " - "native csv library at the moment, " - "so please pass in engine='c' instead" - ) - - if self.skipfooter > 0: - reason = ( - "Error could possibly be due to " - "parsing errors in the skipped footer rows " - "(the skipfooter keyword is only applied " - "after Python's csv library has parsed " - "all rows)." - ) - msg += ". " + reason - - self._alert_malformed(msg, row_num) - return None - - def _check_comments(self, lines): - if self.comment is None: - return lines - ret = [] - for line in lines: - rl = [] - for x in line: - if ( - not isinstance(x, str) - or self.comment not in x - or x in self.na_values - ): - rl.append(x) - else: - x = x[: x.find(self.comment)] - if len(x) > 0: - rl.append(x) - break - ret.append(rl) - return ret - - def _remove_empty_lines(self, lines): - """ - Iterate through the lines and remove any that are - either empty or contain only one whitespace value - - Parameters - ---------- - lines : array-like - The array of lines that we are to filter. - - Returns - ------- - filtered_lines : array-like - The same array of lines with the "empty" ones removed. - """ - ret = [] - for line in lines: - # Remove empty lines and lines with only one whitespace value - if ( - len(line) > 1 - or len(line) == 1 - and (not isinstance(line[0], str) or line[0].strip()) - ): - ret.append(line) - return ret - - def _check_thousands(self, lines): - if self.thousands is None: - return lines - - return self._search_replace_num_columns( - lines=lines, search=self.thousands, replace="" - ) - - def _search_replace_num_columns(self, lines, search, replace): - ret = [] - for line in lines: - rl = [] - for i, x in enumerate(line): - if ( - not isinstance(x, str) - or search not in x - or (self._no_thousands_columns and i in self._no_thousands_columns) - or not self.num.search(x.strip()) - ): - rl.append(x) - else: - rl.append(x.replace(search, replace)) - ret.append(rl) - return ret - - def _check_decimal(self, lines): - if self.decimal == _parser_defaults["decimal"]: - return lines - - return self._search_replace_num_columns( - lines=lines, search=self.decimal, replace="." - ) - - def _clear_buffer(self): - self.buf = [] - - _implicit_index = False - - def _get_index_name(self, columns): - """ - Try several cases to get lines: - - 0) There are headers on row 0 and row 1 and their - total summed lengths equals the length of the next line. - Treat row 0 as columns and row 1 as indices - 1) Look for implicit index: there are more columns - on row 1 than row 0. If this is true, assume that row - 1 lists index columns and row 0 lists normal columns. - 2) Get index from the columns if it was listed. - """ - orig_names = list(columns) - columns = list(columns) - - try: - line = self._next_line() - except StopIteration: - line = None - - try: - next_line = self._next_line() - except StopIteration: - next_line = None - - # implicitly index_col=0 b/c 1 fewer column names - implicit_first_cols = 0 - if line is not None: - # leave it 0, #2442 - # Case 1 - if self.index_col is not False: - implicit_first_cols = len(line) - self.num_original_columns - - # Case 0 - if next_line is not None: - if len(next_line) == len(line) + self.num_original_columns: - # column and index names on diff rows - self.index_col = list(range(len(line))) - self.buf = self.buf[1:] - - for c in reversed(line): - columns.insert(0, c) - - # Update list of original names to include all indices. 
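The implicit-index case that _get_index_name distinguishes shows up directly in the public API; a hedged sketch with invented data (the deleted branch continues below).

    from io import StringIO

    import pandas as pd

    # Case 1 ("implicit" index): the header row has one fewer field than the
    # data rows, so the leading column becomes the index.
    data = StringIO("a,b\nr1,1,2\nr2,3,4\n")
    df = pd.read_csv(data)
    print(df.index.tolist())    # ['r1', 'r2']
    print(df.columns.tolist())  # ['a', 'b']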
- orig_names = list(columns) - self.num_original_columns = len(columns) - return line, orig_names, columns - - if implicit_first_cols > 0: - # Case 1 - self._implicit_index = True - if self.index_col is None: - self.index_col = list(range(implicit_first_cols)) - - index_name = None - - else: - # Case 2 - (index_name, columns_, self.index_col) = _clean_index_names( - columns, self.index_col, self.unnamed_cols - ) - - return index_name, orig_names, columns - - def _rows_to_cols(self, content): - col_len = self.num_original_columns - - if self._implicit_index: - col_len += len(self.index_col) - - max_len = max(len(row) for row in content) - - # Check that there are no rows with too many - # elements in their row (rows with too few - # elements are padded with NaN). - if max_len > col_len and self.index_col is not False and self.usecols is None: - - footers = self.skipfooter if self.skipfooter else 0 - bad_lines = [] - - iter_content = enumerate(content) - content_len = len(content) - content = [] - - for (i, l) in iter_content: - actual_len = len(l) - - if actual_len > col_len: - if self.error_bad_lines or self.warn_bad_lines: - row_num = self.pos - (content_len - i + footers) - bad_lines.append((row_num, actual_len)) - - if self.error_bad_lines: - break - else: - content.append(l) - - for row_num, actual_len in bad_lines: - msg = ( - f"Expected {col_len} fields in line {row_num + 1}, saw " - f"{actual_len}" - ) - if ( - self.delimiter - and len(self.delimiter) > 1 - and self.quoting != csv.QUOTE_NONE - ): - # see gh-13374 - reason = ( - "Error could possibly be due to quotes being " - "ignored when a multi-char delimiter is used." - ) - msg += ". " + reason - - self._alert_malformed(msg, row_num + 1) - - # see gh-13320 - zipped_content = list(lib.to_object_array(content, min_width=col_len).T) - - if self.usecols: - assert self._col_indices is not None - col_indices = self._col_indices - - if self._implicit_index: - zipped_content = [ - a - for i, a in enumerate(zipped_content) - if ( - i < len(self.index_col) - or i - len(self.index_col) in col_indices - ) - ] - else: - zipped_content = [ - a for i, a in enumerate(zipped_content) if i in col_indices - ] - return zipped_content - - def _get_lines(self, rows=None): - lines = self.buf - new_rows = None - - # already fetched some number - if rows is not None: - # we already have the lines in the buffer - if len(self.buf) >= rows: - new_rows, self.buf = self.buf[:rows], self.buf[rows:] - - # need some lines - else: - rows -= len(self.buf) - - if new_rows is None: - if isinstance(self.data, list): - if self.pos > len(self.data): - raise StopIteration - if rows is None: - new_rows = self.data[self.pos :] - new_pos = len(self.data) - else: - new_rows = self.data[self.pos : self.pos + rows] - new_pos = self.pos + rows - - # Check for stop rows. n.b.: self.skiprows is a set. 
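Both forms of skiprows funnel through the same skip function noted above; a quick sketch with invented data (the skip logic continues below).

    from io import StringIO

    import pandas as pd

    # skiprows as a collection of row indices versus as a callable over the
    # zero-based row index.
    data = "a,b\n1,2\n3,4\n5,6\n"
    print(pd.read_csv(StringIO(data), skiprows=[1]))               # drops "1,2"
    print(pd.read_csv(StringIO(data), skiprows=lambda i: i == 2))  # drops "3,4"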
- if self.skiprows: - new_rows = [ - row - for i, row in enumerate(new_rows) - if not self.skipfunc(i + self.pos) - ] - - lines.extend(new_rows) - self.pos = new_pos - - else: - new_rows = [] - try: - if rows is not None: - for _ in range(rows): - # assert for mypy, data is Iterator[str] or None, would - # error in next - assert self.data is not None - new_rows.append(next(self.data)) - lines.extend(new_rows) - else: - rows = 0 - - while True: - new_row = self._next_iter_line(row_num=self.pos + rows + 1) - rows += 1 - - if new_row is not None: - new_rows.append(new_row) - - except StopIteration: - if self.skiprows: - new_rows = [ - row - for i, row in enumerate(new_rows) - if not self.skipfunc(i + self.pos) - ] - lines.extend(new_rows) - if len(lines) == 0: - raise - self.pos += len(new_rows) - - self.buf = [] - else: - lines = new_rows - - if self.skipfooter: - lines = lines[: -self.skipfooter] - - lines = self._check_comments(lines) - if self.skip_blank_lines: - lines = self._remove_empty_lines(lines) - lines = self._check_thousands(lines) - return self._check_decimal(lines) - - -def _make_date_converter( - date_parser=None, dayfirst=False, infer_datetime_format=False, cache_dates=True -): - def converter(*date_cols): - if date_parser is None: - strs = parsing.concat_date_cols(date_cols) - - try: - return tools.to_datetime( - ensure_object(strs), - utc=None, - dayfirst=dayfirst, - errors="ignore", - infer_datetime_format=infer_datetime_format, - cache=cache_dates, - ).to_numpy() - - except ValueError: - return tools.to_datetime( - parsing.try_parse_dates(strs, dayfirst=dayfirst), cache=cache_dates - ) - else: - try: - result = tools.to_datetime( - date_parser(*date_cols), errors="ignore", cache=cache_dates - ) - if isinstance(result, datetime.datetime): - raise Exception("scalar parser") - return result - except Exception: - try: - return tools.to_datetime( - parsing.try_parse_dates( - parsing.concat_date_cols(date_cols), - parser=date_parser, - dayfirst=dayfirst, - ), - errors="ignore", - ) - except Exception: - return generic_parser(date_parser, *date_cols) - - return converter - - -def _process_date_conversion( - data_dict, - converter, - parse_spec, - index_col, - index_names, - columns, - keep_date_col=False, -): - def _isindex(colspec): - return (isinstance(index_col, list) and colspec in index_col) or ( - isinstance(index_names, list) and colspec in index_names - ) - - new_cols = [] - new_data = {} - - orig_names = columns - columns = list(columns) - - date_cols = set() - - if parse_spec is None or isinstance(parse_spec, bool): - return data_dict, columns - - if isinstance(parse_spec, list): - # list of column lists - for colspec in parse_spec: - if is_scalar(colspec): - if isinstance(colspec, int) and colspec not in data_dict: - colspec = orig_names[colspec] - if _isindex(colspec): - continue - data_dict[colspec] = converter(data_dict[colspec]) - else: - new_name, col, old_names = _try_convert_dates( - converter, colspec, data_dict, orig_names - ) - if new_name in data_dict: - raise ValueError(f"New date column already in dict {new_name}") - new_data[new_name] = col - new_cols.append(new_name) - date_cols.update(old_names) - - elif isinstance(parse_spec, dict): - # dict of new name to column list - for new_name, colspec in parse_spec.items(): - if new_name in data_dict: - raise ValueError(f"Date column {new_name} already in dict") - - _, col, old_names = _try_convert_dates( - converter, colspec, data_dict, orig_names - ) - - new_data[new_name] = col - new_cols.append(new_name) - 
date_cols.update(old_names) - - data_dict.update(new_data) - new_cols.extend(columns) - - if not keep_date_col: - for c in list(date_cols): - data_dict.pop(c) - new_cols.remove(c) - - return data_dict, new_cols - - -def _try_convert_dates(parser, colspec, data_dict, columns): - colset = set(columns) - colnames = [] - - for c in colspec: - if c in colset: - colnames.append(c) - elif isinstance(c, int) and c not in columns: - colnames.append(columns[c]) - else: - colnames.append(c) - - new_name = "_".join(str(x) for x in colnames) - to_parse = [data_dict[c] for c in colnames if c in data_dict] - - new_col = parser(*to_parse) - return new_name, new_col, colnames - - -def _clean_na_values(na_values, keep_default_na=True): - - if na_values is None: - if keep_default_na: - na_values = STR_NA_VALUES - else: - na_values = set() - # pandas\io\parsers.py:3387: error: Need type annotation for - # 'na_fvalues' (hint: "na_fvalues: Set[] = ...") [var-annotated] - na_fvalues = set() # type: ignore[var-annotated] - elif isinstance(na_values, dict): - old_na_values = na_values.copy() - na_values = {} # Prevent aliasing. - - # Convert the values in the na_values dictionary - # into array-likes for further use. This is also - # where we append the default NaN values, provided - # that `keep_default_na=True`. - for k, v in old_na_values.items(): - if not is_list_like(v): - v = [v] - - if keep_default_na: - v = set(v) | STR_NA_VALUES - - na_values[k] = v - # pandas\io\parsers.py:3404: error: Incompatible types in assignment - # (expression has type "Dict[Any, Any]", variable has type "Set[Any]") - # [assignment] - na_fvalues = { # type: ignore[assignment] - k: _floatify_na_values(v) for k, v in na_values.items() - } - else: - if not is_list_like(na_values): - na_values = [na_values] - na_values = _stringify_na_values(na_values) - if keep_default_na: - na_values = na_values | STR_NA_VALUES - - na_fvalues = _floatify_na_values(na_values) - - return na_values, na_fvalues - - -def _clean_index_names(columns, index_col, unnamed_cols): - if not _is_index_col(index_col): - return None, columns, index_col - - columns = list(columns) - - # In case of no rows and multiindex columns we have to set index_names to - # list of Nones GH#38292 - if not columns: - return [None] * len(index_col), columns, index_col - - cp_cols = list(columns) - index_names = [] - - # don't mutate - index_col = list(index_col) - - for i, c in enumerate(index_col): - if isinstance(c, str): - index_names.append(c) - for j, name in enumerate(cp_cols): - if name == c: - index_col[i] = j - columns.remove(name) - break - else: - name = cp_cols[c] - columns.remove(name) - index_names.append(name) - - # Only clean index names that were placeholders. - for i, name in enumerate(index_names): - if isinstance(name, str) and name in unnamed_cols: - # pandas\io\parsers.py:3445: error: No overload variant of - # "__setitem__" of "list" matches argument types "int", "None" - # [call-overload] - index_names[i] = None # type: ignore[call-overload] - - return index_names, columns, index_col - - -def _get_empty_meta(columns, index_col, index_names, dtype: Optional[DtypeArg] = None): - columns = list(columns) - - # Convert `dtype` to a defaultdict of some kind. - # This will enable us to write `dtype[col_name]` - # without worrying about KeyError issues later on. - if not is_dict_like(dtype): - # if dtype == None, default will be object. 
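The empty-input path still honours a dtype mapping, with unmapped columns falling back to object as the comment above notes; a sketch with an invented header-only file (the deleted code continues below).

    from io import StringIO

    import pandas as pd

    # A header-only file yields a zero-row frame whose dtypes follow the
    # mapping; column "b" falls back to object.
    data = StringIO("a,b\n")
    df = pd.read_csv(data, dtype={"a": "int64"})
    print(len(df))    # 0
    print(df.dtypes)  # a: int64, b: object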
- default_dtype = dtype or object - dtype = defaultdict(lambda: default_dtype) - else: - dtype = cast(dict, dtype) - dtype = defaultdict( - lambda: object, - {columns[k] if is_integer(k) else k: v for k, v in dtype.items()}, - ) - - # Even though we have no data, the "index" of the empty DataFrame - # could for example still be an empty MultiIndex. Thus, we need to - # check whether we have any index columns specified, via either: - # - # 1) index_col (column indices) - # 2) index_names (column names) - # - # Both must be non-null to ensure a successful construction. Otherwise, - # we have to create a generic empty Index. - if (index_col is None or index_col is False) or index_names is None: - index = Index([]) - else: - data = [Series([], dtype=dtype[name]) for name in index_names] - index = ensure_index_from_sequences(data, names=index_names) - index_col.sort() - - for i, n in enumerate(index_col): - columns.pop(n - i) - - col_dict = {col_name: Series([], dtype=dtype[col_name]) for col_name in columns} - - return index, columns, col_dict - - -def _floatify_na_values(na_values): - # create float versions of the na_values - result = set() - for v in na_values: - try: - v = float(v) - if not np.isnan(v): - result.add(v) - except (TypeError, ValueError, OverflowError): - pass - return result - - -def _stringify_na_values(na_values): - """ return a stringified and numeric for these values """ - result = [] - for x in na_values: - result.append(str(x)) - result.append(x) - try: - v = float(x) - - # we are like 999 here - if v == int(v): - v = int(v) - result.append(f"{v}.0") - result.append(str(v)) - - # pandas\io\parsers.py:3522: error: Argument 1 to "append" of - # "list" has incompatible type "float"; expected "str" [arg-type] - result.append(v) # type: ignore[arg-type] - except (TypeError, ValueError, OverflowError): - pass - try: - # pandas\io\parsers.py:3526: error: Argument 1 to "append" of - # "list" has incompatible type "int"; expected "str" [arg-type] - result.append(int(x)) # type: ignore[arg-type] - except (TypeError, ValueError, OverflowError): - pass - return set(result) - - -def _get_na_values(col, na_values, na_fvalues, keep_default_na): - """ - Get the NaN values for a given column. - - Parameters - ---------- - col : str - The name of the column. - na_values : array-like, dict - The object listing the NaN values as strings. - na_fvalues : array-like, dict - The object listing the NaN values as floats. - keep_default_na : bool - If `na_values` is a dict, and the column is not mapped in the - dictionary, whether to return the default NaN values or the empty set. - - Returns - ------- - nan_tuple : A length-two tuple composed of - - 1) na_values : the string NaN values for that column. - 2) na_fvalues : the float NaN values for that column. - """ - if isinstance(na_values, dict): - if col in na_values: - return na_values[col], na_fvalues[col] - else: - if keep_default_na: - return STR_NA_VALUES, set() - - return set(), set() - else: - return na_values, na_fvalues - - -def _get_col_names(colspec, columns): - colset = set(columns) - colnames = [] - for c in colspec: - if c in colset: - colnames.append(c) - elif isinstance(c, int): - colnames.append(columns[c]) - return colnames - - -class FixedWidthReader(abc.Iterator): - """ - A reader of fixed-width lines. 
- """ - - def __init__(self, f, colspecs, delimiter, comment, skiprows=None, infer_nrows=100): - self.f = f - self.buffer = None - self.delimiter = "\r\n" + delimiter if delimiter else "\n\r\t " - self.comment = comment - if colspecs == "infer": - self.colspecs = self.detect_colspecs( - infer_nrows=infer_nrows, skiprows=skiprows - ) - else: - self.colspecs = colspecs - - if not isinstance(self.colspecs, (tuple, list)): - raise TypeError( - "column specifications must be a list or tuple, " - f"input was a {type(colspecs).__name__}" - ) - - for colspec in self.colspecs: - if not ( - isinstance(colspec, (tuple, list)) - and len(colspec) == 2 - and isinstance(colspec[0], (int, np.integer, type(None))) - and isinstance(colspec[1], (int, np.integer, type(None))) - ): - raise TypeError( - "Each column specification must be " - "2 element tuple or list of integers" - ) - - def get_rows(self, infer_nrows, skiprows=None): - """ - Read rows from self.f, skipping as specified. - - We distinguish buffer_rows (the first <= infer_nrows - lines) from the rows returned to detect_colspecs - because it's simpler to leave the other locations - with skiprows logic alone than to modify them to - deal with the fact we skipped some rows here as - well. - - Parameters - ---------- - infer_nrows : int - Number of rows to read from self.f, not counting - rows that are skipped. - skiprows: set, optional - Indices of rows to skip. - - Returns - ------- - detect_rows : list of str - A list containing the rows to read. - - """ - if skiprows is None: - skiprows = set() - buffer_rows = [] - detect_rows = [] - for i, row in enumerate(self.f): - if i not in skiprows: - detect_rows.append(row) - buffer_rows.append(row) - if len(detect_rows) >= infer_nrows: - break - self.buffer = iter(buffer_rows) - return detect_rows - - def detect_colspecs(self, infer_nrows=100, skiprows=None): - # Regex escape the delimiters - delimiters = "".join(fr"\{x}" for x in self.delimiter) - pattern = re.compile(f"([^{delimiters}]+)") - rows = self.get_rows(infer_nrows, skiprows) - if not rows: - raise EmptyDataError("No rows from which to infer column width") - max_len = max(map(len, rows)) - mask = np.zeros(max_len + 1, dtype=int) - if self.comment is not None: - rows = [row.partition(self.comment)[0] for row in rows] - for row in rows: - for m in pattern.finditer(row): - mask[m.start() : m.end()] = 1 - shifted = np.roll(mask, 1) - shifted[0] = 0 - edges = np.where((mask ^ shifted) == 1)[0] - edge_pairs = list(zip(edges[::2], edges[1::2])) - return edge_pairs - - def __next__(self): - if self.buffer is not None: - try: - line = next(self.buffer) - except StopIteration: - self.buffer = None - line = next(self.f) - else: - line = next(self.f) - # Note: 'colspecs' is a sequence of half-open intervals. - return [line[fromm:to].strip(self.delimiter) for (fromm, to) in self.colspecs] - - -class FixedWidthFieldParser(PythonParser): - """ - Specialization that Converts fixed-width fields into DataFrames. - See PythonParser for details. - """ - - def __init__(self, f, **kwds): - # Support iterators, convert to a list. - self.colspecs = kwds.pop("colspecs") - self.infer_nrows = kwds.pop("infer_nrows") - PythonParser.__init__(self, f, **kwds) - - def _make_reader(self, f): - self.data = FixedWidthReader( - f, - self.colspecs, - self.delimiter, - self.comment, - self.skiprows, - self.infer_nrows, - ) - - def _remove_empty_lines(self, lines) -> List: - """ - Returns the list of lines without the empty ones. 
With fixed-width - fields, empty lines become arrays of empty strings. - - See PythonParser._remove_empty_lines. - """ - return [ - line - for line in lines - if any(not isinstance(e, str) or e.strip() for e in line) - ] - - -def _refine_defaults_read( - dialect: Union[str, csv.Dialect], - delimiter: Union[str, object], - delim_whitespace: bool, - engine: str, - sep: Union[str, object], - defaults: Dict[str, Any], -): - """Validate/refine default values of input parameters of read_csv, read_table. - - Parameters - ---------- - dialect : str or csv.Dialect - If provided, this parameter will override values (default or not) for the - following parameters: `delimiter`, `doublequote`, `escapechar`, - `skipinitialspace`, `quotechar`, and `quoting`. If it is necessary to - override values, a ParserWarning will be issued. See csv.Dialect - documentation for more details. - delimiter : str or object - Alias for sep. - delim_whitespace : bool - Specifies whether or not whitespace (e.g. ``' '`` or ``'\t'``) will be - used as the sep. Equivalent to setting ``sep='\\s+'``. If this option - is set to True, nothing should be passed in for the ``delimiter`` - parameter. - engine : {{'c', 'python'}} - Parser engine to use. The C engine is faster while the python engine is - currently more feature-complete. - sep : str or object - A delimiter provided by the user (str) or a sentinel value, i.e. - pandas._libs.lib.no_default. - defaults: dict - Default values of input parameters. - - Returns - ------- - kwds : dict - Input parameters with correct values. - - Raises - ------ - ValueError : If a delimiter was specified with ``sep`` (or ``delimiter``) and - ``delim_whitespace=True``. - """ - # fix types for sep, delimiter to Union(str, Any) - delim_default = defaults["delimiter"] - kwds: Dict[str, Any] = {} - # gh-23761 - # - # When a dialect is passed, it overrides any of the overlapping - # parameters passed in directly. We don't want to warn if the - # default parameters were passed in (since it probably means - # that the user didn't pass them in explicitly in the first place). - # - # "delimiter" is the annoying corner case because we alias it to - # "sep" before doing comparison to the dialect values later on. - # Thus, we need a flag to indicate that we need to "override" - # the comparison to dialect values by checking if default values - # for BOTH "delimiter" and "sep" were provided. - if dialect is not None: - kwds["sep_override"] = delimiter is None and ( - sep is lib.no_default or sep == delim_default - ) - - # Alias sep -> delimiter. - if delimiter is None: - delimiter = sep - - if delim_whitespace and (delimiter is not lib.no_default): - raise ValueError( - "Specified a delimiter with both sep and " - "delim_whitespace=True; you can only specify one." - ) - - if delimiter is lib.no_default: - # assign default separator value - kwds["delimiter"] = delim_default - else: - kwds["delimiter"] = delimiter - - if engine is not None: - kwds["engine_specified"] = True - else: - kwds["engine"] = "c" - kwds["engine_specified"] = False - - return kwds - - -def _extract_dialect(kwds: Dict[str, Any]) -> Optional[csv.Dialect]: - """ - Extract concrete csv dialect instance. 
- - Returns - ------- - csv.Dialect or None - """ - if kwds.get("dialect") is None: - return None - - dialect = kwds["dialect"] - if dialect in csv.list_dialects(): - dialect = csv.get_dialect(dialect) - - _validate_dialect(dialect) - - return dialect - - -MANDATORY_DIALECT_ATTRS = ( - "delimiter", - "doublequote", - "escapechar", - "skipinitialspace", - "quotechar", - "quoting", -) - - -def _validate_dialect(dialect: csv.Dialect) -> None: - """ - Validate csv dialect instance. - - Raises - ------ - ValueError - If incorrect dialect is provided. - """ - for param in MANDATORY_DIALECT_ATTRS: - if not hasattr(dialect, param): - raise ValueError(f"Invalid dialect {dialect} provided") - - -def _merge_with_dialect_properties( - dialect: csv.Dialect, - defaults: Dict[str, Any], -) -> Dict[str, Any]: - """ - Merge default kwargs in TextFileReader with dialect parameters. - - Parameters - ---------- - dialect : csv.Dialect - Concrete csv dialect. See csv.Dialect documentation for more details. - defaults : dict - Keyword arguments passed to TextFileReader. - - Returns - ------- - kwds : dict - Updated keyword arguments, merged with dialect parameters. - """ - kwds = defaults.copy() - - for param in MANDATORY_DIALECT_ATTRS: - dialect_val = getattr(dialect, param) - - parser_default = _parser_defaults[param] - provided = kwds.get(param, parser_default) - - # Messages for conflicting values between the dialect - # instance and the actual parameters provided. - conflict_msgs = [] - - # Don't warn if the default parameter was passed in, - # even if it conflicts with the dialect (gh-23761). - if provided != parser_default and provided != dialect_val: - msg = ( - f"Conflicting values for '{param}': '{provided}' was " - f"provided, but the dialect specifies '{dialect_val}'. " - "Using the dialect-specified value." - ) - - # Annoying corner case for not warning about - # conflicts between dialect and delimiter parameter. - # Refer to the outer "_read_" function for more info. - if not (param == "delimiter" and kwds.pop("sep_override", False)): - conflict_msgs.append(msg) - - if conflict_msgs: - warnings.warn("\n\n".join(conflict_msgs), ParserWarning, stacklevel=2) - kwds[param] = dialect_val - return kwds - - -def _validate_skipfooter(kwds: Dict[str, Any]) -> None: - """ - Check whether skipfooter is compatible with other kwargs in TextFileReader. - - Parameters - ---------- - kwds : dict - Keyword arguments passed to TextFileReader. - - Raises - ------ - ValueError - If skipfooter is not compatible with other parameters. 
- """ - if kwds.get("skipfooter"): - if kwds.get("iterator") or kwds.get("chunksize"): - raise ValueError("'skipfooter' not supported for iteration") - if kwds.get("nrows"): - raise ValueError("'skipfooter' not supported with 'nrows'") diff --git a/pandas/io/parsers/__init__.py b/pandas/io/parsers/__init__.py new file mode 100644 index 0000000000000..ff11968db15f0 --- /dev/null +++ b/pandas/io/parsers/__init__.py @@ -0,0 +1,9 @@ +from pandas.io.parsers.readers import ( + TextFileReader, + TextParser, + read_csv, + read_fwf, + read_table, +) + +__all__ = ["TextFileReader", "TextParser", "read_csv", "read_fwf", "read_table"] diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py new file mode 100644 index 0000000000000..64c3b1e64a659 --- /dev/null +++ b/pandas/io/parsers/base_parser.py @@ -0,0 +1,1085 @@ +from collections import defaultdict +import csv +import datetime +import itertools +from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Union, cast +import warnings + +import numpy as np + +import pandas._libs.lib as lib +import pandas._libs.ops as libops +import pandas._libs.parsers as parsers +from pandas._libs.parsers import STR_NA_VALUES +from pandas._libs.tslibs import parsing +from pandas._typing import DtypeArg, FilePathOrBuffer +from pandas.errors import ParserError, ParserWarning + +from pandas.core.dtypes.cast import astype_nansafe +from pandas.core.dtypes.common import ( + ensure_object, + ensure_str, + is_bool_dtype, + is_categorical_dtype, + is_dict_like, + is_dtype_equal, + is_extension_array_dtype, + is_integer, + is_integer_dtype, + is_list_like, + is_object_dtype, + is_scalar, + is_string_dtype, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.core.dtypes.missing import isna + +from pandas.core import algorithms +from pandas.core.arrays import Categorical +from pandas.core.indexes.api import Index, MultiIndex, ensure_index_from_sequences +from pandas.core.series import Series +from pandas.core.tools import datetimes as tools + +from pandas.io.common import IOHandles, get_handle +from pandas.io.date_converters import generic_parser + +parser_defaults = { + "delimiter": None, + "escapechar": None, + "quotechar": '"', + "quoting": csv.QUOTE_MINIMAL, + "doublequote": True, + "skipinitialspace": False, + "lineterminator": None, + "header": "infer", + "index_col": None, + "names": None, + "prefix": None, + "skiprows": None, + "skipfooter": 0, + "nrows": None, + "na_values": None, + "keep_default_na": True, + "true_values": None, + "false_values": None, + "converters": None, + "dtype": None, + "cache_dates": True, + "thousands": None, + "comment": None, + "decimal": ".", + # 'engine': 'c', + "parse_dates": False, + "keep_date_col": False, + "dayfirst": False, + "date_parser": None, + "usecols": None, + # 'iterator': False, + "chunksize": None, + "verbose": False, + "encoding": None, + "squeeze": False, + "compression": None, + "mangle_dupe_cols": True, + "infer_datetime_format": False, + "skip_blank_lines": True, +} + + +class ParserBase: + def __init__(self, kwds): + + self.names = kwds.get("names") + self.orig_names: Optional[List] = None + self.prefix = kwds.pop("prefix", None) + + self.index_col = kwds.get("index_col", None) + self.unnamed_cols: Set = set() + self.index_names: Optional[List] = None + self.col_names = None + + self.parse_dates = _validate_parse_dates_arg(kwds.pop("parse_dates", False)) + self.date_parser = kwds.pop("date_parser", None) + self.dayfirst = kwds.pop("dayfirst", 
False)
+        self.keep_date_col = kwds.pop("keep_date_col", False)
+
+        self.na_values = kwds.get("na_values")
+        self.na_fvalues = kwds.get("na_fvalues")
+        self.na_filter = kwds.get("na_filter", False)
+        self.keep_default_na = kwds.get("keep_default_na", True)
+
+        self.true_values = kwds.get("true_values")
+        self.false_values = kwds.get("false_values")
+        self.mangle_dupe_cols = kwds.get("mangle_dupe_cols", True)
+        self.infer_datetime_format = kwds.pop("infer_datetime_format", False)
+        self.cache_dates = kwds.pop("cache_dates", True)
+
+        self._date_conv = _make_date_converter(
+            date_parser=self.date_parser,
+            dayfirst=self.dayfirst,
+            infer_datetime_format=self.infer_datetime_format,
+            cache_dates=self.cache_dates,
+        )
+
+        # validate header options for mi
+        self.header = kwds.get("header")
+        if isinstance(self.header, (list, tuple, np.ndarray)):
+            if not all(map(is_integer, self.header)):
+                raise ValueError("header must be integer or list of integers")
+            if any(i < 0 for i in self.header):
+                raise ValueError(
+                    "cannot specify multi-index header with negative integers"
+                )
+            if kwds.get("usecols"):
+                raise ValueError(
+                    "cannot specify usecols when specifying a multi-index header"
+                )
+            if kwds.get("names"):
+                raise ValueError(
+                    "cannot specify names when specifying a multi-index header"
+                )
+
+            # validate index_col that only contains integers
+            if self.index_col is not None:
+                is_sequence = isinstance(self.index_col, (list, tuple, np.ndarray))
+                if not (
+                    is_sequence
+                    and all(map(is_integer, self.index_col))
+                    or is_integer(self.index_col)
+                ):
+                    raise ValueError(
+                        "index_col must only contain row numbers "
+                        "when specifying a multi-index header"
+                    )
+        elif self.header is not None:
+            # GH 27394
+            if self.prefix is not None:
+                raise ValueError(
+                    "Argument prefix must be None if argument header is not None"
+                )
+            # GH 16338
+            elif not is_integer(self.header):
+                raise ValueError("header must be integer or list of integers")
+            # GH 27779
+            elif self.header < 0:
+                raise ValueError(
+                    "Passing negative integer to header is invalid. "
+                    "For no header, use header=None instead"
+                )
+
+        self._name_processed = False
+
+        self._first_chunk = True
+
+        self.handles: Optional[IOHandles] = None
+
+    def _open_handles(self, src: FilePathOrBuffer, kwds: Dict[str, Any]) -> None:
+        """
+        Let the readers open IOHandles after they are done with their potential raises.
+        """
+        self.handles = get_handle(
+            src,
+            "r",
+            encoding=kwds.get("encoding", None),
+            compression=kwds.get("compression", None),
+            memory_map=kwds.get("memory_map", False),
+            storage_options=kwds.get("storage_options", None),
+        )
+
+    def _validate_parse_dates_presence(self, columns: List[str]) -> None:
+        """
+        Check if parse_dates are in columns.
+
+        If the user has provided names for parse_dates, check if those columns
+        are available.
+
+        Parameters
+        ----------
+        columns : list
+            List of column names of the DataFrame.
+
+        Raises
+        ------
+        ValueError
+            If a column referenced in parse_dates is not in the DataFrame.
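+
+        Examples
+        --------
+        Illustrative only; assume ``parser`` is a ``ParserBase`` constructed
+        with ``parse_dates=["b"]``:
+
+        >>> parser._validate_parse_dates_presence(["a", "c"])
+        Traceback (most recent call last):
+            ...
+        ValueError: Missing column provided to 'parse_dates': 'b'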
+ + """ + cols_needed: Iterable + if is_dict_like(self.parse_dates): + cols_needed = itertools.chain(*self.parse_dates.values()) + elif is_list_like(self.parse_dates): + # a column in parse_dates could be represented + # ColReference = Union[int, str] + # DateGroups = List[ColReference] + # ParseDates = Union[DateGroups, List[DateGroups], + # Dict[ColReference, DateGroups]] + cols_needed = itertools.chain.from_iterable( + col if is_list_like(col) else [col] for col in self.parse_dates + ) + else: + cols_needed = [] + + # get only columns that are references using names (str), not by index + missing_cols = ", ".join( + sorted( + { + col + for col in cols_needed + if isinstance(col, str) and col not in columns + } + ) + ) + if missing_cols: + raise ValueError( + f"Missing column provided to 'parse_dates': '{missing_cols}'" + ) + + def close(self): + if self.handles is not None: + self.handles.close() + + @property + def _has_complex_date_col(self): + return isinstance(self.parse_dates, dict) or ( + isinstance(self.parse_dates, list) + and len(self.parse_dates) > 0 + and isinstance(self.parse_dates[0], list) + ) + + def _should_parse_dates(self, i): + if isinstance(self.parse_dates, bool): + return self.parse_dates + else: + if self.index_names is not None: + name = self.index_names[i] + else: + name = None + j = i if self.index_col is None else self.index_col[i] + + if is_scalar(self.parse_dates): + return (j == self.parse_dates) or ( + name is not None and name == self.parse_dates + ) + else: + return (j in self.parse_dates) or ( + name is not None and name in self.parse_dates + ) + + def _extract_multi_indexer_columns( + self, header, index_names, col_names, passed_names=False + ): + """ + extract and return the names, index_names, col_names + header is a list-of-lists returned from the parsers + """ + if len(header) < 2: + return header[0], index_names, col_names, passed_names + + # the names are the tuples of the header that are not the index cols + # 0 is the name of the index, assuming index_col is a list of column + # numbers + ic = self.index_col + if ic is None: + ic = [] + + if not isinstance(ic, (list, tuple, np.ndarray)): + ic = [ic] + sic = set(ic) + + # clean the index_names + index_names = header.pop(-1) + index_names, _, _ = self._clean_index_names( + index_names, self.index_col, self.unnamed_cols + ) + + # extract the columns + field_count = len(header[0]) + + def extract(r): + return tuple(r[i] for i in range(field_count) if i not in sic) + + columns = list(zip(*(extract(r) for r in header))) + names = ic + columns + + # If we find unnamed columns all in a single + # level, then our header was too long. + for n in range(len(columns[0])): + if all(ensure_str(col[n]) in self.unnamed_cols for col in columns): + header = ",".join(str(x) for x in self.header) + raise ParserError( + f"Passed header=[{header}] are too many rows " + "for this multi_index of columns" + ) + + # Clean the column names (if we have an index_col). + if len(ic): + col_names = [ + r[0] if ((r[0] is not None) and r[0] not in self.unnamed_cols) else None + for r in header + ] + else: + col_names = [None] * len(header) + + passed_names = True + + return names, index_names, col_names, passed_names + + def _maybe_dedup_names(self, names): + # see gh-7160 and gh-9424: this helps to provide + # immediate alleviation of the duplicate names + # issue and appears to be satisfactory to users, + # but ultimately, not needing to butcher the names + # would be nice! 
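+        # For example (illustrative): with mangle_dupe_cols=True, the names
+        # ["a", "a", "b"] come back as ["a", "a.1", "b"]; for tuple names
+        # that could form a MultiIndex, the counter is appended to the last
+        # element of the tuple instead.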
+ if self.mangle_dupe_cols: + names = list(names) # so we can index + # pandas\io\parsers.py:1559: error: Need type annotation for + # 'counts' [var-annotated] + counts = defaultdict(int) # type: ignore[var-annotated] + is_potential_mi = _is_potential_multi_index(names, self.index_col) + + for i, col in enumerate(names): + cur_count = counts[col] + + while cur_count > 0: + counts[col] = cur_count + 1 + + if is_potential_mi: + col = col[:-1] + (f"{col[-1]}.{cur_count}",) + else: + col = f"{col}.{cur_count}" + cur_count = counts[col] + + names[i] = col + counts[col] = cur_count + 1 + + return names + + def _maybe_make_multi_index_columns(self, columns, col_names=None): + # possibly create a column mi here + if _is_potential_multi_index(columns): + columns = MultiIndex.from_tuples(columns, names=col_names) + return columns + + def _make_index(self, data, alldata, columns, indexnamerow=False): + if not is_index_col(self.index_col) or not self.index_col: + index = None + + elif not self._has_complex_date_col: + index = self._get_simple_index(alldata, columns) + index = self._agg_index(index) + elif self._has_complex_date_col: + if not self._name_processed: + (self.index_names, _, self.index_col) = self._clean_index_names( + list(columns), self.index_col, self.unnamed_cols + ) + self._name_processed = True + index = self._get_complex_date_index(data, columns) + index = self._agg_index(index, try_parse_dates=False) + + # add names for the index + if indexnamerow: + coffset = len(indexnamerow) - len(columns) + # pandas\io\parsers.py:1604: error: Item "None" of "Optional[Any]" + # has no attribute "set_names" [union-attr] + index = index.set_names(indexnamerow[:coffset]) # type: ignore[union-attr] + + # maybe create a mi on the columns + columns = self._maybe_make_multi_index_columns(columns, self.col_names) + + return index, columns + + _implicit_index = False + + def _get_simple_index(self, data, columns): + def ix(col): + if not isinstance(col, str): + return col + raise ValueError(f"Index {col} invalid") + + to_remove = [] + index = [] + for idx in self.index_col: + i = ix(idx) + to_remove.append(i) + index.append(data[i]) + + # remove index items from content and columns, don't pop in + # loop + for i in sorted(to_remove, reverse=True): + data.pop(i) + if not self._implicit_index: + columns.pop(i) + + return index + + def _get_complex_date_index(self, data, col_names): + def _get_name(icol): + if isinstance(icol, str): + return icol + + if col_names is None: + raise ValueError(f"Must supply column order to use {icol!s} as index") + + for i, c in enumerate(col_names): + if i == icol: + return c + + to_remove = [] + index = [] + for idx in self.index_col: + name = _get_name(idx) + to_remove.append(name) + index.append(data[name]) + + # remove index items from content and columns, don't pop in + # loop + for c in sorted(to_remove, reverse=True): + data.pop(c) + col_names.remove(c) + + return index + + def _agg_index(self, index, try_parse_dates=True) -> Index: + arrays = [] + + for i, arr in enumerate(index): + + if try_parse_dates and self._should_parse_dates(i): + arr = self._date_conv(arr) + + if self.na_filter: + col_na_values = self.na_values + col_na_fvalues = self.na_fvalues + else: + col_na_values = set() + col_na_fvalues = set() + + if isinstance(self.na_values, dict): + # pandas\io\parsers.py:1678: error: Value of type + # "Optional[Any]" is not indexable [index] + col_name = self.index_names[i] # type: ignore[index] + if col_name is not None: + col_na_values, col_na_fvalues = 
_get_na_values(
+                        col_name, self.na_values, self.na_fvalues, self.keep_default_na
+                    )
+
+            arr, _ = self._infer_types(arr, col_na_values | col_na_fvalues)
+            arrays.append(arr)
+
+        names = self.index_names
+        index = ensure_index_from_sequences(arrays, names)
+
+        return index
+
+    def _convert_to_ndarrays(
+        self, dct, na_values, na_fvalues, verbose=False, converters=None, dtypes=None
+    ):
+        result = {}
+        for c, values in dct.items():
+            conv_f = None if converters is None else converters.get(c, None)
+            if isinstance(dtypes, dict):
+                cast_type = dtypes.get(c, None)
+            else:
+                # single dtype or None
+                cast_type = dtypes
+
+            if self.na_filter:
+                col_na_values, col_na_fvalues = _get_na_values(
+                    c, na_values, na_fvalues, self.keep_default_na
+                )
+            else:
+                col_na_values, col_na_fvalues = set(), set()
+
+            if conv_f is not None:
+                # conv_f applied to data before inference
+                if cast_type is not None:
+                    warnings.warn(
+                        (
+                            "Both a converter and dtype were specified "
+                            f"for column {c} - only the converter will be used"
+                        ),
+                        ParserWarning,
+                        stacklevel=7,
+                    )
+
+                try:
+                    values = lib.map_infer(values, conv_f)
+                except ValueError:
+                    mask = algorithms.isin(values, list(na_values)).view(np.uint8)
+                    values = lib.map_infer_mask(values, conv_f, mask)
+
+                cvals, na_count = self._infer_types(
+                    values, set(col_na_values) | col_na_fvalues, try_num_bool=False
+                )
+            else:
+                is_ea = is_extension_array_dtype(cast_type)
+                is_str_or_ea_dtype = is_ea or is_string_dtype(cast_type)
+                # skip inference if specified dtype is object
+                # or casting to an EA
+                try_num_bool = not (cast_type and is_str_or_ea_dtype)
+
+                # general type inference and conversion
+                cvals, na_count = self._infer_types(
+                    values, set(col_na_values) | col_na_fvalues, try_num_bool
+                )
+
+                # type specified in dtype param or cast_type is an EA
+                if cast_type and (
+                    not is_dtype_equal(cvals, cast_type)
+                    or is_extension_array_dtype(cast_type)
+                ):
+                    if not is_ea and na_count > 0:
+                        try:
+                            if is_bool_dtype(cast_type):
+                                raise ValueError(
+                                    f"Bool column has NA values in column {c}"
+                                )
+                        except (AttributeError, TypeError):
+                            # invalid input to is_bool_dtype
+                            pass
+                    cast_type = pandas_dtype(cast_type)
+                    cvals = self._cast_types(cvals, cast_type, c)
+
+            result[c] = cvals
+            if verbose and na_count:
+                print(f"Filled {na_count} NA values in column {c!s}")
+        return result
+
+    def _infer_types(self, values, na_values, try_num_bool=True):
+        """
+        Infer types of values, possibly casting.
+
+        Parameters
+        ----------
+        values : ndarray
+        na_values : set
+        try_num_bool : bool, default True
+            Try to cast values to numeric (first preference) or boolean.
+
+        Returns
+        -------
+        converted : ndarray
+        na_count : int
+        """
+        na_count = 0
+        if issubclass(values.dtype.type, (np.number, np.bool_)):
+            mask = algorithms.isin(values, list(na_values))
+            na_count = mask.sum()
+            if na_count > 0:
+                if is_integer_dtype(values):
+                    values = values.astype(np.float64)
+                np.putmask(values, mask, np.nan)
+            return values, na_count
+
+        if try_num_bool and is_object_dtype(values.dtype):
+            # exclude e.g. DatetimeIndex here
+            try:
+                result = lib.maybe_convert_numeric(values, na_values, False)
+            except (ValueError, TypeError):
+                # e.g.
encountering datetime string gets ValueError + # TypeError can be raised in floatify + result = values + na_count = parsers.sanitize_objects(result, na_values, False) + else: + na_count = isna(result).sum() + else: + result = values + if values.dtype == np.object_: + na_count = parsers.sanitize_objects(values, na_values, False) + + if result.dtype == np.object_ and try_num_bool: + result = libops.maybe_convert_bool( + np.asarray(values), + true_values=self.true_values, + false_values=self.false_values, + ) + + return result, na_count + + def _cast_types(self, values, cast_type, column): + """ + Cast values to specified type + + Parameters + ---------- + values : ndarray + cast_type : string or np.dtype + dtype to cast values to + column : string + column name - used only for error reporting + + Returns + ------- + converted : ndarray + """ + if is_categorical_dtype(cast_type): + known_cats = ( + isinstance(cast_type, CategoricalDtype) + and cast_type.categories is not None + ) + + if not is_object_dtype(values) and not known_cats: + # TODO: this is for consistency with + # c-parser which parses all categories + # as strings + values = astype_nansafe(values, str) + + cats = Index(values).unique().dropna() + values = Categorical._from_inferred_categories( + cats, cats.get_indexer(values), cast_type, true_values=self.true_values + ) + + # use the EA's implementation of casting + elif is_extension_array_dtype(cast_type): + # ensure cast_type is an actual dtype and not a string + cast_type = pandas_dtype(cast_type) + array_type = cast_type.construct_array_type() + try: + if is_bool_dtype(cast_type): + return array_type._from_sequence_of_strings( + values, + dtype=cast_type, + true_values=self.true_values, + false_values=self.false_values, + ) + else: + return array_type._from_sequence_of_strings(values, dtype=cast_type) + except NotImplementedError as err: + raise NotImplementedError( + f"Extension Array: {array_type} must implement " + "_from_sequence_of_strings in order to be used in parser methods" + ) from err + + else: + try: + values = astype_nansafe(values, cast_type, copy=True, skipna=True) + except ValueError as err: + raise ValueError( + f"Unable to convert column {column} to type {cast_type}" + ) from err + return values + + def _do_date_conversions(self, names, data): + # returns data, columns + + if self.parse_dates is not None: + data, names = _process_date_conversion( + data, + self._date_conv, + self.parse_dates, + self.index_col, + self.index_names, + names, + keep_date_col=self.keep_date_col, + ) + + return names, data + + def _evaluate_usecols(self, usecols, names): + """ + Check whether or not the 'usecols' parameter + is a callable. If so, enumerates the 'names' + parameter and returns a set of indices for + each entry in 'names' that evaluates to True. + If not a callable, returns 'usecols'. + """ + if callable(usecols): + return {i for i, name in enumerate(names) if usecols(name)} + return usecols + + def _validate_usecols_names(self, usecols, names): + """ + Validates that all usecols are present in a given + list of names. If not, raise a ValueError that + shows what usecols are missing. + + Parameters + ---------- + usecols : iterable of usecols + The columns to validate are present in names. + names : iterable of names + The column names to check against. + + Returns + ------- + usecols : iterable of usecols + The `usecols` parameter if the validation succeeds. + + Raises + ------ + ValueError : Columns were missing. Error message will list them. 
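+
+        Examples
+        --------
+        A sketch of the failure mode, with ``parser`` standing in for any
+        ``ParserBase`` instance:
+
+        >>> parser._validate_usecols_names(["a", "d"], ["a", "b", "c"])
+        Traceback (most recent call last):
+            ...
+        ValueError: Usecols do not match columns, columns expected but not found: ['d']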
+ """ + missing = [c for c in usecols if c not in names] + if len(missing) > 0: + raise ValueError( + f"Usecols do not match columns, columns expected but not found: " + f"{missing}" + ) + + return usecols + + def _validate_usecols_arg(self, usecols): + """ + Validate the 'usecols' parameter. + + Checks whether or not the 'usecols' parameter contains all integers + (column selection by index), strings (column by name) or is a callable. + Raises a ValueError if that is not the case. + + Parameters + ---------- + usecols : list-like, callable, or None + List of columns to use when parsing or a callable that can be used + to filter a list of table columns. + + Returns + ------- + usecols_tuple : tuple + A tuple of (verified_usecols, usecols_dtype). + + 'verified_usecols' is either a set if an array-like is passed in or + 'usecols' if a callable or None is passed in. + + 'usecols_dtype` is the inferred dtype of 'usecols' if an array-like + is passed in or None if a callable or None is passed in. + """ + msg = ( + "'usecols' must either be list-like of all strings, all unicode, " + "all integers or a callable." + ) + if usecols is not None: + if callable(usecols): + return usecols, None + + if not is_list_like(usecols): + # see gh-20529 + # + # Ensure it is iterable container but not string. + raise ValueError(msg) + + usecols_dtype = lib.infer_dtype(usecols, skipna=False) + + if usecols_dtype not in ("empty", "integer", "string"): + raise ValueError(msg) + + usecols = set(usecols) + + return usecols, usecols_dtype + return usecols, None + + def _clean_index_names(self, columns, index_col, unnamed_cols): + if not is_index_col(index_col): + return None, columns, index_col + + columns = list(columns) + + # In case of no rows and multiindex columns we have to set index_names to + # list of Nones GH#38292 + if not columns: + return [None] * len(index_col), columns, index_col + + cp_cols = list(columns) + index_names = [] + + # don't mutate + index_col = list(index_col) + + for i, c in enumerate(index_col): + if isinstance(c, str): + index_names.append(c) + for j, name in enumerate(cp_cols): + if name == c: + index_col[i] = j + columns.remove(name) + break + else: + name = cp_cols[c] + columns.remove(name) + index_names.append(name) + + # Only clean index names that were placeholders. + for i, name in enumerate(index_names): + if isinstance(name, str) and name in unnamed_cols: + # pandas\io\parsers.py:3445: error: No overload variant of + # "__setitem__" of "list" matches argument types "int", "None" + # [call-overload] + index_names[i] = None # type: ignore[call-overload] + + return index_names, columns, index_col + + def _get_empty_meta( + self, columns, index_col, index_names, dtype: Optional[DtypeArg] = None + ): + columns = list(columns) + + # Convert `dtype` to a defaultdict of some kind. + # This will enable us to write `dtype[col_name]` + # without worrying about KeyError issues later on. + if not is_dict_like(dtype): + # if dtype == None, default will be object. + default_dtype = dtype or object + dtype = defaultdict(lambda: default_dtype) + else: + dtype = cast(dict, dtype) + dtype = defaultdict( + lambda: object, + {columns[k] if is_integer(k) else k: v for k, v in dtype.items()}, + ) + + # Even though we have no data, the "index" of the empty DataFrame + # could for example still be an empty MultiIndex. 
Thus, we need to + # check whether we have any index columns specified, via either: + # + # 1) index_col (column indices) + # 2) index_names (column names) + # + # Both must be non-null to ensure a successful construction. Otherwise, + # we have to create a generic empty Index. + if (index_col is None or index_col is False) or index_names is None: + index = Index([]) + else: + data = [Series([], dtype=dtype[name]) for name in index_names] + index = ensure_index_from_sequences(data, names=index_names) + index_col.sort() + + for i, n in enumerate(index_col): + columns.pop(n - i) + + col_dict = {col_name: Series([], dtype=dtype[col_name]) for col_name in columns} + + return index, columns, col_dict + + +def _make_date_converter( + date_parser=None, dayfirst=False, infer_datetime_format=False, cache_dates=True +): + def converter(*date_cols): + if date_parser is None: + strs = parsing.concat_date_cols(date_cols) + + try: + return tools.to_datetime( + ensure_object(strs), + utc=None, + dayfirst=dayfirst, + errors="ignore", + infer_datetime_format=infer_datetime_format, + cache=cache_dates, + ).to_numpy() + + except ValueError: + return tools.to_datetime( + parsing.try_parse_dates(strs, dayfirst=dayfirst), cache=cache_dates + ) + else: + try: + result = tools.to_datetime( + date_parser(*date_cols), errors="ignore", cache=cache_dates + ) + if isinstance(result, datetime.datetime): + raise Exception("scalar parser") + return result + except Exception: + try: + return tools.to_datetime( + parsing.try_parse_dates( + parsing.concat_date_cols(date_cols), + parser=date_parser, + dayfirst=dayfirst, + ), + errors="ignore", + ) + except Exception: + return generic_parser(date_parser, *date_cols) + + return converter + + +def _process_date_conversion( + data_dict, + converter, + parse_spec, + index_col, + index_names, + columns, + keep_date_col=False, +): + def _isindex(colspec): + return (isinstance(index_col, list) and colspec in index_col) or ( + isinstance(index_names, list) and colspec in index_names + ) + + new_cols = [] + new_data = {} + + orig_names = columns + columns = list(columns) + + date_cols = set() + + if parse_spec is None or isinstance(parse_spec, bool): + return data_dict, columns + + if isinstance(parse_spec, list): + # list of column lists + for colspec in parse_spec: + if is_scalar(colspec): + if isinstance(colspec, int) and colspec not in data_dict: + colspec = orig_names[colspec] + if _isindex(colspec): + continue + data_dict[colspec] = converter(data_dict[colspec]) + else: + new_name, col, old_names = _try_convert_dates( + converter, colspec, data_dict, orig_names + ) + if new_name in data_dict: + raise ValueError(f"New date column already in dict {new_name}") + new_data[new_name] = col + new_cols.append(new_name) + date_cols.update(old_names) + + elif isinstance(parse_spec, dict): + # dict of new name to column list + for new_name, colspec in parse_spec.items(): + if new_name in data_dict: + raise ValueError(f"Date column {new_name} already in dict") + + _, col, old_names = _try_convert_dates( + converter, colspec, data_dict, orig_names + ) + + new_data[new_name] = col + new_cols.append(new_name) + date_cols.update(old_names) + + data_dict.update(new_data) + new_cols.extend(columns) + + if not keep_date_col: + for c in list(date_cols): + data_dict.pop(c) + new_cols.remove(c) + + return data_dict, new_cols + + +def _try_convert_dates(parser, colspec, data_dict, columns): + colset = set(columns) + colnames = [] + + for c in colspec: + if c in colset: + colnames.append(c) + elif 
isinstance(c, int) and c not in columns:
+            colnames.append(columns[c])
+        else:
+            colnames.append(c)
+
+    new_name = "_".join(str(x) for x in colnames)
+    to_parse = [data_dict[c] for c in colnames if c in data_dict]
+
+    new_col = parser(*to_parse)
+    return new_name, new_col, colnames
+
+
+def _get_na_values(col, na_values, na_fvalues, keep_default_na):
+    """
+    Get the NaN values for a given column.
+
+    Parameters
+    ----------
+    col : str
+        The name of the column.
+    na_values : array-like, dict
+        The object listing the NaN values as strings.
+    na_fvalues : array-like, dict
+        The object listing the NaN values as floats.
+    keep_default_na : bool
+        If `na_values` is a dict, and the column is not mapped in the
+        dictionary, whether to return the default NaN values or the empty set.
+
+    Returns
+    -------
+    nan_tuple : A length-two tuple composed of
+
+        1) na_values : the string NaN values for that column.
+        2) na_fvalues : the float NaN values for that column.
+    """
+    if isinstance(na_values, dict):
+        if col in na_values:
+            return na_values[col], na_fvalues[col]
+        else:
+            if keep_default_na:
+                return STR_NA_VALUES, set()
+
+            return set(), set()
+    else:
+        return na_values, na_fvalues
+
+
+# Seems to be unused
+def _get_col_names(colspec, columns):
+    colset = set(columns)
+    colnames = []
+    for c in colspec:
+        if c in colset:
+            colnames.append(c)
+        elif isinstance(c, int):
+            colnames.append(columns[c])
+    return colnames
+
+
+def _is_potential_multi_index(
+    columns, index_col: Optional[Union[bool, Sequence[int]]] = None
+):
+    """
+    Check whether or not the `columns` parameter
+    could be converted into a MultiIndex.
+
+    Parameters
+    ----------
+    columns : array-like
+        Object which may or may not be convertible into a MultiIndex
+    index_col : None, bool or list, optional
+        Column or columns to use as the (possibly hierarchical) index
+
+    Returns
+    -------
+    boolean : Whether or not columns could become a MultiIndex
+    """
+    if index_col is None or isinstance(index_col, bool):
+        index_col = []
+
+    return (
+        len(columns)
+        and not isinstance(columns, MultiIndex)
+        and all(isinstance(c, tuple) for c in columns if c not in list(index_col))
+    )
+
+
+def _validate_parse_dates_arg(parse_dates):
+    """
+    Check whether or not the 'parse_dates' parameter
+    is a non-boolean scalar. Raises a TypeError if
+    that is the case.
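+
+    Examples
+    --------
+    For illustration:
+
+    >>> _validate_parse_dates_arg(["a", "b"])
+    ['a', 'b']
+    >>> _validate_parse_dates_arg("a")
+    Traceback (most recent call last):
+        ...
+    TypeError: Only booleans, lists, and dictionaries are accepted for the 'parse_dates' parameter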
+ """ + msg = ( + "Only booleans, lists, and dictionaries are accepted " + "for the 'parse_dates' parameter" + ) + + if parse_dates is not None: + if is_scalar(parse_dates): + if not lib.is_bool(parse_dates): + raise TypeError(msg) + + elif not isinstance(parse_dates, (list, dict)): + raise TypeError(msg) + + return parse_dates + + +def is_index_col(col): + return col is not None and col is not False diff --git a/pandas/io/parsers/c_parser_wrapper.py b/pandas/io/parsers/c_parser_wrapper.py new file mode 100644 index 0000000000000..b4c00dfe9b3e7 --- /dev/null +++ b/pandas/io/parsers/c_parser_wrapper.py @@ -0,0 +1,328 @@ +import pandas._libs.parsers as parsers +from pandas._typing import FilePathOrBuffer + +from pandas.core.dtypes.common import is_integer + +from pandas.core.indexes.api import ensure_index_from_sequences + +from pandas.io.parsers.base_parser import ParserBase, is_index_col + + +class CParserWrapper(ParserBase): + def __init__(self, src: FilePathOrBuffer, **kwds): + self.kwds = kwds + kwds = kwds.copy() + + ParserBase.__init__(self, kwds) + + # #2442 + kwds["allow_leading_cols"] = self.index_col is not False + + # GH20529, validate usecol arg before TextReader + self.usecols, self.usecols_dtype = self._validate_usecols_arg(kwds["usecols"]) + kwds["usecols"] = self.usecols + + # open handles + self._open_handles(src, kwds) + assert self.handles is not None + for key in ("storage_options", "encoding", "memory_map", "compression"): + kwds.pop(key, None) + if self.handles.is_mmap and hasattr(self.handles.handle, "mmap"): + # pandas\io\parsers.py:1861: error: Item "IO[Any]" of + # "Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase, + # TextIOWrapper, mmap]" has no attribute "mmap" [union-attr] + + # pandas\io\parsers.py:1861: error: Item "RawIOBase" of + # "Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase, + # TextIOWrapper, mmap]" has no attribute "mmap" [union-attr] + + # pandas\io\parsers.py:1861: error: Item "BufferedIOBase" of + # "Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase, + # TextIOWrapper, mmap]" has no attribute "mmap" [union-attr] + + # pandas\io\parsers.py:1861: error: Item "TextIOBase" of + # "Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase, + # TextIOWrapper, mmap]" has no attribute "mmap" [union-attr] + + # pandas\io\parsers.py:1861: error: Item "TextIOWrapper" of + # "Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase, + # TextIOWrapper, mmap]" has no attribute "mmap" [union-attr] + + # pandas\io\parsers.py:1861: error: Item "mmap" of "Union[IO[Any], + # RawIOBase, BufferedIOBase, TextIOBase, TextIOWrapper, mmap]" has + # no attribute "mmap" [union-attr] + self.handles.handle = self.handles.handle.mmap # type: ignore[union-attr] + + try: + self._reader = parsers.TextReader(self.handles.handle, **kwds) + except Exception: + self.handles.close() + raise + self.unnamed_cols = self._reader.unnamed_cols + + passed_names = self.names is None + + if self._reader.header is None: + self.names = None + else: + if len(self._reader.header) > 1: + # we have a multi index in the columns + ( + self.names, + self.index_names, + self.col_names, + passed_names, + ) = self._extract_multi_indexer_columns( + self._reader.header, self.index_names, self.col_names, passed_names + ) + else: + self.names = list(self._reader.header[0]) + + if self.names is None: + if self.prefix: + self.names = [ + f"{self.prefix}{i}" for i in range(self._reader.table_width) + ] + else: + self.names = list(range(self._reader.table_width)) + + # gh-9755 + # + # need to set orig_names 
here first + # so that proper indexing can be done + # with _set_noconvert_columns + # + # once names has been filtered, we will + # then set orig_names again to names + self.orig_names = self.names[:] + + if self.usecols: + usecols = self._evaluate_usecols(self.usecols, self.orig_names) + + # GH 14671 + # assert for mypy, orig_names is List or None, None would error in issubset + assert self.orig_names is not None + if self.usecols_dtype == "string" and not set(usecols).issubset( + self.orig_names + ): + self._validate_usecols_names(usecols, self.orig_names) + + if len(self.names) > len(usecols): + self.names = [ + n + for i, n in enumerate(self.names) + if (i in usecols or n in usecols) + ] + + if len(self.names) < len(usecols): + self._validate_usecols_names(usecols, self.names) + + self._validate_parse_dates_presence(self.names) + self._set_noconvert_columns() + + self.orig_names = self.names + + if not self._has_complex_date_col: + if self._reader.leading_cols == 0 and is_index_col(self.index_col): + + self._name_processed = True + (index_names, self.names, self.index_col) = self._clean_index_names( + self.names, self.index_col, self.unnamed_cols + ) + + if self.index_names is None: + self.index_names = index_names + + if self._reader.header is None and not passed_names: + # pandas\io\parsers.py:1997: error: Argument 1 to "len" has + # incompatible type "Optional[Any]"; expected "Sized" + # [arg-type] + self.index_names = [None] * len( + self.index_names # type: ignore[arg-type] + ) + + self._implicit_index = self._reader.leading_cols > 0 + + def close(self) -> None: + super().close() + + # close additional handles opened by C parser + try: + self._reader.close() + except ValueError: + pass + + def _set_noconvert_columns(self): + """ + Set the columns that should not undergo dtype conversions. + + Currently, any column that is involved with date parsing will not + undergo such conversions. + """ + names = self.orig_names + if self.usecols_dtype == "integer": + # A set of integers will be converted to a list in + # the correct order every single time. + usecols = list(self.usecols) + usecols.sort() + elif callable(self.usecols) or self.usecols_dtype not in ("empty", None): + # The names attribute should have the correct columns + # in the proper order for indexing with parse_dates. + usecols = self.names[:] + else: + # Usecols is empty. 
+ + # pandas\io\parsers.py:2030: error: Incompatible types in + # assignment (expression has type "None", variable has type + # "List[Any]") [assignment] + usecols = None # type: ignore[assignment] + + def _set(x): + if usecols is not None and is_integer(x): + x = usecols[x] + + if not is_integer(x): + # assert for mypy, names is List or None, None would error when calling + # .index() + assert names is not None + x = names.index(x) + + self._reader.set_noconvert(x) + + if isinstance(self.parse_dates, list): + for val in self.parse_dates: + if isinstance(val, list): + for k in val: + _set(k) + else: + _set(val) + + elif isinstance(self.parse_dates, dict): + for val in self.parse_dates.values(): + if isinstance(val, list): + for k in val: + _set(k) + else: + _set(val) + + elif self.parse_dates: + if isinstance(self.index_col, list): + for k in self.index_col: + _set(k) + elif self.index_col is not None: + _set(self.index_col) + + def set_error_bad_lines(self, status): + self._reader.set_error_bad_lines(int(status)) + + def read(self, nrows=None): + try: + data = self._reader.read(nrows) + except StopIteration: + if self._first_chunk: + self._first_chunk = False + names = self._maybe_dedup_names(self.orig_names) + index, columns, col_dict = self._get_empty_meta( + names, + self.index_col, + self.index_names, + dtype=self.kwds.get("dtype"), + ) + columns = self._maybe_make_multi_index_columns(columns, self.col_names) + + if self.usecols is not None: + columns = self._filter_usecols(columns) + + col_dict = {k: v for k, v in col_dict.items() if k in columns} + + return index, columns, col_dict + + else: + self.close() + raise + + # Done with first read, next time raise StopIteration + self._first_chunk = False + + names = self.names + + if self._reader.leading_cols: + if self._has_complex_date_col: + raise NotImplementedError("file structure not yet supported") + + # implicit index, no index names + arrays = [] + + for i in range(self._reader.leading_cols): + if self.index_col is None: + values = data.pop(i) + else: + values = data.pop(self.index_col[i]) + + values = self._maybe_parse_dates(values, i, try_parse_dates=True) + arrays.append(values) + + index = ensure_index_from_sequences(arrays) + + if self.usecols is not None: + names = self._filter_usecols(names) + + names = self._maybe_dedup_names(names) + + # rename dict keys + data = sorted(data.items()) + data = {k: v for k, (i, v) in zip(names, data)} + + names, data = self._do_date_conversions(names, data) + + else: + # rename dict keys + data = sorted(data.items()) + + # ugh, mutation + + # assert for mypy, orig_names is List or None, None would error in list(...) 
+ assert self.orig_names is not None + names = list(self.orig_names) + names = self._maybe_dedup_names(names) + + if self.usecols is not None: + names = self._filter_usecols(names) + + # columns as list + alldata = [x[1] for x in data] + + data = {k: v for k, (i, v) in zip(names, data)} + + names, data = self._do_date_conversions(names, data) + index, names = self._make_index(data, alldata, names) + + # maybe create a mi on the columns + names = self._maybe_make_multi_index_columns(names, self.col_names) + + return index, names, data + + def _filter_usecols(self, names): + # hackish + usecols = self._evaluate_usecols(self.usecols, names) + if usecols is not None and len(names) != len(usecols): + names = [ + name for i, name in enumerate(names) if i in usecols or name in usecols + ] + return names + + def _get_index_names(self): + names = list(self._reader.header[0]) + idx_names = None + + if self._reader.leading_cols == 0 and self.index_col is not None: + (idx_names, names, self.index_col) = self._clean_index_names( + names, self.index_col, self.unnamed_cols + ) + + return names, idx_names + + def _maybe_parse_dates(self, values, index, try_parse_dates=True): + if try_parse_dates and self._should_parse_dates(index): + values = self._date_conv(values) + return values diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py new file mode 100644 index 0000000000000..ba05eb4a6599f --- /dev/null +++ b/pandas/io/parsers/python_parser.py @@ -0,0 +1,1263 @@ +from collections import abc, defaultdict +import csv +from io import StringIO +import re +import sys +from typing import Iterator, List, Optional, cast + +import numpy as np + +import pandas._libs.lib as lib +from pandas._typing import FilePathOrBuffer, Union +from pandas.errors import EmptyDataError, ParserError + +from pandas.core.dtypes.common import is_integer + +from pandas.io.parsers.base_parser import ParserBase, parser_defaults + +# BOM character (byte order mark) +# This exists at the beginning of a file to indicate endianness +# of a file (stream). Unfortunately, this marker screws up parsing, +# so we need to remove it if we see it. 
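+# (Note: decoding with encoding="utf-8" leaves the marker in the text --
+# only "utf-8-sig" strips it -- so the parser removes it manually below.)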
+_BOM = "\ufeff" + + +class PythonParser(ParserBase): + def __init__(self, f: Union[FilePathOrBuffer, List], **kwds): + """ + Workhorse function for processing nested list into DataFrame + """ + ParserBase.__init__(self, kwds) + + self.data: Optional[Iterator[str]] = None + self.buf: List = [] + self.pos = 0 + self.line_pos = 0 + + self.skiprows = kwds["skiprows"] + + if callable(self.skiprows): + self.skipfunc = self.skiprows + else: + self.skipfunc = lambda x: x in self.skiprows + + self.skipfooter = _validate_skipfooter_arg(kwds["skipfooter"]) + self.delimiter = kwds["delimiter"] + + self.quotechar = kwds["quotechar"] + if isinstance(self.quotechar, str): + self.quotechar = str(self.quotechar) + + self.escapechar = kwds["escapechar"] + self.doublequote = kwds["doublequote"] + self.skipinitialspace = kwds["skipinitialspace"] + self.lineterminator = kwds["lineterminator"] + self.quoting = kwds["quoting"] + self.usecols, _ = self._validate_usecols_arg(kwds["usecols"]) + self.skip_blank_lines = kwds["skip_blank_lines"] + + self.warn_bad_lines = kwds["warn_bad_lines"] + self.error_bad_lines = kwds["error_bad_lines"] + + self.names_passed = kwds["names"] or None + + self.has_index_names = False + if "has_index_names" in kwds: + self.has_index_names = kwds["has_index_names"] + + self.verbose = kwds["verbose"] + self.converters = kwds["converters"] + + self.dtype = kwds["dtype"] + self.thousands = kwds["thousands"] + self.decimal = kwds["decimal"] + + self.comment = kwds["comment"] + + # Set self.data to something that can read lines. + if isinstance(f, list): + # read_excel: f is a list + self.data = cast(Iterator[str], f) + else: + self._open_handles(f, kwds) + assert self.handles is not None + assert hasattr(self.handles.handle, "readline") + try: + self._make_reader(self.handles.handle) + except (csv.Error, UnicodeDecodeError): + self.close() + raise + + # Get columns in two steps: infer from data, then + # infer column indices from self.usecols if it is specified. + self._col_indices: Optional[List[int]] = None + try: + ( + self.columns, + self.num_original_columns, + self.unnamed_cols, + ) = self._infer_columns() + except (TypeError, ValueError): + self.close() + raise + + # Now self.columns has the set of columns that we will process. + # The original set is stored in self.original_columns. + if len(self.columns) > 1: + # we are processing a multi index column + ( + self.columns, + self.index_names, + self.col_names, + _, + ) = self._extract_multi_indexer_columns( + self.columns, self.index_names, self.col_names + ) + # Update list of original names to include all indices. 
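+            # (E.g. with header=[0, 1], two header rows such as ["a", "a"]
+            # and ["one", "two"] have just been folded into the tuple names
+            # ("a", "one") and ("a", "two").)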
+ self.num_original_columns = len(self.columns) + else: + self.columns = self.columns[0] + + # get popped off for index + self.orig_names = list(self.columns) + + # needs to be cleaned/refactored + # multiple date column thing turning into a real spaghetti factory + + if not self._has_complex_date_col: + (index_names, self.orig_names, self.columns) = self._get_index_name( + self.columns + ) + self._name_processed = True + if self.index_names is None: + self.index_names = index_names + + if self._col_indices is None: + self._col_indices = list(range(len(self.columns))) + + self._validate_parse_dates_presence(self.columns) + if self.parse_dates: + self._no_thousands_columns = self._set_no_thousands_columns() + else: + self._no_thousands_columns = None + + if len(self.decimal) != 1: + raise ValueError("Only length-1 decimal markers supported") + + decimal = re.escape(self.decimal) + if self.thousands is None: + regex = fr"^[\-\+]?[0-9]*({decimal}[0-9]*)?([0-9]?(E|e)\-?[0-9]+)?$" + else: + thousands = re.escape(self.thousands) + regex = ( + fr"^[\-\+]?([0-9]+{thousands}|[0-9])*({decimal}[0-9]*)?" + fr"([0-9]?(E|e)\-?[0-9]+)?$" + ) + self.num = re.compile(regex) + + def _set_no_thousands_columns(self): + # Create a set of column ids that are not to be stripped of thousands + # operators. + noconvert_columns = set() + + def _set(x): + if is_integer(x): + noconvert_columns.add(x) + else: + assert self._col_indices is not None + col_indices = self._col_indices + noconvert_columns.add(col_indices[self.columns.index(x)]) + + if isinstance(self.parse_dates, list): + for val in self.parse_dates: + if isinstance(val, list): + for k in val: + _set(k) + else: + _set(val) + + elif isinstance(self.parse_dates, dict): + for val in self.parse_dates.values(): + if isinstance(val, list): + for k in val: + _set(k) + else: + _set(val) + + elif self.parse_dates: + if isinstance(self.index_col, list): + for k in self.index_col: + _set(k) + elif self.index_col is not None: + _set(self.index_col) + + return noconvert_columns + + def _make_reader(self, f): + sep = self.delimiter + + if sep is None or len(sep) == 1: + if self.lineterminator: + raise ValueError( + "Custom line terminators not supported in python parser (yet)" + ) + + class MyDialect(csv.Dialect): + delimiter = self.delimiter + quotechar = self.quotechar + escapechar = self.escapechar + doublequote = self.doublequote + skipinitialspace = self.skipinitialspace + quoting = self.quoting + lineterminator = "\n" + + dia = MyDialect + + if sep is not None: + dia.delimiter = sep + else: + # attempt to sniff the delimiter from the first valid line, + # i.e. 
no comment line and not in skiprows + line = f.readline() + lines = self._check_comments([[line]])[0] + while self.skipfunc(self.pos) or not lines: + self.pos += 1 + line = f.readline() + lines = self._check_comments([[line]])[0] + + # since `line` was a string, lines will be a list containing + # only a single string + line = lines[0] + + self.pos += 1 + self.line_pos += 1 + sniffed = csv.Sniffer().sniff(line) + dia.delimiter = sniffed.delimiter + + # Note: encoding is irrelevant here + line_rdr = csv.reader(StringIO(line), dialect=dia) + self.buf.extend(list(line_rdr)) + + # Note: encoding is irrelevant here + reader = csv.reader(f, dialect=dia, strict=True) + + else: + + def _read(): + line = f.readline() + pat = re.compile(sep) + + yield pat.split(line.strip()) + + for line in f: + yield pat.split(line.strip()) + + reader = _read() + + # pandas\io\parsers.py:2427: error: Incompatible types in assignment + # (expression has type "_reader", variable has type "Union[IO[Any], + # RawIOBase, BufferedIOBase, TextIOBase, TextIOWrapper, mmap, None]") + # [assignment] + self.data = reader # type: ignore[assignment] + + def read(self, rows=None): + try: + content = self._get_lines(rows) + except StopIteration: + if self._first_chunk: + content = [] + else: + self.close() + raise + + # done with first read, next time raise StopIteration + self._first_chunk = False + + # pandas\io\parsers.py:2480: error: Argument 1 to "list" has + # incompatible type "Optional[Any]"; expected "Iterable[Any]" + # [arg-type] + columns = list(self.orig_names) # type: ignore[arg-type] + if not len(content): # pragma: no cover + # DataFrame with the right metadata, even though it's length 0 + names = self._maybe_dedup_names(self.orig_names) + index, columns, col_dict = self._get_empty_meta( + names, self.index_col, self.index_names, self.dtype + ) + columns = self._maybe_make_multi_index_columns(columns, self.col_names) + return index, columns, col_dict + + # handle new style for names in index + count_empty_content_vals = count_empty_vals(content[0]) + indexnamerow = None + if self.has_index_names and count_empty_content_vals == len(columns): + indexnamerow = content[0] + content = content[1:] + + alldata = self._rows_to_cols(content) + data, columns = self._exclude_implicit_index(alldata) + + columns, data = self._do_date_conversions(columns, data) + + data = self._convert_data(data) + index, columns = self._make_index(data, alldata, columns, indexnamerow) + + return index, columns, data + + def _exclude_implicit_index(self, alldata): + names = self._maybe_dedup_names(self.orig_names) + + offset = 0 + if self._implicit_index: + offset = len(self.index_col) + + if self._col_indices is not None and len(names) != len(self._col_indices): + names = [names[i] for i in sorted(self._col_indices)] + + return {name: alldata[i + offset] for i, name in enumerate(names)}, names + + # legacy + def get_chunk(self, size=None): + if size is None: + # pandas\io\parsers.py:2528: error: "PythonParser" has no attribute + # "chunksize" [attr-defined] + size = self.chunksize # type: ignore[attr-defined] + return self.read(rows=size) + + def _convert_data(self, data): + # apply converters + def _clean_mapping(mapping): + """converts col numbers to names""" + clean = {} + for col, v in mapping.items(): + # pandas\io\parsers.py:2537: error: Unsupported right operand + # type for in ("Optional[Any]") [operator] + if ( + isinstance(col, int) + and col not in self.orig_names # type: ignore[operator] + ): + # pandas\io\parsers.py:2538: error: 
Value of type + # "Optional[Any]" is not indexable [index] + col = self.orig_names[col] # type: ignore[index] + clean[col] = v + return clean + + clean_conv = _clean_mapping(self.converters) + if not isinstance(self.dtype, dict): + # handles single dtype applied to all columns + clean_dtypes = self.dtype + else: + clean_dtypes = _clean_mapping(self.dtype) + + # Apply NA values. + clean_na_values = {} + clean_na_fvalues = {} + + if isinstance(self.na_values, dict): + for col in self.na_values: + na_value = self.na_values[col] + na_fvalue = self.na_fvalues[col] + + # pandas\io\parsers.py:2558: error: Unsupported right operand + # type for in ("Optional[Any]") [operator] + if ( + isinstance(col, int) + and col not in self.orig_names # type: ignore[operator] + ): + # pandas\io\parsers.py:2559: error: Value of type + # "Optional[Any]" is not indexable [index] + col = self.orig_names[col] # type: ignore[index] + + clean_na_values[col] = na_value + clean_na_fvalues[col] = na_fvalue + else: + clean_na_values = self.na_values + clean_na_fvalues = self.na_fvalues + + return self._convert_to_ndarrays( + data, + clean_na_values, + clean_na_fvalues, + self.verbose, + clean_conv, + clean_dtypes, + ) + + def _infer_columns(self): + names = self.names + num_original_columns = 0 + clear_buffer = True + # pandas\io\parsers.py:2580: error: Need type annotation for + # 'unnamed_cols' (hint: "unnamed_cols: Set[] = ...") + # [var-annotated] + unnamed_cols = set() # type: ignore[var-annotated] + + if self.header is not None: + header = self.header + + if isinstance(header, (list, tuple, np.ndarray)): + have_mi_columns = len(header) > 1 + # we have a mi columns, so read an extra line + if have_mi_columns: + header = list(header) + [header[-1] + 1] + else: + have_mi_columns = False + header = [header] + + # pandas\io\parsers.py:2594: error: Need type annotation for + # 'columns' (hint: "columns: List[] = ...") [var-annotated] + columns = [] # type: ignore[var-annotated] + for level, hr in enumerate(header): + try: + line = self._buffered_line() + + while self.line_pos <= hr: + line = self._next_line() + + except StopIteration as err: + if self.line_pos < hr: + raise ValueError( + f"Passed header={hr} but only {self.line_pos + 1} lines in " + "file" + ) from err + + # We have an empty file, so check + # if columns are provided. 
That will
+                    # serve as the 'line' for parsing
+                    if have_mi_columns and hr > 0:
+                        if clear_buffer:
+                            self._clear_buffer()
+                        columns.append([None] * len(columns[-1]))
+                        return columns, num_original_columns, unnamed_cols
+
+                    if not self.names:
+                        raise EmptyDataError("No columns to parse from file") from err
+
+                    line = self.names[:]
+
+                this_columns = []
+                this_unnamed_cols = []
+
+                for i, c in enumerate(line):
+                    if c == "":
+                        if have_mi_columns:
+                            col_name = f"Unnamed: {i}_level_{level}"
+                        else:
+                            col_name = f"Unnamed: {i}"
+
+                        this_unnamed_cols.append(i)
+                        this_columns.append(col_name)
+                    else:
+                        this_columns.append(c)
+
+                if not have_mi_columns and self.mangle_dupe_cols:
+                    # pandas\io\parsers.py:2639: error: Need type annotation
+                    # for 'counts' [var-annotated]
+                    counts = defaultdict(int)  # type: ignore[var-annotated]
+
+                    for i, col in enumerate(this_columns):
+                        cur_count = counts[col]
+
+                        while cur_count > 0:
+                            counts[col] = cur_count + 1
+                            col = f"{col}.{cur_count}"
+                            cur_count = counts[col]
+
+                        this_columns[i] = col
+                        counts[col] = cur_count + 1
+                elif have_mi_columns:
+
+                    # If we have grabbed an extra line, but it's not in our
+                    # format, save it in the buffer and create a blank extra
+                    # line for the rest of the parsing code.
+                    if hr == header[-1]:
+                        lc = len(this_columns)
+                        ic = len(self.index_col) if self.index_col is not None else 0
+                        unnamed_count = len(this_unnamed_cols)
+
+                        if lc != unnamed_count and lc - ic > unnamed_count:
+                            clear_buffer = False
+                            # pandas\io\parsers.py:2663: error: List item 0 has
+                            # incompatible type "None"; expected "str"
+                            # [list-item]
+                            this_columns = [None] * lc  # type: ignore[list-item]
+                            self.buf = [self.buf[-1]]
+
+                # pandas\io\parsers.py:2666: error: Argument 1 to "append" of
+                # "list" has incompatible type "List[str]"; expected
+                # "List[None]" [arg-type]
+                columns.append(this_columns)  # type: ignore[arg-type]
+                unnamed_cols.update({this_columns[i] for i in this_unnamed_cols})
+
+                if len(columns) == 1:
+                    num_original_columns = len(this_columns)
+
+            if clear_buffer:
+                self._clear_buffer()
+
+            if names is not None:
+                if len(names) > len(columns[0]):
+                    raise ValueError(
+                        "Number of passed names did not match "
+                        "number of header fields in the file"
+                    )
+                if len(columns) > 1:
+                    raise TypeError("Cannot pass names with multi-index columns")
+
+                if self.usecols is not None:
+                    # Set _use_cols. We don't store columns because they are
+                    # overwritten.
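+                    # (Illustrative: names=["a", "b"] with usecols=[0]
+                    # resolves _col_indices to [0], keeping only "a".)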
+ self._handle_usecols(columns, names) + else: + num_original_columns = len(names) + columns = [names] + else: + columns = self._handle_usecols(columns, columns[0]) + else: + try: + line = self._buffered_line() + + except StopIteration as err: + if not names: + raise EmptyDataError("No columns to parse from file") from err + + line = names[:] + + ncols = len(line) + num_original_columns = ncols + + if not names: + if self.prefix: + # pandas\io\parsers.py:2711: error: List comprehension has + # incompatible type List[str]; expected List[None] [misc] + columns = [ + [ + f"{self.prefix}{i}" # type: ignore[misc] + for i in range(ncols) + ] + ] + else: + # pandas\io\parsers.py:2713: error: Argument 1 to "list" + # has incompatible type "range"; expected "Iterable[None]" + # [arg-type] + columns = [list(range(ncols))] # type: ignore[arg-type] + columns = self._handle_usecols(columns, columns[0]) + else: + if self.usecols is None or len(names) >= num_original_columns: + columns = self._handle_usecols([names], names) + num_original_columns = len(names) + else: + if not callable(self.usecols) and len(names) != len(self.usecols): + raise ValueError( + "Number of passed names did not match number of " + "header fields in the file" + ) + # Ignore output but set used columns. + self._handle_usecols([names], names) + columns = [names] + num_original_columns = ncols + + return columns, num_original_columns, unnamed_cols + + def _handle_usecols(self, columns, usecols_key): + """ + Sets self._col_indices + + usecols_key is used if there are string usecols. + """ + if self.usecols is not None: + if callable(self.usecols): + col_indices = self._evaluate_usecols(self.usecols, usecols_key) + elif any(isinstance(u, str) for u in self.usecols): + if len(columns) > 1: + raise ValueError( + "If using multiple headers, usecols must be integers." + ) + col_indices = [] + + for col in self.usecols: + if isinstance(col, str): + try: + col_indices.append(usecols_key.index(col)) + except ValueError: + self._validate_usecols_names(self.usecols, usecols_key) + else: + col_indices.append(col) + else: + col_indices = self.usecols + + columns = [ + [n for i, n in enumerate(column) if i in col_indices] + for column in columns + ] + self._col_indices = sorted(col_indices) + return columns + + def _buffered_line(self): + """ + Return a line from buffer, filling buffer if required. + """ + if len(self.buf) > 0: + return self.buf[0] + else: + return self._next_line() + + def _check_for_bom(self, first_row): + """ + Checks whether the file begins with the BOM character. + If it does, remove it. In addition, if there is quoting + in the field subsequent to the BOM, remove it as well + because it technically takes place at the beginning of + the name, not the middle of it. + """ + # first_row will be a list, so we need to check + # that that list is not empty before proceeding. + if not first_row: + return first_row + + # The first element of this row is the one that could have the + # BOM that we want to remove. Check that the first element is a + # string before proceeding. + if not isinstance(first_row[0], str): + return first_row + + # Check that the string is not empty, as that would + # obviously not have a BOM at the start of it. + if not first_row[0]: + return first_row + + # Since the string is non-empty, check that it does + # in fact begin with a BOM. 
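+        # (Illustrative: with quotechar '"', a first cell of '\ufeff"a"'
+        # comes back as 'a'; an unquoted '\ufeffa' just loses the marker.)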
+ first_elt = first_row[0][0] + if first_elt != _BOM: + return first_row + + first_row_bom = first_row[0] + + if len(first_row_bom) > 1 and first_row_bom[1] == self.quotechar: + start = 2 + quote = first_row_bom[1] + end = first_row_bom[2:].index(quote) + 2 + + # Extract the data between the quotation marks + new_row = first_row_bom[start:end] + + # Extract any remaining data after the second + # quotation mark. + if len(first_row_bom) > end + 1: + new_row += first_row_bom[end + 1 :] + + else: + + # No quotation so just remove BOM from first element + new_row = first_row_bom[1:] + return [new_row] + first_row[1:] + + def _is_line_empty(self, line): + """ + Check if a line is empty or not. + + Parameters + ---------- + line : str, array-like + The line of data to check. + + Returns + ------- + boolean : Whether or not the line is empty. + """ + return not line or all(not x for x in line) + + def _next_line(self): + if isinstance(self.data, list): + while self.skipfunc(self.pos): + self.pos += 1 + + while True: + try: + line = self._check_comments([self.data[self.pos]])[0] + self.pos += 1 + # either uncommented or blank to begin with + if not self.skip_blank_lines and ( + self._is_line_empty(self.data[self.pos - 1]) or line + ): + break + elif self.skip_blank_lines: + ret = self._remove_empty_lines([line]) + if ret: + line = ret[0] + break + except IndexError: + raise StopIteration + else: + while self.skipfunc(self.pos): + self.pos += 1 + # assert for mypy, data is Iterator[str] or None, would error in next + assert self.data is not None + next(self.data) + + while True: + orig_line = self._next_iter_line(row_num=self.pos + 1) + self.pos += 1 + + if orig_line is not None: + line = self._check_comments([orig_line])[0] + + if self.skip_blank_lines: + ret = self._remove_empty_lines([line]) + + if ret: + line = ret[0] + break + elif self._is_line_empty(orig_line) or line: + break + + # This was the first line of the file, + # which could contain the BOM at the + # beginning of it. + if self.pos == 1: + line = self._check_for_bom(line) + + self.line_pos += 1 + self.buf.append(line) + return line + + def _alert_malformed(self, msg, row_num): + """ + Alert a user about a malformed row. + + If `self.error_bad_lines` is True, the alert will be `ParserError`. + If `self.warn_bad_lines` is True, the alert will be printed out. + + Parameters + ---------- + msg : The error message to display. + row_num : The row number where the parsing error occurred. + Because this row number is displayed, we 1-index, + even though we 0-index internally. + """ + if self.error_bad_lines: + raise ParserError(msg) + elif self.warn_bad_lines: + base = f"Skipping line {row_num}: " + sys.stderr.write(base + msg + "\n") + + def _next_iter_line(self, row_num): + """ + Wrapper around iterating through `self.data` (CSV source). + + When a CSV error is raised, we check for specific + error messages that allow us to customize the + error message displayed to the user. + + Parameters + ---------- + row_num : The row number of the line being parsed. + """ + try: + # assert for mypy, data is Iterator[str] or None, would error in next + assert self.data is not None + return next(self.data) + except csv.Error as e: + if self.warn_bad_lines or self.error_bad_lines: + msg = str(e) + + if "NULL byte" in msg or "line contains NUL" in msg: + msg = ( + "NULL byte detected. 
This byte " + "cannot be processed in Python's " + "native csv library at the moment, " + "so please pass in engine='c' instead" + ) + + if self.skipfooter > 0: + reason = ( + "Error could possibly be due to " + "parsing errors in the skipped footer rows " + "(the skipfooter keyword is only applied " + "after Python's csv library has parsed " + "all rows)." + ) + msg += ". " + reason + + self._alert_malformed(msg, row_num) + return None + + def _check_comments(self, lines): + if self.comment is None: + return lines + ret = [] + for line in lines: + rl = [] + for x in line: + if ( + not isinstance(x, str) + or self.comment not in x + or x in self.na_values + ): + rl.append(x) + else: + x = x[: x.find(self.comment)] + if len(x) > 0: + rl.append(x) + break + ret.append(rl) + return ret + + def _remove_empty_lines(self, lines): + """ + Iterate through the lines and remove any that are + either empty or contain only one whitespace value + + Parameters + ---------- + lines : array-like + The array of lines that we are to filter. + + Returns + ------- + filtered_lines : array-like + The same array of lines with the "empty" ones removed. + """ + ret = [] + for line in lines: + # Remove empty lines and lines with only one whitespace value + if ( + len(line) > 1 + or len(line) == 1 + and (not isinstance(line[0], str) or line[0].strip()) + ): + ret.append(line) + return ret + + def _check_thousands(self, lines): + if self.thousands is None: + return lines + + return self._search_replace_num_columns( + lines=lines, search=self.thousands, replace="" + ) + + def _search_replace_num_columns(self, lines, search, replace): + ret = [] + for line in lines: + rl = [] + for i, x in enumerate(line): + if ( + not isinstance(x, str) + or search not in x + or (self._no_thousands_columns and i in self._no_thousands_columns) + or not self.num.search(x.strip()) + ): + rl.append(x) + else: + rl.append(x.replace(search, replace)) + ret.append(rl) + return ret + + def _check_decimal(self, lines): + if self.decimal == parser_defaults["decimal"]: + return lines + + return self._search_replace_num_columns( + lines=lines, search=self.decimal, replace="." + ) + + def _clear_buffer(self): + self.buf = [] + + _implicit_index = False + + def _get_index_name(self, columns): + """ + Try several cases to get lines: + + 0) There are headers on row 0 and row 1 and their + total summed lengths equals the length of the next line. + Treat row 0 as columns and row 1 as indices + 1) Look for implicit index: there are more columns + on row 1 than row 0. If this is true, assume that row + 1 lists index columns and row 0 lists normal columns. + 2) Get index from the columns if it was listed. + """ + orig_names = list(columns) + columns = list(columns) + + try: + line = self._next_line() + except StopIteration: + line = None + + try: + next_line = self._next_line() + except StopIteration: + next_line = None + + # implicitly index_col=0 b/c 1 fewer column names + implicit_first_cols = 0 + if line is not None: + # leave it 0, #2442 + # Case 1 + if self.index_col is not False: + implicit_first_cols = len(line) - self.num_original_columns + + # Case 0 + if next_line is not None: + if len(next_line) == len(line) + self.num_original_columns: + # column and index names on diff rows + self.index_col = list(range(len(line))) + self.buf = self.buf[1:] + + for c in reversed(line): + columns.insert(0, c) + + # Update list of original names to include all indices. 
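`_check_thousands` and `_check_decimal` above both delegate to `_search_replace_num_columns`, which only rewrites fields that look numeric (`self.num.search`) and are not excluded by `_no_thousands_columns`. End to end, that is what makes European-style numbers parse (sketch, not part of this change):

from io import StringIO

import pandas as pd

# '1.234,56' -> thousands separator stripped, ',' mapped to '.' -> 1234.56
df = pd.read_csv(StringIO("price\n1.234,56\n"), thousands=".", decimal=",")
print(df["price"].iloc[0])  # 1234.56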
+ orig_names = list(columns) + self.num_original_columns = len(columns) + return line, orig_names, columns + + if implicit_first_cols > 0: + # Case 1 + self._implicit_index = True + if self.index_col is None: + self.index_col = list(range(implicit_first_cols)) + + index_name = None + + else: + # Case 2 + (index_name, columns_, self.index_col) = self._clean_index_names( + columns, self.index_col, self.unnamed_cols + ) + + return index_name, orig_names, columns + + def _rows_to_cols(self, content): + col_len = self.num_original_columns + + if self._implicit_index: + col_len += len(self.index_col) + + max_len = max(len(row) for row in content) + + # Check that there are no rows with too many + # elements in their row (rows with too few + # elements are padded with NaN). + if max_len > col_len and self.index_col is not False and self.usecols is None: + + footers = self.skipfooter if self.skipfooter else 0 + bad_lines = [] + + iter_content = enumerate(content) + content_len = len(content) + content = [] + + for (i, l) in iter_content: + actual_len = len(l) + + if actual_len > col_len: + if self.error_bad_lines or self.warn_bad_lines: + row_num = self.pos - (content_len - i + footers) + bad_lines.append((row_num, actual_len)) + + if self.error_bad_lines: + break + else: + content.append(l) + + for row_num, actual_len in bad_lines: + msg = ( + f"Expected {col_len} fields in line {row_num + 1}, saw " + f"{actual_len}" + ) + if ( + self.delimiter + and len(self.delimiter) > 1 + and self.quoting != csv.QUOTE_NONE + ): + # see gh-13374 + reason = ( + "Error could possibly be due to quotes being " + "ignored when a multi-char delimiter is used." + ) + msg += ". " + reason + + self._alert_malformed(msg, row_num + 1) + + # see gh-13320 + zipped_content = list(lib.to_object_array(content, min_width=col_len).T) + + if self.usecols: + assert self._col_indices is not None + col_indices = self._col_indices + + if self._implicit_index: + zipped_content = [ + a + for i, a in enumerate(zipped_content) + if ( + i < len(self.index_col) + or i - len(self.index_col) in col_indices + ) + ] + else: + zipped_content = [ + a for i, a in enumerate(zipped_content) if i in col_indices + ] + return zipped_content + + def _get_lines(self, rows=None): + lines = self.buf + new_rows = None + + # already fetched some number + if rows is not None: + # we already have the lines in the buffer + if len(self.buf) >= rows: + new_rows, self.buf = self.buf[:rows], self.buf[rows:] + + # need some lines + else: + rows -= len(self.buf) + + if new_rows is None: + if isinstance(self.data, list): + if self.pos > len(self.data): + raise StopIteration + if rows is None: + new_rows = self.data[self.pos :] + new_pos = len(self.data) + else: + new_rows = self.data[self.pos : self.pos + rows] + new_pos = self.pos + rows + + # Check for stop rows. n.b.: self.skiprows is a set. 
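`_rows_to_cols` above is where the Python engine catches over-long rows: with `error_bad_lines=True` the first offender raises `ParserError`, otherwise offenders are dropped and, if `warn_bad_lines` is set, reported through `_alert_malformed` on stderr. For example (sketch; these flags were the current API at the time of this change):

from io import StringIO

import pandas as pd

data = "a,b\n1,2\n1,2,3\n"  # last row has one field too many
df = pd.read_csv(StringIO(data), error_bad_lines=False, warn_bad_lines=True)
# stderr: "Skipping line 3: expected 2 fields, saw 3" (wording varies by engine)
print(len(df))  # 1 -- the bad row was dropped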
+ if self.skiprows: + new_rows = [ + row + for i, row in enumerate(new_rows) + if not self.skipfunc(i + self.pos) + ] + + lines.extend(new_rows) + self.pos = new_pos + + else: + new_rows = [] + try: + if rows is not None: + for _ in range(rows): + # assert for mypy, data is Iterator[str] or None, would + # error in next + assert self.data is not None + new_rows.append(next(self.data)) + lines.extend(new_rows) + else: + rows = 0 + + while True: + new_row = self._next_iter_line(row_num=self.pos + rows + 1) + rows += 1 + + if new_row is not None: + new_rows.append(new_row) + + except StopIteration: + if self.skiprows: + new_rows = [ + row + for i, row in enumerate(new_rows) + if not self.skipfunc(i + self.pos) + ] + lines.extend(new_rows) + if len(lines) == 0: + raise + self.pos += len(new_rows) + + self.buf = [] + else: + lines = new_rows + + if self.skipfooter: + lines = lines[: -self.skipfooter] + + lines = self._check_comments(lines) + if self.skip_blank_lines: + lines = self._remove_empty_lines(lines) + lines = self._check_thousands(lines) + return self._check_decimal(lines) + + +class FixedWidthReader(abc.Iterator): + """ + A reader of fixed-width lines. + """ + + def __init__(self, f, colspecs, delimiter, comment, skiprows=None, infer_nrows=100): + self.f = f + self.buffer = None + self.delimiter = "\r\n" + delimiter if delimiter else "\n\r\t " + self.comment = comment + if colspecs == "infer": + self.colspecs = self.detect_colspecs( + infer_nrows=infer_nrows, skiprows=skiprows + ) + else: + self.colspecs = colspecs + + if not isinstance(self.colspecs, (tuple, list)): + raise TypeError( + "column specifications must be a list or tuple, " + f"input was a {type(colspecs).__name__}" + ) + + for colspec in self.colspecs: + if not ( + isinstance(colspec, (tuple, list)) + and len(colspec) == 2 + and isinstance(colspec[0], (int, np.integer, type(None))) + and isinstance(colspec[1], (int, np.integer, type(None))) + ): + raise TypeError( + "Each column specification must be " + "2 element tuple or list of integers" + ) + + def get_rows(self, infer_nrows, skiprows=None): + """ + Read rows from self.f, skipping as specified. + + We distinguish buffer_rows (the first <= infer_nrows + lines) from the rows returned to detect_colspecs + because it's simpler to leave the other locations + with skiprows logic alone than to modify them to + deal with the fact we skipped some rows here as + well. + + Parameters + ---------- + infer_nrows : int + Number of rows to read from self.f, not counting + rows that are skipped. + skiprows: set, optional + Indices of rows to skip. + + Returns + ------- + detect_rows : list of str + A list containing the rows to read. 
+ + """ + if skiprows is None: + skiprows = set() + buffer_rows = [] + detect_rows = [] + for i, row in enumerate(self.f): + if i not in skiprows: + detect_rows.append(row) + buffer_rows.append(row) + if len(detect_rows) >= infer_nrows: + break + self.buffer = iter(buffer_rows) + return detect_rows + + def detect_colspecs(self, infer_nrows=100, skiprows=None): + # Regex escape the delimiters + delimiters = "".join(fr"\{x}" for x in self.delimiter) + pattern = re.compile(f"([^{delimiters}]+)") + rows = self.get_rows(infer_nrows, skiprows) + if not rows: + raise EmptyDataError("No rows from which to infer column width") + max_len = max(map(len, rows)) + mask = np.zeros(max_len + 1, dtype=int) + if self.comment is not None: + rows = [row.partition(self.comment)[0] for row in rows] + for row in rows: + for m in pattern.finditer(row): + mask[m.start() : m.end()] = 1 + shifted = np.roll(mask, 1) + shifted[0] = 0 + edges = np.where((mask ^ shifted) == 1)[0] + edge_pairs = list(zip(edges[::2], edges[1::2])) + return edge_pairs + + def __next__(self): + if self.buffer is not None: + try: + line = next(self.buffer) + except StopIteration: + self.buffer = None + line = next(self.f) + else: + line = next(self.f) + # Note: 'colspecs' is a sequence of half-open intervals. + return [line[fromm:to].strip(self.delimiter) for (fromm, to) in self.colspecs] + + +class FixedWidthFieldParser(PythonParser): + """ + Specialization that Converts fixed-width fields into DataFrames. + See PythonParser for details. + """ + + def __init__(self, f, **kwds): + # Support iterators, convert to a list. + self.colspecs = kwds.pop("colspecs") + self.infer_nrows = kwds.pop("infer_nrows") + PythonParser.__init__(self, f, **kwds) + + def _make_reader(self, f): + self.data = FixedWidthReader( + f, + self.colspecs, + self.delimiter, + self.comment, + self.skiprows, + self.infer_nrows, + ) + + def _remove_empty_lines(self, lines) -> List: + """ + Returns the list of lines without the empty ones. With fixed-width + fields, empty lines become arrays of empty strings. + + See PythonParser._remove_empty_lines. + """ + return [ + line + for line in lines + if any(not isinstance(e, str) or e.strip() for e in line) + ] + + +def count_empty_vals(vals) -> int: + return sum(1 for v in vals if v == "" or v is None) + + +def _validate_skipfooter_arg(skipfooter): + """ + Validate the 'skipfooter' parameter. + + Checks whether 'skipfooter' is a non-negative integer. + Raises a ValueError if that is not the case. + + Parameters + ---------- + skipfooter : non-negative integer + The number of rows to skip at the end of the file. + + Returns + ------- + validated_skipfooter : non-negative integer + The original input if the validation succeeds. + + Raises + ------ + ValueError : 'skipfooter' was not a non-negative integer. 
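The column inference in `detect_colspecs` above is worth unpacking: it ORs a character-coverage mask over the sample rows, XORs the mask against a copy of itself shifted by one position, and reads the surviving 1s as the edges of the half-open column intervals. A standalone re-creation of the idea on made-up sample rows:

import re

import numpy as np

rows = ["col1   col2 ", "a      bb   ", "ccc    d    "]  # hypothetical sample
pattern = re.compile(r"([^ ]+)")  # runs of non-delimiter characters
mask = np.zeros(max(map(len, rows)) + 1, dtype=int)
for row in rows:
    for m in pattern.finditer(row):
        mask[m.start() : m.end()] = 1  # mark covered character positions
shifted = np.roll(mask, 1)
shifted[0] = 0
edges = np.where((mask ^ shifted) == 1)[0]  # 0->1 and 1->0 transitions
print(list(zip(edges[::2], edges[1::2])))  # [(0, 4), (7, 11)]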
+ """ + if not is_integer(skipfooter): + raise ValueError("skipfooter must be an integer") + + if skipfooter < 0: + raise ValueError("skipfooter cannot be negative") + + return skipfooter diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py new file mode 100644 index 0000000000000..37814a5debf2e --- /dev/null +++ b/pandas/io/parsers/readers.py @@ -0,0 +1,1339 @@ +""" +Module contains tools for processing files into DataFrames or other objects +""" + +from collections import abc +import csv +import sys +from textwrap import fill +from typing import Any, Dict, Optional, Set, Type +import warnings + +import numpy as np + +import pandas._libs.lib as lib +from pandas._libs.parsers import STR_NA_VALUES +from pandas._typing import DtypeArg, FilePathOrBuffer, StorageOptions, Union +from pandas.errors import AbstractMethodError, ParserWarning +from pandas.util._decorators import Appender + +from pandas.core.dtypes.common import is_file_like, is_float, is_integer, is_list_like + +from pandas.core import generic +from pandas.core.frame import DataFrame +from pandas.core.indexes.api import RangeIndex + +from pandas.io.common import validate_header_arg +from pandas.io.parsers.base_parser import ParserBase, is_index_col, parser_defaults +from pandas.io.parsers.c_parser_wrapper import CParserWrapper +from pandas.io.parsers.python_parser import FixedWidthFieldParser, PythonParser + +_doc_read_csv_and_table = ( + r""" +{summary} + +Also supports optionally iterating or breaking of the file +into chunks. + +Additional help can be found in the online docs for +`IO Tools `_. + +Parameters +---------- +filepath_or_buffer : str, path object or file-like object + Any valid string path is acceptable. The string could be a URL. Valid + URL schemes include http, ftp, s3, gs, and file. For file URLs, a host is + expected. A local file could be: file://localhost/path/to/table.csv. + + If you want to pass in a path object, pandas accepts any ``os.PathLike``. + + By file-like object, we refer to objects with a ``read()`` method, such as + a file handle (e.g. via builtin ``open`` function) or ``StringIO``. +sep : str, default {_default_sep} + Delimiter to use. If sep is None, the C engine cannot automatically detect + the separator, but the Python parsing engine can, meaning the latter will + be used and automatically detect the separator by Python's builtin sniffer + tool, ``csv.Sniffer``. In addition, separators longer than 1 character and + different from ``'\s+'`` will be interpreted as regular expressions and + will also force the use of the Python parsing engine. Note that regex + delimiters are prone to ignoring quoted data. Regex example: ``'\r\t'``. +delimiter : str, default ``None`` + Alias for sep. +header : int, list of int, default 'infer' + Row number(s) to use as the column names, and the start of the + data. Default behavior is to infer the column names: if no names + are passed the behavior is identical to ``header=0`` and column + names are inferred from the first line of the file, if column + names are passed explicitly then the behavior is identical to + ``header=None``. Explicitly pass ``header=0`` to be able to + replace existing names. The header can be a list of integers that + specify row locations for a multi-index on the columns + e.g. [0,1,3]. Intervening rows that are not specified will be + skipped (e.g. 2 in this example is skipped). 
Note that this + parameter ignores commented lines and empty lines if + ``skip_blank_lines=True``, so ``header=0`` denotes the first line of + data rather than the first line of the file. +names : array-like, optional + List of column names to use. If the file contains a header row, + then you should explicitly pass ``header=0`` to override the column names. + Duplicates in this list are not allowed. +index_col : int, str, sequence of int / str, or False, default ``None`` + Column(s) to use as the row labels of the ``DataFrame``, either given as + string name or column index. If a sequence of int / str is given, a + MultiIndex is used. + + Note: ``index_col=False`` can be used to force pandas to *not* use the first + column as the index, e.g. when you have a malformed file with delimiters at + the end of each line. +usecols : list-like or callable, optional + Return a subset of the columns. If list-like, all elements must either + be positional (i.e. integer indices into the document columns) or strings + that correspond to column names provided either by the user in `names` or + inferred from the document header row(s). For example, a valid list-like + `usecols` parameter would be ``[0, 1, 2]`` or ``['foo', 'bar', 'baz']``. + Element order is ignored, so ``usecols=[0, 1]`` is the same as ``[1, 0]``. + To instantiate a DataFrame from ``data`` with element order preserved use + ``pd.read_csv(data, usecols=['foo', 'bar'])[['foo', 'bar']]`` for columns + in ``['foo', 'bar']`` order or + ``pd.read_csv(data, usecols=['foo', 'bar'])[['bar', 'foo']]`` + for ``['bar', 'foo']`` order. + + If callable, the callable function will be evaluated against the column + names, returning names where the callable function evaluates to True. An + example of a valid callable argument would be ``lambda x: x.upper() in + ['AAA', 'BBB', 'DDD']``. Using this parameter results in much faster + parsing time and lower memory usage. +squeeze : bool, default False + If the parsed data only contains one column then return a Series. +prefix : str, optional + Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ... +mangle_dupe_cols : bool, default True + Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than + 'X'...'X'. Passing in False will cause data to be overwritten if there + are duplicate names in the columns. +dtype : Type name or dict of column -> type, optional + Data type for data or columns. E.g. {{'a': np.float64, 'b': np.int32, + 'c': 'Int64'}} + Use `str` or `object` together with suitable `na_values` settings + to preserve and not interpret dtype. + If converters are specified, they will be applied INSTEAD + of dtype conversion. +engine : {{'c', 'python'}}, optional + Parser engine to use. The C engine is faster while the python engine is + currently more feature-complete. +converters : dict, optional + Dict of functions for converting values in certain columns. Keys can either + be integers or column labels. +true_values : list, optional + Values to consider as True. +false_values : list, optional + Values to consider as False. +skipinitialspace : bool, default False + Skip spaces after delimiter. +skiprows : list-like, int or callable, optional + Line numbers to skip (0-indexed) or number of lines to skip (int) + at the start of the file. + + If callable, the callable function will be evaluated against the row + indices, returning True if the row should be skipped and False otherwise. + An example of a valid callable argument would be ``lambda x: x in [0, 2]``. 
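The callable form of `skiprows` described above is evaluated per row index before header inference, so it can strip leading junk lines (sketch, not part of this change):

from io import StringIO

import pandas as pd

data = "exported 2021-01-01\na,b\n1,2\n3,4\n"
# Row 0 is skipped; the header is then inferred from the next line.
df = pd.read_csv(StringIO(data), skiprows=lambda x: x == 0)
print(df.columns.tolist())  # ['a', 'b']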
+skipfooter : int, default 0 + Number of lines at bottom of file to skip (Unsupported with engine='c'). +nrows : int, optional + Number of rows of file to read. Useful for reading pieces of large files. +na_values : scalar, str, list-like, or dict, optional + Additional strings to recognize as NA/NaN. If dict passed, specific + per-column NA values. By default the following values are interpreted as + NaN: '""" + + fill("', '".join(sorted(STR_NA_VALUES)), 70, subsequent_indent=" ") + + """'. +keep_default_na : bool, default True + Whether or not to include the default NaN values when parsing the data. + Depending on whether `na_values` is passed in, the behavior is as follows: + + * If `keep_default_na` is True, and `na_values` are specified, `na_values` + is appended to the default NaN values used for parsing. + * If `keep_default_na` is True, and `na_values` are not specified, only + the default NaN values are used for parsing. + * If `keep_default_na` is False, and `na_values` are specified, only + the NaN values specified `na_values` are used for parsing. + * If `keep_default_na` is False, and `na_values` are not specified, no + strings will be parsed as NaN. + + Note that if `na_filter` is passed in as False, the `keep_default_na` and + `na_values` parameters will be ignored. +na_filter : bool, default True + Detect missing value markers (empty strings and the value of na_values). In + data without any NAs, passing na_filter=False can improve the performance + of reading a large file. +verbose : bool, default False + Indicate number of NA values placed in non-numeric columns. +skip_blank_lines : bool, default True + If True, skip over blank lines rather than interpreting as NaN values. +parse_dates : bool or list of int or names or list of lists or dict, \ +default False + The behavior is as follows: + + * boolean. If True -> try parsing the index. + * list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3 + each as a separate date column. + * list of lists. e.g. If [[1, 3]] -> combine columns 1 and 3 and parse as + a single date column. + * dict, e.g. {{'foo' : [1, 3]}} -> parse columns 1, 3 as date and call + result 'foo' + + If a column or index cannot be represented as an array of datetimes, + say because of an unparsable value or a mixture of timezones, the column + or index will be returned unaltered as an object data type. For + non-standard datetime parsing, use ``pd.to_datetime`` after + ``pd.read_csv``. To parse an index or column with a mixture of timezones, + specify ``date_parser`` to be a partially-applied + :func:`pandas.to_datetime` with ``utc=True``. See + :ref:`io.csv.mixed_timezones` for more. + + Note: A fast-path exists for iso8601-formatted dates. +infer_datetime_format : bool, default False + If True and `parse_dates` is enabled, pandas will attempt to infer the + format of the datetime strings in the columns, and if it can be inferred, + switch to a faster method of parsing them. In some cases this can increase + the parsing speed by 5-10x. +keep_date_col : bool, default False + If True and `parse_dates` specifies combining multiple columns then + keep the original columns. +date_parser : function, optional + Function to use for converting a sequence of string columns to an array of + datetime instances. The default uses ``dateutil.parser.parser`` to do the + conversion. 
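The list-of-lists form of `parse_dates` described above concatenates the named columns row-wise before parsing, producing a single combined datetime column (sketch):

from io import StringIO

import pandas as pd

data = "date,time,value\n2021-01-01,12:30,7\n"
df = pd.read_csv(StringIO(data), parse_dates=[["date", "time"]])
print(df.columns.tolist())  # ['date_time', 'value'] -- combined and parsed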
Pandas will try to call `date_parser` in three different ways, + advancing to the next if an exception occurs: 1) Pass one or more arrays + (as defined by `parse_dates`) as arguments; 2) concatenate (row-wise) the + string values from the columns defined by `parse_dates` into a single array + and pass that; and 3) call `date_parser` once for each row using one or + more strings (corresponding to the columns defined by `parse_dates`) as + arguments. +dayfirst : bool, default False + DD/MM format dates, international and European format. +cache_dates : bool, default True + If True, use a cache of unique, converted dates to apply the datetime + conversion. May produce significant speed-up when parsing duplicate + date strings, especially ones with timezone offsets. + + .. versionadded:: 0.25.0 +iterator : bool, default False + Return TextFileReader object for iteration or getting chunks with + ``get_chunk()``. + + .. versionchanged:: 1.2 + + ``TextFileReader`` is a context manager. +chunksize : int, optional + Return TextFileReader object for iteration. + See the `IO Tools docs + `_ + for more information on ``iterator`` and ``chunksize``. + + .. versionchanged:: 1.2 + + ``TextFileReader`` is a context manager. +compression : {{'infer', 'gzip', 'bz2', 'zip', 'xz', None}}, default 'infer' + For on-the-fly decompression of on-disk data. If 'infer' and + `filepath_or_buffer` is path-like, then detect compression from the + following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise no + decompression). If using 'zip', the ZIP file must contain only one data + file to be read in. Set to None for no decompression. +thousands : str, optional + Thousands separator. +decimal : str, default '.' + Character to recognize as decimal point (e.g. use ',' for European data). +lineterminator : str (length 1), optional + Character to break file into lines. Only valid with C parser. +quotechar : str (length 1), optional + The character used to denote the start and end of a quoted item. Quoted + items can include the delimiter and it will be ignored. +quoting : int or csv.QUOTE_* instance, default 0 + Control field quoting behavior per ``csv.QUOTE_*`` constants. Use one of + QUOTE_MINIMAL (0), QUOTE_ALL (1), QUOTE_NONNUMERIC (2) or QUOTE_NONE (3). +doublequote : bool, default ``True`` + When quotechar is specified and quoting is not ``QUOTE_NONE``, indicate + whether or not to interpret two consecutive quotechar elements INSIDE a + field as a single ``quotechar`` element. +escapechar : str (length 1), optional + One-character string used to escape other characters. +comment : str, optional + Indicates remainder of line should not be parsed. If found at the beginning + of a line, the line will be ignored altogether. This parameter must be a + single character. Like empty lines (as long as ``skip_blank_lines=True``), + fully commented lines are ignored by the parameter `header` but not by + `skiprows`. For example, if ``comment='#'``, parsing + ``#empty\\na,b,c\\n1,2,3`` with ``header=0`` will result in 'a,b,c' being + treated as the header. +encoding : str, optional + Encoding to use for UTF when reading/writing (ex. 'utf-8'). `List of Python + standard encodings + `_ . +dialect : str or csv.Dialect, optional + If provided, this parameter will override values (default or not) for the + following parameters: `delimiter`, `doublequote`, `escapechar`, + `skipinitialspace`, `quotechar`, and `quoting`. If it is necessary to + override values, a ParserWarning will be issued. 
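The `comment` behavior documented above can be checked directly with the docstring's own example input (sketch):

from io import StringIO

import pandas as pd

df = pd.read_csv(StringIO("#empty\na,b,c\n1,2,3"), comment="#", header=0)
print(df.columns.tolist())  # ['a', 'b', 'c'] -- the fully commented line is ignored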
See csv.Dialect
+    documentation for more details.
+error_bad_lines : bool, default True
+    Lines with too many fields (e.g. a csv line with too many commas) will by
+    default cause an exception to be raised, and no DataFrame will be returned.
+    If False, then these "bad lines" will be dropped from the DataFrame that is
+    returned.
+warn_bad_lines : bool, default True
+    If error_bad_lines is False, and warn_bad_lines is True, a warning for each
+    "bad line" will be output.
+delim_whitespace : bool, default False
+    Specifies whether or not whitespace (e.g. ``' '`` or ``'\t'``) will be
+    used as the sep. Equivalent to setting ``sep='\\s+'``. If this option
+    is set to True, nothing should be passed in for the ``delimiter``
+    parameter.
+low_memory : bool, default True
+    Internally process the file in chunks, resulting in lower memory use
+    while parsing, but possibly mixed type inference. To ensure no mixed
+    types either set False, or specify the type with the `dtype` parameter.
+    Note that the entire file is read into a single DataFrame regardless;
+    use the `chunksize` or `iterator` parameter to return the data in chunks.
+    (Only valid with C parser).
+memory_map : bool, default False
+    If a filepath is provided for `filepath_or_buffer`, map the file object
+    directly onto memory and access the data directly from there. Using this
+    option can improve performance because there is no longer any I/O overhead.
+float_precision : str, optional
+    Specifies which converter the C engine should use for floating-point
+    values. The options are ``None`` or 'high' for the ordinary converter,
+    'legacy' for the original lower precision pandas converter, and
+    'round_trip' for the round-trip converter.
+
+    .. versionchanged:: 1.2
+
+{storage_options}
+
+    .. versionadded:: 1.2
+
+Returns
+-------
+DataFrame or TextParser
+    A comma-separated values (csv) file is returned as a two-dimensional
+    data structure with labeled axes.
+
+See Also
+--------
+DataFrame.to_csv : Write DataFrame to a comma-separated values (csv) file.
+read_csv : Read a comma-separated values (csv) file into DataFrame.
+read_fwf : Read a table of fixed-width formatted lines into DataFrame.
+
+Examples
+--------
+>>> pd.{func_name}('data.csv')  # doctest: +SKIP
+"""
+)
+
+
+_c_parser_defaults = {
+    "delim_whitespace": False,
+    "na_filter": True,
+    "low_memory": True,
+    "memory_map": False,
+    "error_bad_lines": True,
+    "warn_bad_lines": True,
+    "float_precision": None,
+}
+
+_fwf_defaults = {"colspecs": "infer", "infer_nrows": 100, "widths": None}
+
+_c_unsupported = {"skipfooter"}
+_python_unsupported = {"low_memory", "float_precision"}
+
+_deprecated_defaults: Dict[str, Any] = {}
+_deprecated_args: Set[str] = set()
+
+
+def validate_integer(name, val, min_val=0):
+    """
+    Checks whether the 'name' parameter for parsing is either
+    an integer OR float that can SAFELY be cast to an integer
+    without losing accuracy. Raises a ValueError if that is
+    not the case.
+ + Parameters + ---------- + name : string + Parameter name (used for error reporting) + val : int or float + The value to check + min_val : int + Minimum allowed value (val < min_val will result in a ValueError) + """ + msg = f"'{name:s}' must be an integer >={min_val:d}" + + if val is not None: + if is_float(val): + if int(val) != val: + raise ValueError(msg) + val = int(val) + elif not (is_integer(val) and val >= min_val): + raise ValueError(msg) + + return val + + +def _validate_names(names): + """ + Raise ValueError if the `names` parameter contains duplicates or has an + invalid data type. + + Parameters + ---------- + names : array-like or None + An array containing a list of the names used for the output DataFrame. + + Raises + ------ + ValueError + If names are not unique or are not ordered (e.g. set). + """ + if names is not None: + if len(names) != len(set(names)): + raise ValueError("Duplicate names are not allowed.") + if not ( + is_list_like(names, allow_sets=False) or isinstance(names, abc.KeysView) + ): + raise ValueError("Names should be an ordered collection.") + + +def _read(filepath_or_buffer: FilePathOrBuffer, kwds): + """Generic reader of line files.""" + if kwds.get("date_parser", None) is not None: + if isinstance(kwds["parse_dates"], bool): + kwds["parse_dates"] = True + + # Extract some of the arguments (pass chunksize on). + iterator = kwds.get("iterator", False) + chunksize = validate_integer("chunksize", kwds.get("chunksize", None), 1) + nrows = kwds.get("nrows", None) + + # Check for duplicates in names. + _validate_names(kwds.get("names", None)) + + # Create the parser. + parser = TextFileReader(filepath_or_buffer, **kwds) + + if chunksize or iterator: + return parser + + with parser: + return parser.read(nrows) + + +@Appender( + _doc_read_csv_and_table.format( + func_name="read_csv", + summary="Read a comma-separated values (csv) file into DataFrame.", + _default_sep="','", + storage_options=generic._shared_docs["storage_options"], + ) +) +def read_csv( + filepath_or_buffer: FilePathOrBuffer, + sep=lib.no_default, + delimiter=None, + # Column and Index Locations and Names + header="infer", + names=None, + index_col=None, + usecols=None, + squeeze=False, + prefix=None, + mangle_dupe_cols=True, + # General Parsing Configuration + dtype: Optional[DtypeArg] = None, + engine=None, + converters=None, + true_values=None, + false_values=None, + skipinitialspace=False, + skiprows=None, + skipfooter=0, + nrows=None, + # NA and Missing Data Handling + na_values=None, + keep_default_na=True, + na_filter=True, + verbose=False, + skip_blank_lines=True, + # Datetime Handling + parse_dates=False, + infer_datetime_format=False, + keep_date_col=False, + date_parser=None, + dayfirst=False, + cache_dates=True, + # Iteration + iterator=False, + chunksize=None, + # Quoting, Compression, and File Format + compression="infer", + thousands=None, + decimal: str = ".", + lineterminator=None, + quotechar='"', + quoting=csv.QUOTE_MINIMAL, + doublequote=True, + escapechar=None, + comment=None, + encoding=None, + dialect=None, + # Error Handling + error_bad_lines=True, + warn_bad_lines=True, + # Internal + delim_whitespace=False, + low_memory=_c_parser_defaults["low_memory"], + memory_map=False, + float_precision=None, + storage_options: StorageOptions = None, +): + kwds = locals() + del kwds["filepath_or_buffer"] + del kwds["sep"] + + kwds_defaults = _refine_defaults_read( + dialect, delimiter, delim_whitespace, engine, sep, defaults={"delimiter": ","} + ) + kwds.update(kwds_defaults) + 
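In `_read` above, a truthy `chunksize` or `iterator` short-circuits the eager read: the `TextFileReader` itself is returned, and since 1.2 it is usable as a context manager. Typical use (sketch; the file path is hypothetical):

import pandas as pd

with pd.read_csv("data.csv", chunksize=1000) as reader:  # hypothetical file
    for chunk in reader:
        print(len(chunk))  # each chunk is a DataFrame of up to 1000 rows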
+ return _read(filepath_or_buffer, kwds) + + +@Appender( + _doc_read_csv_and_table.format( + func_name="read_table", + summary="Read general delimited file into DataFrame.", + _default_sep=r"'\\t' (tab-stop)", + storage_options=generic._shared_docs["storage_options"], + ) +) +def read_table( + filepath_or_buffer: FilePathOrBuffer, + sep=lib.no_default, + delimiter=None, + # Column and Index Locations and Names + header="infer", + names=None, + index_col=None, + usecols=None, + squeeze=False, + prefix=None, + mangle_dupe_cols=True, + # General Parsing Configuration + dtype: Optional[DtypeArg] = None, + engine=None, + converters=None, + true_values=None, + false_values=None, + skipinitialspace=False, + skiprows=None, + skipfooter=0, + nrows=None, + # NA and Missing Data Handling + na_values=None, + keep_default_na=True, + na_filter=True, + verbose=False, + skip_blank_lines=True, + # Datetime Handling + parse_dates=False, + infer_datetime_format=False, + keep_date_col=False, + date_parser=None, + dayfirst=False, + cache_dates=True, + # Iteration + iterator=False, + chunksize=None, + # Quoting, Compression, and File Format + compression="infer", + thousands=None, + decimal: str = ".", + lineterminator=None, + quotechar='"', + quoting=csv.QUOTE_MINIMAL, + doublequote=True, + escapechar=None, + comment=None, + encoding=None, + dialect=None, + # Error Handling + error_bad_lines=True, + warn_bad_lines=True, + # Internal + delim_whitespace=False, + low_memory=_c_parser_defaults["low_memory"], + memory_map=False, + float_precision=None, +): + kwds = locals() + del kwds["filepath_or_buffer"] + del kwds["sep"] + + kwds_defaults = _refine_defaults_read( + dialect, delimiter, delim_whitespace, engine, sep, defaults={"delimiter": "\t"} + ) + kwds.update(kwds_defaults) + + return _read(filepath_or_buffer, kwds) + + +def read_fwf( + filepath_or_buffer: FilePathOrBuffer, + colspecs="infer", + widths=None, + infer_nrows=100, + **kwds, +): + r""" + Read a table of fixed-width formatted lines into DataFrame. + + Also supports optionally iterating or breaking of the file + into chunks. + + Additional help can be found in the `online docs for IO Tools + `_. + + Parameters + ---------- + filepath_or_buffer : str, path object or file-like object + Any valid string path is acceptable. The string could be a URL. Valid + URL schemes include http, ftp, s3, and file. For file URLs, a host is + expected. A local file could be: + ``file://localhost/path/to/table.csv``. + + If you want to pass in a path object, pandas accepts any + ``os.PathLike``. + + By file-like object, we refer to objects with a ``read()`` method, + such as a file handle (e.g. via builtin ``open`` function) + or ``StringIO``. + colspecs : list of tuple (int, int) or 'infer'. optional + A list of tuples giving the extents of the fixed-width + fields of each line as half-open intervals (i.e., [from, to[ ). + String value 'infer' can be used to instruct the parser to try + detecting the column specifications from the first 100 rows of + the data which are not being skipped via skiprows (default='infer'). + widths : list of int, optional + A list of field widths which can be used instead of 'colspecs' if + the intervals are contiguous. + infer_nrows : int, default 100 + The number of rows to consider when letting the parser determine the + `colspecs`. + + .. versionadded:: 0.24.0 + **kwds : optional + Optional keyword arguments can be passed to ``TextFileReader``. 
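As the `read_fwf` docstring above notes, `widths` is sugar for contiguous `colspecs`; the function converts one to the other before dispatching to the 'python-fwf' engine. For example (sketch):

from io import StringIO

import pandas as pd

data = "id  name\n1   ann \n2   bob \n"
# widths=[4, 4] becomes colspecs=[(0, 4), (4, 8)] internally.
df = pd.read_fwf(StringIO(data), widths=[4, 4])
print(df.columns.tolist())  # ['id', 'name']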
+ + Returns + ------- + DataFrame or TextParser + A comma-separated values (csv) file is returned as two-dimensional + data structure with labeled axes. + + See Also + -------- + DataFrame.to_csv : Write DataFrame to a comma-separated values (csv) file. + read_csv : Read a comma-separated values (csv) file into DataFrame. + + Examples + -------- + >>> pd.read_fwf('data.csv') # doctest: +SKIP + """ + # Check input arguments. + if colspecs is None and widths is None: + raise ValueError("Must specify either colspecs or widths") + elif colspecs not in (None, "infer") and widths is not None: + raise ValueError("You must specify only one of 'widths' and 'colspecs'") + + # Compute 'colspecs' from 'widths', if specified. + if widths is not None: + colspecs, col = [], 0 + for w in widths: + colspecs.append((col, col + w)) + col += w + + kwds["colspecs"] = colspecs + kwds["infer_nrows"] = infer_nrows + kwds["engine"] = "python-fwf" + return _read(filepath_or_buffer, kwds) + + +class TextFileReader(abc.Iterator): + """ + + Passed dialect overrides any of the related parser options + + """ + + def __init__(self, f, engine=None, **kwds): + + self.f = f + + if engine is not None: + engine_specified = True + else: + engine = "python" + engine_specified = False + self.engine = engine + self._engine_specified = kwds.get("engine_specified", engine_specified) + + _validate_skipfooter(kwds) + + dialect = _extract_dialect(kwds) + if dialect is not None: + kwds = _merge_with_dialect_properties(dialect, kwds) + + if kwds.get("header", "infer") == "infer": + kwds["header"] = 0 if kwds.get("names") is None else None + + self.orig_options = kwds + + # miscellanea + self._currow = 0 + + options = self._get_options_with_defaults(engine) + options["storage_options"] = kwds.get("storage_options", None) + + self.chunksize = options.pop("chunksize", None) + self.nrows = options.pop("nrows", None) + self.squeeze = options.pop("squeeze", False) + + self._check_file_or_buffer(f, engine) + self.options, self.engine = self._clean_options(options, engine) + + if "has_index_names" in kwds: + self.options["has_index_names"] = kwds["has_index_names"] + + self._engine = self._make_engine(self.engine) + + def close(self): + self._engine.close() + + def _get_options_with_defaults(self, engine): + kwds = self.orig_options + + options = {} + + for argname, default in parser_defaults.items(): + value = kwds.get(argname, default) + + # see gh-12935 + if argname == "mangle_dupe_cols" and not value: + raise ValueError("Setting mangle_dupe_cols=False is not supported yet") + else: + options[argname] = value + + for argname, default in _c_parser_defaults.items(): + if argname in kwds: + value = kwds[argname] + + if engine != "c" and value != default: + if "python" in engine and argname not in _python_unsupported: + pass + elif value == _deprecated_defaults.get(argname, default): + pass + else: + raise ValueError( + f"The {repr(argname)} option is not supported with the " + f"{repr(engine)} engine" + ) + else: + value = _deprecated_defaults.get(argname, default) + options[argname] = value + + if engine == "python-fwf": + # pandas\io\parsers.py:907: error: Incompatible types in assignment + # (expression has type "object", variable has type "Union[int, str, + # None]") [assignment] + for argname, default in _fwf_defaults.items(): # type: ignore[assignment] + options[argname] = kwds.get(argname, default) + + return options + + def _check_file_or_buffer(self, f, engine): + # see gh-16530 + if is_file_like(f) and engine != "c" and not hasattr(f, 
"__next__"): + # The C engine doesn't need the file-like to have the "__next__" + # attribute. However, the Python engine explicitly calls + # "__next__(...)" when iterating through such an object, meaning it + # needs to have that attribute + raise ValueError( + "The 'python' engine cannot iterate through this file buffer." + ) + + def _clean_options(self, options, engine): + result = options.copy() + + fallback_reason = None + + # C engine not supported yet + if engine == "c": + if options["skipfooter"] > 0: + fallback_reason = "the 'c' engine does not support skipfooter" + engine = "python" + + sep = options["delimiter"] + delim_whitespace = options["delim_whitespace"] + + if sep is None and not delim_whitespace: + if engine == "c": + fallback_reason = ( + "the 'c' engine does not support " + "sep=None with delim_whitespace=False" + ) + engine = "python" + elif sep is not None and len(sep) > 1: + if engine == "c" and sep == r"\s+": + result["delim_whitespace"] = True + del result["delimiter"] + elif engine not in ("python", "python-fwf"): + # wait until regex engine integrated + fallback_reason = ( + "the 'c' engine does not support " + "regex separators (separators > 1 char and " + r"different from '\s+' are interpreted as regex)" + ) + engine = "python" + elif delim_whitespace: + if "python" in engine: + result["delimiter"] = r"\s+" + elif sep is not None: + encodeable = True + encoding = sys.getfilesystemencoding() or "utf-8" + try: + if len(sep.encode(encoding)) > 1: + encodeable = False + except UnicodeDecodeError: + encodeable = False + if not encodeable and engine not in ("python", "python-fwf"): + fallback_reason = ( + f"the separator encoded in {encoding} " + "is > 1 char long, and the 'c' engine " + "does not support such separators" + ) + engine = "python" + + quotechar = options["quotechar"] + if quotechar is not None and isinstance(quotechar, (str, bytes)): + if ( + len(quotechar) == 1 + and ord(quotechar) > 127 + and engine not in ("python", "python-fwf") + ): + fallback_reason = ( + "ord(quotechar) > 127, meaning the " + "quotechar is larger than one byte, " + "and the 'c' engine does not support such quotechars" + ) + engine = "python" + + if fallback_reason and self._engine_specified: + raise ValueError(fallback_reason) + + if engine == "c": + for arg in _c_unsupported: + del result[arg] + + if "python" in engine: + for arg in _python_unsupported: + if fallback_reason and result[arg] != _c_parser_defaults[arg]: + raise ValueError( + "Falling back to the 'python' engine because " + f"{fallback_reason}, but this causes {repr(arg)} to be " + "ignored as it is not supported by the 'python' engine." + ) + del result[arg] + + if fallback_reason: + warnings.warn( + ( + "Falling back to the 'python' engine because " + f"{fallback_reason}; you can avoid this warning by specifying " + "engine='python'." 
+ ), + ParserWarning, + stacklevel=5, + ) + + index_col = options["index_col"] + names = options["names"] + converters = options["converters"] + na_values = options["na_values"] + skiprows = options["skiprows"] + + validate_header_arg(options["header"]) + + for arg in _deprecated_args: + parser_default = _c_parser_defaults[arg] + depr_default = _deprecated_defaults[arg] + if result.get(arg, depr_default) != depr_default: + msg = ( + f"The {arg} argument has been deprecated and will be " + "removed in a future version.\n\n" + ) + warnings.warn(msg, FutureWarning, stacklevel=2) + else: + result[arg] = parser_default + + if index_col is True: + raise ValueError("The value of index_col couldn't be 'True'") + if is_index_col(index_col): + if not isinstance(index_col, (list, tuple, np.ndarray)): + index_col = [index_col] + result["index_col"] = index_col + + names = list(names) if names is not None else names + + # type conversion-related + if converters is not None: + if not isinstance(converters, dict): + raise TypeError( + "Type converters must be a dict or subclass, " + f"input was a {type(converters).__name__}" + ) + else: + converters = {} + + # Converting values to NA + keep_default_na = options["keep_default_na"] + na_values, na_fvalues = _clean_na_values(na_values, keep_default_na) + + # handle skiprows; this is internally handled by the + # c-engine, so only need for python parsers + if engine != "c": + if is_integer(skiprows): + skiprows = list(range(skiprows)) + if skiprows is None: + skiprows = set() + elif not callable(skiprows): + skiprows = set(skiprows) + + # put stuff back + result["names"] = names + result["converters"] = converters + result["na_values"] = na_values + result["na_fvalues"] = na_fvalues + result["skiprows"] = skiprows + + return result, engine + + def __next__(self): + try: + return self.get_chunk() + except StopIteration: + self.close() + raise + + def _make_engine(self, engine="c"): + mapping: Dict[str, Type[ParserBase]] = { + "c": CParserWrapper, + "python": PythonParser, + "python-fwf": FixedWidthFieldParser, + } + if engine not in mapping: + raise ValueError( + f"Unknown engine: {engine} (valid options are {mapping.keys()})" + ) + # error: Too many arguments for "ParserBase" + return mapping[engine](self.f, **self.options) # type: ignore[call-arg] + + def _failover_to_python(self): + raise AbstractMethodError(self) + + def read(self, nrows=None): + nrows = validate_integer("nrows", nrows) + index, columns, col_dict = self._engine.read(nrows) + + if index is None: + if col_dict: + # Any column is actually fine: + new_rows = len(next(iter(col_dict.values()))) + index = RangeIndex(self._currow, self._currow + new_rows) + else: + new_rows = 0 + else: + new_rows = len(index) + + df = DataFrame(col_dict, columns=columns, index=index) + + self._currow += new_rows + + if self.squeeze and len(df.columns) == 1: + return df[df.columns[0]].copy() + return df + + def get_chunk(self, size=None): + if size is None: + size = self.chunksize + if self.nrows is not None: + if self._currow >= self.nrows: + raise StopIteration + size = min(size, self.nrows - self._currow) + return self.read(nrows=size) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.close() + + +def TextParser(*args, **kwds): + """ + Converts lists of lists/tuples into DataFrames with proper type inference + and optional (e.g. string to datetime) conversion. 
Also enables iterating
+    lazily over chunks of large files.
+
+    Parameters
+    ----------
+    data : file-like object or list
+    delimiter : separator character to use
+    dialect : str or csv.Dialect instance, optional
+        Ignored if delimiter is longer than 1 character
+    names : sequence, default
+    header : int, default 0
+        Row to use to parse column labels. Defaults to the first row. Prior
+        rows will be discarded
+    index_col : int or list, optional
+        Column or columns to use as the (possibly hierarchical) index
+    has_index_names: bool, default False
+        True if the cols defined in index_col have an index name and are
+        not in the header.
+    na_values : scalar, str, list-like, or dict, optional
+        Additional strings to recognize as NA/NaN.
+    keep_default_na : bool, default True
+    thousands : str, optional
+        Thousands separator
+    comment : str, optional
+        Comment out remainder of line
+    parse_dates : bool, default False
+    keep_date_col : bool, default False
+    date_parser : function, optional
+    skiprows : list of integers
+        Row numbers to skip
+    skipfooter : int
+        Number of lines at the bottom of the file to skip
+    converters : dict, optional
+        Dict of functions for converting values in certain columns. Keys can
+        either be integers or column labels, values are functions that take one
+        input argument, the cell (not column) content, and return the
+        transformed content.
+    encoding : str, optional
+        Encoding to use for UTF when reading/writing (ex. 'utf-8')
+    squeeze : bool, default False
+        returns Series if only one column.
+    infer_datetime_format: bool, default False
+        If True and `parse_dates` is True for a column, try to infer the
+        datetime format based on the first datetime string. If the format
+        can be inferred, there often will be a large parsing speed-up.
+    float_precision : str, optional
+        Specifies which converter the C engine should use for floating-point
+        values. The options are `None` or `high` for the ordinary converter,
+        `legacy` for the original lower precision pandas converter, and
+        `round_trip` for the round-trip converter.
+
+        .. versionchanged:: 1.2
+    """
+    kwds["engine"] = "python"
+    return TextFileReader(*args, **kwds)
+
+
+def _clean_na_values(na_values, keep_default_na=True):
+
+    if na_values is None:
+        if keep_default_na:
+            na_values = STR_NA_VALUES
+        else:
+            na_values = set()
+        # pandas\io\parsers.py:3387: error: Need type annotation for
+        # 'na_fvalues'  (hint: "na_fvalues: Set[<type>] = ...")  [var-annotated]
+        na_fvalues = set()  # type: ignore[var-annotated]
+    elif isinstance(na_values, dict):
+        old_na_values = na_values.copy()
+        na_values = {}  # Prevent aliasing.
+
+        # Convert the values in the na_values dictionary
+        # into array-likes for further use. This is also
+        # where we append the default NaN values, provided
+        # that `keep_default_na=True`.
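The dict branch of `_clean_na_values` above (continued just below) normalizes each per-column value to a set and, when `keep_default_na=True`, unions it with `STR_NA_VALUES`. The observable effect (sketch, not part of this change):

from io import StringIO

import pandas as pd

data = "a,b\nmissing,-1\nx,2\n"
# 'missing' is treated as NA only in column 'a'; -1 only in column 'b'.
df = pd.read_csv(StringIO(data), na_values={"a": ["missing"], "b": [-1]})
print(df.isna().iloc[0].tolist())  # [True, True]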
+ for k, v in old_na_values.items(): + if not is_list_like(v): + v = [v] + + if keep_default_na: + v = set(v) | STR_NA_VALUES + + na_values[k] = v + # pandas\io\parsers.py:3404: error: Incompatible types in assignment + # (expression has type "Dict[Any, Any]", variable has type "Set[Any]") + # [assignment] + na_fvalues = { # type: ignore[assignment] + k: _floatify_na_values(v) for k, v in na_values.items() + } + else: + if not is_list_like(na_values): + na_values = [na_values] + na_values = _stringify_na_values(na_values) + if keep_default_na: + na_values = na_values | STR_NA_VALUES + + na_fvalues = _floatify_na_values(na_values) + + return na_values, na_fvalues + + +def _floatify_na_values(na_values): + # create float versions of the na_values + result = set() + for v in na_values: + try: + v = float(v) + if not np.isnan(v): + result.add(v) + except (TypeError, ValueError, OverflowError): + pass + return result + + +def _stringify_na_values(na_values): + """ return a stringified and numeric for these values """ + result = [] + for x in na_values: + result.append(str(x)) + result.append(x) + try: + v = float(x) + + # we are like 999 here + if v == int(v): + v = int(v) + result.append(f"{v}.0") + result.append(str(v)) + + # pandas\io\parsers.py:3522: error: Argument 1 to "append" of + # "list" has incompatible type "float"; expected "str" [arg-type] + result.append(v) # type: ignore[arg-type] + except (TypeError, ValueError, OverflowError): + pass + try: + # pandas\io\parsers.py:3526: error: Argument 1 to "append" of + # "list" has incompatible type "int"; expected "str" [arg-type] + result.append(int(x)) # type: ignore[arg-type] + except (TypeError, ValueError, OverflowError): + pass + return set(result) + + +def _refine_defaults_read( + dialect: Union[str, csv.Dialect], + delimiter: Union[str, object], + delim_whitespace: bool, + engine: str, + sep: Union[str, object], + defaults: Dict[str, Any], +): + """Validate/refine default values of input parameters of read_csv, read_table. + + Parameters + ---------- + dialect : str or csv.Dialect + If provided, this parameter will override values (default or not) for the + following parameters: `delimiter`, `doublequote`, `escapechar`, + `skipinitialspace`, `quotechar`, and `quoting`. If it is necessary to + override values, a ParserWarning will be issued. See csv.Dialect + documentation for more details. + delimiter : str or object + Alias for sep. + delim_whitespace : bool + Specifies whether or not whitespace (e.g. ``' '`` or ``'\t'``) will be + used as the sep. Equivalent to setting ``sep='\\s+'``. If this option + is set to True, nothing should be passed in for the ``delimiter`` + parameter. + engine : {{'c', 'python'}} + Parser engine to use. The C engine is faster while the python engine is + currently more feature-complete. + sep : str or object + A delimiter provided by the user (str) or a sentinel value, i.e. + pandas._libs.lib.no_default. + defaults: dict + Default values of input parameters. + + Returns + ------- + kwds : dict + Input parameters with correct values. + + Raises + ------ + ValueError : If a delimiter was specified with ``sep`` (or ``delimiter``) and + ``delim_whitespace=True``. + """ + # fix types for sep, delimiter to Union(str, Any) + delim_default = defaults["delimiter"] + kwds: Dict[str, Any] = {} + # gh-23761 + # + # When a dialect is passed, it overrides any of the overlapping + # parameters passed in directly. 
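`_stringify_na_values` above expands each NA marker into its string and numeric spellings, so that e.g. 999, '999', 999.0 and '999.0' all match the same marker. Sketch of the effect:

from io import StringIO

import pandas as pd

data = "a\n999\n999.0\n1\n"
df = pd.read_csv(StringIO(data), na_values=[999])
print(df["a"].isna().tolist())  # [True, True, False]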
We don't want to warn if the + # default parameters were passed in (since it probably means + # that the user didn't pass them in explicitly in the first place). + # + # "delimiter" is the annoying corner case because we alias it to + # "sep" before doing comparison to the dialect values later on. + # Thus, we need a flag to indicate that we need to "override" + # the comparison to dialect values by checking if default values + # for BOTH "delimiter" and "sep" were provided. + if dialect is not None: + kwds["sep_override"] = delimiter is None and ( + sep is lib.no_default or sep == delim_default + ) + + # Alias sep -> delimiter. + if delimiter is None: + delimiter = sep + + if delim_whitespace and (delimiter is not lib.no_default): + raise ValueError( + "Specified a delimiter with both sep and " + "delim_whitespace=True; you can only specify one." + ) + + if delimiter is lib.no_default: + # assign default separator value + kwds["delimiter"] = delim_default + else: + kwds["delimiter"] = delimiter + + if engine is not None: + kwds["engine_specified"] = True + else: + kwds["engine"] = "c" + kwds["engine_specified"] = False + + return kwds + + +def _extract_dialect(kwds: Dict[str, Any]) -> Optional[csv.Dialect]: + """ + Extract concrete csv dialect instance. + + Returns + ------- + csv.Dialect or None + """ + if kwds.get("dialect") is None: + return None + + dialect = kwds["dialect"] + if dialect in csv.list_dialects(): + dialect = csv.get_dialect(dialect) + + _validate_dialect(dialect) + + return dialect + + +MANDATORY_DIALECT_ATTRS = ( + "delimiter", + "doublequote", + "escapechar", + "skipinitialspace", + "quotechar", + "quoting", +) + + +def _validate_dialect(dialect: csv.Dialect) -> None: + """ + Validate csv dialect instance. + + Raises + ------ + ValueError + If incorrect dialect is provided. + """ + for param in MANDATORY_DIALECT_ATTRS: + if not hasattr(dialect, param): + raise ValueError(f"Invalid dialect {dialect} provided") + + +def _merge_with_dialect_properties( + dialect: csv.Dialect, + defaults: Dict[str, Any], +) -> Dict[str, Any]: + """ + Merge default kwargs in TextFileReader with dialect parameters. + + Parameters + ---------- + dialect : csv.Dialect + Concrete csv dialect. See csv.Dialect documentation for more details. + defaults : dict + Keyword arguments passed to TextFileReader. + + Returns + ------- + kwds : dict + Updated keyword arguments, merged with dialect parameters. + """ + kwds = defaults.copy() + + for param in MANDATORY_DIALECT_ATTRS: + dialect_val = getattr(dialect, param) + + parser_default = parser_defaults[param] + provided = kwds.get(param, parser_default) + + # Messages for conflicting values between the dialect + # instance and the actual parameters provided. + conflict_msgs = [] + + # Don't warn if the default parameter was passed in, + # even if it conflicts with the dialect (gh-23761). + if provided != parser_default and provided != dialect_val: + msg = ( + f"Conflicting values for '{param}': '{provided}' was " + f"provided, but the dialect specifies '{dialect_val}'. " + "Using the dialect-specified value." + ) + + # Annoying corner case for not warning about + # conflicts between dialect and delimiter parameter. + # Refer to the outer "_read_" function for more info. 
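The merge logic around this comment lets a dialect win over an explicitly passed, non-default parameter, warning as it does so, while defaults pass silently thanks to the `sep_override` flag (gh-23761). Sketch of the user-visible behavior:

import csv
from io import StringIO

import pandas as pd

csv.register_dialect("pipes", delimiter="|")
# sep=';' conflicts with the dialect's '|': a ParserWarning is emitted and
# the dialect-specified delimiter is used.
df = pd.read_csv(StringIO("a|b\n1|2\n"), dialect="pipes", sep=";")
print(df.columns.tolist())  # ['a', 'b']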
+ if not (param == "delimiter" and kwds.pop("sep_override", False)): + conflict_msgs.append(msg) + + if conflict_msgs: + warnings.warn("\n\n".join(conflict_msgs), ParserWarning, stacklevel=2) + kwds[param] = dialect_val + return kwds + + +def _validate_skipfooter(kwds: Dict[str, Any]) -> None: + """ + Check whether skipfooter is compatible with other kwargs in TextFileReader. + + Parameters + ---------- + kwds : dict + Keyword arguments passed to TextFileReader. + + Raises + ------ + ValueError + If skipfooter is not compatible with other parameters. + """ + if kwds.get("skipfooter"): + if kwds.get("iterator") or kwds.get("chunksize"): + raise ValueError("'skipfooter' not supported for iteration") + if kwds.get("nrows"): + raise ValueError("'skipfooter' not supported with 'nrows'") diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py index 4fd754bf79ba2..9fd6e48cf8689 100644 --- a/pandas/tests/io/parser/common/test_common_basic.py +++ b/pandas/tests/io/parser/common/test_common_basic.py @@ -16,7 +16,8 @@ from pandas import DataFrame, Index, Series, compat import pandas._testing as tm -from pandas.io.parsers import CParserWrapper, TextFileReader +from pandas.io.parsers import TextFileReader +from pandas.io.parsers.c_parser_wrapper import CParserWrapper def test_override_set_noconvert_columns(): diff --git a/pandas/tests/io/parser/test_dialect.py b/pandas/tests/io/parser/test_dialect.py index c12eb5ec873b2..d0ee6add9ca92 100644 --- a/pandas/tests/io/parser/test_dialect.py +++ b/pandas/tests/io/parser/test_dialect.py @@ -98,9 +98,9 @@ def test_dialect_conflict_except_delimiter(all_parsers, custom_dialect, arg, val if "value" == "dialect": # No conflict --> no warning. kwds[arg] = dialect_kwargs[arg] elif "value" == "default": # Default --> no warning. - from pandas.io.parsers import _parser_defaults + from pandas.io.parsers.base_parser import parser_defaults - kwds[arg] = _parser_defaults[arg] + kwds[arg] = parser_defaults[arg] else: # Non-default + conflict with dialect --> warning. warning_klass = ParserWarning kwds[arg] = "blah" diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index 47dc543c61bd0..5322c19a3ae50 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -11,11 +11,13 @@ import numpy as np import pytest +from pandas.errors import EmptyDataError + import pandas as pd from pandas import DataFrame, DatetimeIndex import pandas._testing as tm -from pandas.io.parsers import EmptyDataError, read_csv, read_fwf +from pandas.io.parsers import read_csv, read_fwf def test_basic(): diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index 267fae760398a..2cf3d959acb48 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -14,8 +14,8 @@ import pandas._testing as tm -import pandas.io.parsers as parsers from pandas.io.parsers import read_csv +import pandas.io.parsers.readers as parsers @pytest.fixture(params=["python", "python-fwf"], ids=lambda val: val) @@ -86,7 +86,7 @@ def test_c_engine(self): read_csv(StringIO(data), lineterminator="~~") def test_python_engine(self, python_engine): - from pandas.io.parsers import _python_unsupported as py_unsupported + from pandas.io.parsers.readers import _python_unsupported as py_unsupported data = """1,2,3,, 1,2,3,4,