From a2c0163d0e5fc81022d48b5ef96bbc0436fb35b6 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Sat, 19 Dec 2020 02:53:41 -0500 Subject: [PATCH 1/6] test reorg --- pandas/tests/io/parser/test_dtypes.py | 956 +++++++++++++------------- 1 file changed, 472 insertions(+), 484 deletions(-) diff --git a/pandas/tests/io/parser/test_dtypes.py b/pandas/tests/io/parser/test_dtypes.py index 1e68e54b413b0..d8f819d896e55 100644 --- a/pandas/tests/io/parser/test_dtypes.py +++ b/pandas/tests/io/parser/test_dtypes.py @@ -17,589 +17,577 @@ import pandas._testing as tm -@pytest.mark.parametrize("dtype", [str, object]) -@pytest.mark.parametrize("check_orig", [True, False]) -def test_dtype_all_columns(all_parsers, dtype, check_orig): - # see gh-3795, gh-6607 - parser = all_parsers - - df = DataFrame( - np.random.rand(5, 2).round(4), - columns=list("AB"), - index=["1A", "1B", "1C", "1D", "1E"], - ) - - with tm.ensure_clean("__passing_str_as_dtype__.csv") as path: - df.to_csv(path) - - result = parser.read_csv(path, dtype=dtype, index_col=0) - - if check_orig: - expected = df.copy() - result = result.astype(float) - else: - expected = df.astype(str) - - tm.assert_frame_equal(result, expected) +class TestParserDtypesBasic: + @pytest.mark.parametrize("dtype", [str, object]) + @pytest.mark.parametrize("check_orig", [True, False]) + def test_dtype_all_columns(self, all_parsers, dtype, check_orig): + # see gh-3795, gh-6607 + parser = all_parsers + + df = DataFrame( + np.random.rand(5, 2).round(4), + columns=list("AB"), + index=["1A", "1B", "1C", "1D", "1E"], + ) + with tm.ensure_clean("__passing_str_as_dtype__.csv") as path: + df.to_csv(path) -def test_dtype_all_columns_empty(all_parsers): - # see gh-12048 - parser = all_parsers - result = parser.read_csv(StringIO("A,B"), dtype=str) + result = parser.read_csv(path, dtype=dtype, index_col=0) - expected = DataFrame({"A": [], "B": []}, index=[], dtype=str) - tm.assert_frame_equal(result, expected) + if check_orig: + expected = df.copy() + result = result.astype(float) + else: + expected = df.astype(str) + tm.assert_frame_equal(result, expected) -def test_dtype_per_column(all_parsers): - parser = all_parsers - data = """\ + def test_dtype_per_column(self, all_parsers): + parser = all_parsers + data = """\ one,two 1,2.5 2,3.5 3,4.5 4,5.5""" - expected = DataFrame( - [[1, "2.5"], [2, "3.5"], [3, "4.5"], [4, "5.5"]], columns=["one", "two"] - ) - expected["one"] = expected["one"].astype(np.float64) - expected["two"] = expected["two"].astype(object) - - result = parser.read_csv(StringIO(data), dtype={"one": np.float64, 1: str}) - tm.assert_frame_equal(result, expected) + expected = DataFrame( + [[1, "2.5"], [2, "3.5"], [3, "4.5"], [4, "5.5"]], columns=["one", "two"] + ) + expected["one"] = expected["one"].astype(np.float64) + expected["two"] = expected["two"].astype(object) + result = parser.read_csv(StringIO(data), dtype={"one": np.float64, 1: str}) + tm.assert_frame_equal(result, expected) -def test_invalid_dtype_per_column(all_parsers): - parser = all_parsers - data = """\ + def test_invalid_dtype_per_column(self, all_parsers): + parser = all_parsers + data = """\ one,two 1,2.5 2,3.5 3,4.5 4,5.5""" - with pytest.raises(TypeError, match="data type [\"']foo[\"'] not understood"): - parser.read_csv(StringIO(data), dtype={"one": "foo", 1: "int"}) - - -@pytest.mark.parametrize( - "dtype", - [ - "category", - CategoricalDtype(), - {"a": "category", "b": "category", "c": CategoricalDtype()}, - ], -) -def test_categorical_dtype(all_parsers, dtype): - # see gh-10153 - parser = 
all_parsers - data = """a,b,c -1,a,3.4 -1,a,3.4 -2,b,4.5""" - expected = DataFrame( - { - "a": Categorical(["1", "1", "2"]), - "b": Categorical(["a", "a", "b"]), - "c": Categorical(["3.4", "3.4", "4.5"]), - } + with pytest.raises(TypeError, match="data type [\"']foo[\"'] not understood"): + parser.read_csv(StringIO(data), dtype={"one": "foo", 1: "int"}) + + def test_raise_on_passed_int_dtype_with_nas(self, all_parsers): + # see gh-2631 + parser = all_parsers + data = """YEAR, DOY, a + 2001,106380451,10 + 2001,,11 + 2001,106380451,67""" + + msg = ( + "Integer column has NA values" + if parser.engine == "c" + else "Unable to convert column DOY" + ) + with pytest.raises(ValueError, match=msg): + parser.read_csv( + StringIO(data), dtype={"DOY": np.int64}, skipinitialspace=True + ) + + def test_dtype_with_converters(self, all_parsers): + parser = all_parsers + data = """a,b +1.1,2.2 +1.2,2.3""" + + # Dtype spec ignored if converted specified. + with tm.assert_produces_warning(ParserWarning): + result = parser.read_csv( + StringIO(data), dtype={"a": "i8"}, converters={"a": lambda x: str(x)} + ) + expected = DataFrame({"a": ["1.1", "1.2"], "b": [2.2, 2.3]}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "dtype", list(np.typecodes["AllInteger"] + np.typecodes["Float"]) ) - actual = parser.read_csv(StringIO(data), dtype=dtype) - tm.assert_frame_equal(actual, expected) + def test_numeric_dtype(self, all_parsers, dtype): + data = "0\n1" + parser = all_parsers + expected = DataFrame([0, 1], dtype=dtype) + + result = parser.read_csv(StringIO(data), header=None, dtype=dtype) + tm.assert_frame_equal(expected, result) + + def test_boolean_dtype(self, all_parsers): + parser = all_parsers + data = "\n".join( + [ + "a", + "True", + "TRUE", + "true", + "1", + "1.0", + "False", + "FALSE", + "false", + "0", + "0.0", + "NaN", + "nan", + "NA", + "null", + "NULL", + ] + ) + + result = parser.read_csv(StringIO(data), dtype="boolean") + expected = DataFrame( + { + "a": pd.array( + [ + True, + True, + True, + True, + True, + False, + False, + False, + False, + False, + None, + None, + None, + None, + None, + ], + dtype="boolean", + ) + } + ) + + tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("dtype", [{"b": "category"}, {1: "category"}]) -def test_categorical_dtype_single(all_parsers, dtype): - # see gh-10153 - parser = all_parsers - data = """a,b,c +class TestParserDtypesCategorical1: + @pytest.mark.parametrize( + "dtype", + [ + "category", + CategoricalDtype(), + {"a": "category", "b": "category", "c": CategoricalDtype()}, + ], + ) + def test_categorical_dtype(self, all_parsers, dtype): + # see gh-10153 + parser = all_parsers + data = """a,b,c 1,a,3.4 1,a,3.4 2,b,4.5""" - expected = DataFrame( - {"a": [1, 1, 2], "b": Categorical(["a", "a", "b"]), "c": [3.4, 3.4, 4.5]} - ) - actual = parser.read_csv(StringIO(data), dtype=dtype) - tm.assert_frame_equal(actual, expected) - + expected = DataFrame( + { + "a": Categorical(["1", "1", "2"]), + "b": Categorical(["a", "a", "b"]), + "c": Categorical(["3.4", "3.4", "4.5"]), + } + ) + actual = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(actual, expected) + + @pytest.mark.parametrize("dtype", [{"b": "category"}, {1: "category"}]) + def test_categorical_dtype_single(self, all_parsers, dtype): + # see gh-10153 + parser = all_parsers + data = """a,b,c +1,a,3.4 +1,a,3.4 +2,b,4.5""" + expected = DataFrame( + {"a": [1, 1, 2], "b": Categorical(["a", "a", "b"]), "c": [3.4, 3.4, 4.5]} + ) + actual = 
parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(actual, expected) -def test_categorical_dtype_unsorted(all_parsers): - # see gh-10153 - parser = all_parsers - data = """a,b,c + def test_categorical_dtype_unsorted(self, all_parsers): + # see gh-10153 + parser = all_parsers + data = """a,b,c 1,b,3.4 1,b,3.4 2,a,4.5""" - expected = DataFrame( - { - "a": Categorical(["1", "1", "2"]), - "b": Categorical(["b", "b", "a"]), - "c": Categorical(["3.4", "3.4", "4.5"]), - } - ) - actual = parser.read_csv(StringIO(data), dtype="category") - tm.assert_frame_equal(actual, expected) - + expected = DataFrame( + { + "a": Categorical(["1", "1", "2"]), + "b": Categorical(["b", "b", "a"]), + "c": Categorical(["3.4", "3.4", "4.5"]), + } + ) + actual = parser.read_csv(StringIO(data), dtype="category") + tm.assert_frame_equal(actual, expected) -def test_categorical_dtype_missing(all_parsers): - # see gh-10153 - parser = all_parsers - data = """a,b,c + def test_categorical_dtype_missing(self, all_parsers): + # see gh-10153 + parser = all_parsers + data = """a,b,c 1,b,3.4 1,nan,3.4 2,a,4.5""" - expected = DataFrame( - { - "a": Categorical(["1", "1", "2"]), - "b": Categorical(["b", np.nan, "a"]), - "c": Categorical(["3.4", "3.4", "4.5"]), - } - ) - actual = parser.read_csv(StringIO(data), dtype="category") - tm.assert_frame_equal(actual, expected) - - -@pytest.mark.slow -def test_categorical_dtype_high_cardinality_numeric(all_parsers): - # see gh-18186 - parser = all_parsers - data = np.sort([str(i) for i in range(524289)]) - expected = DataFrame({"a": Categorical(data, ordered=True)}) - - actual = parser.read_csv(StringIO("a\n" + "\n".join(data)), dtype="category") - actual["a"] = actual["a"].cat.reorder_categories( - np.sort(actual.a.cat.categories), ordered=True - ) - tm.assert_frame_equal(actual, expected) - - -def test_categorical_dtype_latin1(all_parsers, csv_dir_path): - # see gh-10153 - pth = os.path.join(csv_dir_path, "unicode_series.csv") - parser = all_parsers - encoding = "latin-1" - - expected = parser.read_csv(pth, header=None, encoding=encoding) - expected[1] = Categorical(expected[1]) - - actual = parser.read_csv(pth, header=None, encoding=encoding, dtype={1: "category"}) - tm.assert_frame_equal(actual, expected) - - -def test_categorical_dtype_utf16(all_parsers, csv_dir_path): - # see gh-10153 - pth = os.path.join(csv_dir_path, "utf16_ex.txt") - parser = all_parsers - encoding = "utf-16" - sep = "\t" + expected = DataFrame( + { + "a": Categorical(["1", "1", "2"]), + "b": Categorical(["b", np.nan, "a"]), + "c": Categorical(["3.4", "3.4", "4.5"]), + } + ) + actual = parser.read_csv(StringIO(data), dtype="category") + tm.assert_frame_equal(actual, expected) + + @pytest.mark.slow + def test_categorical_dtype_high_cardinality_numeric(self, all_parsers): + # see gh-18186 + parser = all_parsers + data = np.sort([str(i) for i in range(524289)]) + expected = DataFrame({"a": Categorical(data, ordered=True)}) + + actual = parser.read_csv(StringIO("a\n" + "\n".join(data)), dtype="category") + actual["a"] = actual["a"].cat.reorder_categories( + np.sort(actual.a.cat.categories), ordered=True + ) + tm.assert_frame_equal(actual, expected) - expected = parser.read_csv(pth, sep=sep, encoding=encoding) - expected = expected.apply(Categorical) + def test_categorical_dtype_utf16(self, all_parsers, csv_dir_path): + # see gh-10153 + pth = os.path.join(csv_dir_path, "utf16_ex.txt") + parser = all_parsers + encoding = "utf-16" + sep = "\t" - actual = parser.read_csv(pth, sep=sep, encoding=encoding, 
dtype="category") - tm.assert_frame_equal(actual, expected) + expected = parser.read_csv(pth, sep=sep, encoding=encoding) + expected = expected.apply(Categorical) + actual = parser.read_csv(pth, sep=sep, encoding=encoding, dtype="category") + tm.assert_frame_equal(actual, expected) -def test_categorical_dtype_chunksize_infer_categories(all_parsers): - # see gh-10153 - parser = all_parsers - data = """a,b + def test_categorical_dtype_chunksize_infer_categories(self, all_parsers): + # see gh-10153 + parser = all_parsers + data = """a,b 1,a 1,b 1,b 2,c""" - expecteds = [ - DataFrame({"a": [1, 1], "b": Categorical(["a", "b"])}), - DataFrame({"a": [1, 2], "b": Categorical(["b", "c"])}, index=[2, 3]), - ] - with parser.read_csv( - StringIO(data), dtype={"b": "category"}, chunksize=2 - ) as actuals: - for actual, expected in zip(actuals, expecteds): - tm.assert_frame_equal(actual, expected) - - -def test_categorical_dtype_chunksize_explicit_categories(all_parsers): - # see gh-10153 - parser = all_parsers - data = """a,b + expecteds = [ + DataFrame({"a": [1, 1], "b": Categorical(["a", "b"])}), + DataFrame({"a": [1, 2], "b": Categorical(["b", "c"])}, index=[2, 3]), + ] + with parser.read_csv( + StringIO(data), dtype={"b": "category"}, chunksize=2 + ) as actuals: + for actual, expected in zip(actuals, expecteds): + tm.assert_frame_equal(actual, expected) + + def test_categorical_dtype_chunksize_explicit_categories(self, all_parsers): + # see gh-10153 + parser = all_parsers + data = """a,b 1,a 1,b 1,b 2,c""" - cats = ["a", "b", "c"] - expecteds = [ - DataFrame({"a": [1, 1], "b": Categorical(["a", "b"], categories=cats)}), - DataFrame( - {"a": [1, 2], "b": Categorical(["b", "c"], categories=cats)}, index=[2, 3] - ), - ] - dtype = CategoricalDtype(cats) - with parser.read_csv(StringIO(data), dtype={"b": dtype}, chunksize=2) as actuals: - for actual, expected in zip(actuals, expecteds): - tm.assert_frame_equal(actual, expected) - - -@pytest.mark.parametrize("ordered", [False, True]) -@pytest.mark.parametrize( - "categories", - [["a", "b", "c"], ["a", "c", "b"], ["a", "b", "c", "d"], ["c", "b", "a"]], -) -def test_categorical_category_dtype(all_parsers, categories, ordered): - parser = all_parsers - data = """a,b + cats = ["a", "b", "c"] + expecteds = [ + DataFrame({"a": [1, 1], "b": Categorical(["a", "b"], categories=cats)}), + DataFrame( + {"a": [1, 2], "b": Categorical(["b", "c"], categories=cats)}, + index=[2, 3], + ), + ] + dtype = CategoricalDtype(cats) + with parser.read_csv( + StringIO(data), dtype={"b": dtype}, chunksize=2 + ) as actuals: + for actual, expected in zip(actuals, expecteds): + tm.assert_frame_equal(actual, expected) + + +class TestParserDtypesCategorical2: + def test_categorical_dtype_latin1(self, all_parsers, csv_dir_path): + # see gh-10153 + pth = os.path.join(csv_dir_path, "unicode_series.csv") + parser = all_parsers + encoding = "latin-1" + + expected = parser.read_csv(pth, header=None, encoding=encoding) + expected[1] = Categorical(expected[1]) + + actual = parser.read_csv( + pth, header=None, encoding=encoding, dtype={1: "category"} + ) + tm.assert_frame_equal(actual, expected) + + @pytest.mark.parametrize("ordered", [False, True]) + @pytest.mark.parametrize( + "categories", + [["a", "b", "c"], ["a", "c", "b"], ["a", "b", "c", "d"], ["c", "b", "a"]], + ) + def test_categorical_category_dtype(self, all_parsers, categories, ordered): + parser = all_parsers + data = """a,b 1,a 1,b 1,b 2,c""" - expected = DataFrame( - { - "a": [1, 1, 1, 2], - "b": Categorical( - ["a", "b", "b", 
"c"], categories=categories, ordered=ordered - ), - } - ) - - dtype = {"b": CategoricalDtype(categories=categories, ordered=ordered)} - result = parser.read_csv(StringIO(data), dtype=dtype) - tm.assert_frame_equal(result, expected) + expected = DataFrame( + { + "a": [1, 1, 1, 2], + "b": Categorical( + ["a", "b", "b", "c"], categories=categories, ordered=ordered + ), + } + ) + dtype = {"b": CategoricalDtype(categories=categories, ordered=ordered)} + result = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) -def test_categorical_category_dtype_unsorted(all_parsers): - parser = all_parsers - data = """a,b + def test_categorical_category_dtype_unsorted(self, all_parsers): + parser = all_parsers + data = """a,b 1,a 1,b 1,b 2,c""" - dtype = CategoricalDtype(["c", "b", "a"]) - expected = DataFrame( - { - "a": [1, 1, 1, 2], - "b": Categorical(["a", "b", "b", "c"], categories=["c", "b", "a"]), - } - ) - - result = parser.read_csv(StringIO(data), dtype={"b": dtype}) - tm.assert_frame_equal(result, expected) - - -def test_categorical_coerces_numeric(all_parsers): - parser = all_parsers - dtype = {"b": CategoricalDtype([1, 2, 3])} - - data = "b\n1\n1\n2\n3" - expected = DataFrame({"b": Categorical([1, 1, 2, 3])}) - - result = parser.read_csv(StringIO(data), dtype=dtype) - tm.assert_frame_equal(result, expected) - - -def test_categorical_coerces_datetime(all_parsers): - parser = all_parsers - dti = pd.DatetimeIndex(["2017-01-01", "2018-01-01", "2019-01-01"], freq=None) - dtype = {"b": CategoricalDtype(dti)} - - data = "b\n2017-01-01\n2018-01-01\n2019-01-01" - expected = DataFrame({"b": Categorical(dtype["b"].categories)}) - - result = parser.read_csv(StringIO(data), dtype=dtype) - tm.assert_frame_equal(result, expected) - - -def test_categorical_coerces_timestamp(all_parsers): - parser = all_parsers - dtype = {"b": CategoricalDtype([Timestamp("2014")])} - - data = "b\n2014-01-01\n2014-01-01T00:00:00" - expected = DataFrame({"b": Categorical([Timestamp("2014")] * 2)}) - - result = parser.read_csv(StringIO(data), dtype=dtype) - tm.assert_frame_equal(result, expected) - + dtype = CategoricalDtype(["c", "b", "a"]) + expected = DataFrame( + { + "a": [1, 1, 1, 2], + "b": Categorical(["a", "b", "b", "c"], categories=["c", "b", "a"]), + } + ) -def test_categorical_coerces_timedelta(all_parsers): - parser = all_parsers - dtype = {"b": CategoricalDtype(pd.to_timedelta(["1H", "2H", "3H"]))} + result = parser.read_csv(StringIO(data), dtype={"b": dtype}) + tm.assert_frame_equal(result, expected) - data = "b\n1H\n2H\n3H" - expected = DataFrame({"b": Categorical(dtype["b"].categories)}) + def test_categorical_coerces_numeric(self, all_parsers): + parser = all_parsers + dtype = {"b": CategoricalDtype([1, 2, 3])} - result = parser.read_csv(StringIO(data), dtype=dtype) - tm.assert_frame_equal(result, expected) + data = "b\n1\n1\n2\n3" + expected = DataFrame({"b": Categorical([1, 1, 2, 3])}) + result = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize( - "data", - [ - "b\nTrue\nFalse\nNA\nFalse", - "b\ntrue\nfalse\nNA\nfalse", - "b\nTRUE\nFALSE\nNA\nFALSE", - "b\nTrue\nFalse\nNA\nFALSE", - ], -) -def test_categorical_dtype_coerces_boolean(all_parsers, data): - # see gh-20498 - parser = all_parsers - dtype = {"b": CategoricalDtype([False, True])} - expected = DataFrame({"b": Categorical([True, False, None, False])}) + def test_categorical_coerces_datetime(self, all_parsers): + parser = all_parsers + dti = 
pd.DatetimeIndex(["2017-01-01", "2018-01-01", "2019-01-01"], freq=None) + dtype = {"b": CategoricalDtype(dti)} - result = parser.read_csv(StringIO(data), dtype=dtype) - tm.assert_frame_equal(result, expected) + data = "b\n2017-01-01\n2018-01-01\n2019-01-01" + expected = DataFrame({"b": Categorical(dtype["b"].categories)}) + result = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) -def test_categorical_unexpected_categories(all_parsers): - parser = all_parsers - dtype = {"b": CategoricalDtype(["a", "b", "d", "e"])} + def test_categorical_coerces_timestamp(self, all_parsers): + parser = all_parsers + dtype = {"b": CategoricalDtype([Timestamp("2014")])} - data = "b\nd\na\nc\nd" # Unexpected c - expected = DataFrame({"b": Categorical(list("dacd"), dtype=dtype["b"])}) + data = "b\n2014-01-01\n2014-01-01T00:00:00" + expected = DataFrame({"b": Categorical([Timestamp("2014")] * 2)}) - result = parser.read_csv(StringIO(data), dtype=dtype) - tm.assert_frame_equal(result, expected) + result = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) + def test_categorical_coerces_timedelta(self, all_parsers): + parser = all_parsers + dtype = {"b": CategoricalDtype(pd.to_timedelta(["1H", "2H", "3H"]))} -def test_empty_pass_dtype(all_parsers): - parser = all_parsers + data = "b\n1H\n2H\n3H" + expected = DataFrame({"b": Categorical(dtype["b"].categories)}) - data = "one,two" - result = parser.read_csv(StringIO(data), dtype={"one": "u1"}) + result = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) - expected = DataFrame( - {"one": np.empty(0, dtype="u1"), "two": np.empty(0, dtype=object)}, - index=Index([], dtype=object), + @pytest.mark.parametrize( + "data", + [ + "b\nTrue\nFalse\nNA\nFalse", + "b\ntrue\nfalse\nNA\nfalse", + "b\nTRUE\nFALSE\nNA\nFALSE", + "b\nTrue\nFalse\nNA\nFALSE", + ], ) - tm.assert_frame_equal(result, expected) + def test_categorical_dtype_coerces_boolean(self, all_parsers, data): + # see gh-20498 + parser = all_parsers + dtype = {"b": CategoricalDtype([False, True])} + expected = DataFrame({"b": Categorical([True, False, None, False])}) + result = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) -def test_empty_with_index_pass_dtype(all_parsers): - parser = all_parsers + def test_categorical_unexpected_categories(self, all_parsers): + parser = all_parsers + dtype = {"b": CategoricalDtype(["a", "b", "d", "e"])} - data = "one,two" - result = parser.read_csv( - StringIO(data), index_col=["one"], dtype={"one": "u1", 1: "f"} - ) + data = "b\nd\na\nc\nd" # Unexpected c + expected = DataFrame({"b": Categorical(list("dacd"), dtype=dtype["b"])}) - expected = DataFrame( - {"two": np.empty(0, dtype="f")}, index=Index([], dtype="u1", name="one") - ) - tm.assert_frame_equal(result, expected) + result = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) -def test_empty_with_multi_index_pass_dtype(all_parsers): - parser = all_parsers +class TestParserDtypesEmpty: + def test_dtype_all_columns_empty(self, all_parsers): + # see gh-12048 + parser = all_parsers + result = parser.read_csv(StringIO("A,B"), dtype=str) - data = "one,two,three" - result = parser.read_csv( - StringIO(data), index_col=["one", "two"], dtype={"one": "u1", 1: "f8"} - ) - - exp_idx = MultiIndex.from_arrays( - [np.empty(0, dtype="u1"), np.empty(0, dtype=np.float64)], names=["one", "two"] - ) - expected = DataFrame({"three": np.empty(0, dtype=object)}, 
index=exp_idx) - tm.assert_frame_equal(result, expected) + expected = DataFrame({"A": [], "B": []}, index=[], dtype=str) + tm.assert_frame_equal(result, expected) + def test_empty_pass_dtype(self, all_parsers): + parser = all_parsers -def test_empty_with_mangled_column_pass_dtype_by_names(all_parsers): - parser = all_parsers + data = "one,two" + result = parser.read_csv(StringIO(data), dtype={"one": "u1"}) - data = "one,one" - result = parser.read_csv(StringIO(data), dtype={"one": "u1", "one.1": "f"}) + expected = DataFrame( + {"one": np.empty(0, dtype="u1"), "two": np.empty(0, dtype=object)}, + index=Index([], dtype=object), + ) + tm.assert_frame_equal(result, expected) - expected = DataFrame( - {"one": np.empty(0, dtype="u1"), "one.1": np.empty(0, dtype="f")}, - index=Index([], dtype=object), - ) - tm.assert_frame_equal(result, expected) + def test_empty_with_index_pass_dtype(self, all_parsers): + parser = all_parsers + data = "one,two" + result = parser.read_csv( + StringIO(data), index_col=["one"], dtype={"one": "u1", 1: "f"} + ) -def test_empty_with_mangled_column_pass_dtype_by_indexes(all_parsers): - parser = all_parsers + expected = DataFrame( + {"two": np.empty(0, dtype="f")}, index=Index([], dtype="u1", name="one") + ) + tm.assert_frame_equal(result, expected) - data = "one,one" - result = parser.read_csv(StringIO(data), dtype={0: "u1", 1: "f"}) + def test_empty_with_multi_index_pass_dtype(self, all_parsers): + parser = all_parsers - expected = DataFrame( - {"one": np.empty(0, dtype="u1"), "one.1": np.empty(0, dtype="f")}, - index=Index([], dtype=object), - ) - tm.assert_frame_equal(result, expected) + data = "one,two,three" + result = parser.read_csv( + StringIO(data), index_col=["one", "two"], dtype={"one": "u1", 1: "f8"} + ) + exp_idx = MultiIndex.from_arrays( + [np.empty(0, dtype="u1"), np.empty(0, dtype=np.float64)], + names=["one", "two"], + ) + expected = DataFrame({"three": np.empty(0, dtype=object)}, index=exp_idx) + tm.assert_frame_equal(result, expected) -def test_empty_with_dup_column_pass_dtype_by_indexes(all_parsers): - # see gh-9424 - parser = all_parsers - expected = concat( - [Series([], name="one", dtype="u1"), Series([], name="one.1", dtype="f")], - axis=1, - ) - expected.index = expected.index.astype(object) + def test_empty_with_mangled_column_pass_dtype_by_names(self, all_parsers): + parser = all_parsers - data = "one,one" - result = parser.read_csv(StringIO(data), dtype={0: "u1", 1: "f"}) - tm.assert_frame_equal(result, expected) + data = "one,one" + result = parser.read_csv(StringIO(data), dtype={"one": "u1", "one.1": "f"}) + expected = DataFrame( + {"one": np.empty(0, dtype="u1"), "one.1": np.empty(0, dtype="f")}, + index=Index([], dtype=object), + ) + tm.assert_frame_equal(result, expected) -def test_empty_with_dup_column_pass_dtype_by_indexes_raises(all_parsers): - # see gh-9424 - parser = all_parsers - expected = concat( - [Series([], name="one", dtype="u1"), Series([], name="one.1", dtype="f")], - axis=1, - ) - expected.index = expected.index.astype(object) + def test_empty_with_mangled_column_pass_dtype_by_indexes(self, all_parsers): + parser = all_parsers - with pytest.raises(ValueError, match="Duplicate names"): - data = "" - parser.read_csv(StringIO(data), names=["one", "one"], dtype={0: "u1", 1: "f"}) + data = "one,one" + result = parser.read_csv(StringIO(data), dtype={0: "u1", 1: "f"}) + expected = DataFrame( + {"one": np.empty(0, dtype="u1"), "one.1": np.empty(0, dtype="f")}, + index=Index([], dtype=object), + ) + tm.assert_frame_equal(result, 
expected) -def test_raise_on_passed_int_dtype_with_nas(all_parsers): - # see gh-2631 - parser = all_parsers - data = """YEAR, DOY, a -2001,106380451,10 -2001,,11 -2001,106380451,67""" + def test_empty_with_dup_column_pass_dtype_by_indexes(self, all_parsers): + # see gh-9424 + parser = all_parsers + expected = concat( + [Series([], name="one", dtype="u1"), Series([], name="one.1", dtype="f")], + axis=1, + ) + expected.index = expected.index.astype(object) - msg = ( - "Integer column has NA values" - if parser.engine == "c" - else "Unable to convert column DOY" - ) - with pytest.raises(ValueError, match=msg): - parser.read_csv(StringIO(data), dtype={"DOY": np.int64}, skipinitialspace=True) + data = "one,one" + result = parser.read_csv(StringIO(data), dtype={0: "u1", 1: "f"}) + tm.assert_frame_equal(result, expected) + def test_empty_with_dup_column_pass_dtype_by_indexes_raises(self, all_parsers): + # see gh-9424 + parser = all_parsers + expected = concat( + [Series([], name="one", dtype="u1"), Series([], name="one.1", dtype="f")], + axis=1, + ) + expected.index = expected.index.astype(object) -def test_dtype_with_converters(all_parsers): - parser = all_parsers - data = """a,b -1.1,2.2 -1.2,2.3""" + with pytest.raises(ValueError, match="Duplicate names"): + data = "" + parser.read_csv( + StringIO(data), names=["one", "one"], dtype={0: "u1", 1: "f"} + ) - # Dtype spec ignored if converted specified. - with tm.assert_produces_warning(ParserWarning): - result = parser.read_csv( - StringIO(data), dtype={"a": "i8"}, converters={"a": lambda x: str(x)} - ) - expected = DataFrame({"a": ["1.1", "1.2"], "b": [2.2, 2.3]}) - tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize( - "dtype,expected", - [ - (np.float64, DataFrame(columns=["a", "b"], dtype=np.float64)), - ("category", DataFrame({"a": Categorical([]), "b": Categorical([])}, index=[])), - ( - {"a": "category", "b": "category"}, - DataFrame({"a": Categorical([]), "b": Categorical([])}, index=[]), - ), - ("datetime64[ns]", DataFrame(columns=["a", "b"], dtype="datetime64[ns]")), - ( - "timedelta64[ns]", - DataFrame( - { - "a": Series([], dtype="timedelta64[ns]"), - "b": Series([], dtype="timedelta64[ns]"), - }, - index=[], + @pytest.mark.parametrize( + "dtype,expected", + [ + (np.float64, DataFrame(columns=["a", "b"], dtype=np.float64)), + ( + "category", + DataFrame({"a": Categorical([]), "b": Categorical([])}, index=[]), ), - ), - ( - {"a": np.int64, "b": np.int32}, - DataFrame( - {"a": Series([], dtype=np.int64), "b": Series([], dtype=np.int32)}, - index=[], + ( + {"a": "category", "b": "category"}, + DataFrame({"a": Categorical([]), "b": Categorical([])}, index=[]), ), - ), - ( - {0: np.int64, 1: np.int32}, - DataFrame( - {"a": Series([], dtype=np.int64), "b": Series([], dtype=np.int32)}, - index=[], + ("datetime64[ns]", DataFrame(columns=["a", "b"], dtype="datetime64[ns]")), + ( + "timedelta64[ns]", + DataFrame( + { + "a": Series([], dtype="timedelta64[ns]"), + "b": Series([], dtype="timedelta64[ns]"), + }, + index=[], + ), ), - ), - ( - {"a": np.int64, 1: np.int32}, - DataFrame( - {"a": Series([], dtype=np.int64), "b": Series([], dtype=np.int32)}, - index=[], + ( + {"a": np.int64, "b": np.int32}, + DataFrame( + {"a": Series([], dtype=np.int64), "b": Series([], dtype=np.int32)}, + index=[], + ), ), - ), - ], -) -def test_empty_dtype(all_parsers, dtype, expected): - # see gh-14712 - parser = all_parsers - data = "a,b" - - result = parser.read_csv(StringIO(data), header=0, dtype=dtype) - tm.assert_frame_equal(result, expected) 
- - -@pytest.mark.parametrize( - "dtype", list(np.typecodes["AllInteger"] + np.typecodes["Float"]) -) -def test_numeric_dtype(all_parsers, dtype): - data = "0\n1" - parser = all_parsers - expected = DataFrame([0, 1], dtype=dtype) - - result = parser.read_csv(StringIO(data), header=None, dtype=dtype) - tm.assert_frame_equal(expected, result) - - -def test_boolean_dtype(all_parsers): - parser = all_parsers - data = "\n".join( - [ - "a", - "True", - "TRUE", - "true", - "1", - "1.0", - "False", - "FALSE", - "false", - "0", - "0.0", - "NaN", - "nan", - "NA", - "null", - "NULL", - ] - ) - - result = parser.read_csv(StringIO(data), dtype="boolean") - expected = DataFrame( - { - "a": pd.array( - [ - True, - True, - True, - True, - True, - False, - False, - False, - False, - False, - None, - None, - None, - None, - None, - ], - dtype="boolean", - ) - } + ( + {0: np.int64, 1: np.int32}, + DataFrame( + {"a": Series([], dtype=np.int64), "b": Series([], dtype=np.int32)}, + index=[], + ), + ), + ( + {"a": np.int64, 1: np.int32}, + DataFrame( + {"a": Series([], dtype=np.int64), "b": Series([], dtype=np.int32)}, + index=[], + ), + ), + ], ) + def test_empty_dtype(self, all_parsers, dtype, expected): + # see gh-14712 + parser = all_parsers + data = "a,b" - tm.assert_frame_equal(result, expected) + result = parser.read_csv(StringIO(data), header=0, dtype=dtype) + tm.assert_frame_equal(result, expected) From 347751f4e073e6a5c65566169ad2c029741fed6f Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Sat, 19 Dec 2020 02:54:51 -0500 Subject: [PATCH 2/6] test reorg --- pandas/tests/io/parser/test_usecols.py | 1005 ++++++++++++------------ 1 file changed, 507 insertions(+), 498 deletions(-) diff --git a/pandas/tests/io/parser/test_usecols.py b/pandas/tests/io/parser/test_usecols.py index 7cdfb7d11ed83..e75046d3017f2 100644 --- a/pandas/tests/io/parser/test_usecols.py +++ b/pandas/tests/io/parser/test_usecols.py @@ -22,544 +22,553 @@ ) -def test_raise_on_mixed_dtype_usecols(all_parsers): - # See gh-12678 - data = """a,b,c - 1000,2000,3000 - 4000,5000,6000 - """ - usecols = [0, "b", 2] - parser = all_parsers - - with pytest.raises(ValueError, match=_msg_validate_usecols_arg): - parser.read_csv(StringIO(data), usecols=usecols) - - -@pytest.mark.parametrize("usecols", [(1, 2), ("b", "c")]) -def test_usecols(all_parsers, usecols): - data = """\ -a,b,c -1,2,3 -4,5,6 -7,8,9 -10,11,12""" - parser = all_parsers - result = parser.read_csv(StringIO(data), usecols=usecols) - - expected = DataFrame([[2, 3], [5, 6], [8, 9], [11, 12]], columns=["b", "c"]) - tm.assert_frame_equal(result, expected) - - -def test_usecols_with_names(all_parsers): - data = """\ -a,b,c -1,2,3 -4,5,6 -7,8,9 -10,11,12""" - parser = all_parsers - names = ["foo", "bar"] - result = parser.read_csv(StringIO(data), names=names, usecols=[1, 2], header=0) - - expected = DataFrame([[2, 3], [5, 6], [8, 9], [11, 12]], columns=names) - tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize( - "names,usecols", [(["b", "c"], [1, 2]), (["a", "b", "c"], ["b", "c"])] -) -def test_usecols_relative_to_names(all_parsers, names, usecols): - data = """\ -1,2,3 -4,5,6 -7,8,9 -10,11,12""" - parser = all_parsers - result = parser.read_csv(StringIO(data), names=names, header=None, usecols=usecols) - - expected = DataFrame([[2, 3], [5, 6], [8, 9], [11, 12]], columns=["b", "c"]) - tm.assert_frame_equal(result, expected) - - -def test_usecols_relative_to_names2(all_parsers): - # see gh-5766 - data = """\ -1,2,3 -4,5,6 -7,8,9 -10,11,12""" - parser = all_parsers - 
result = parser.read_csv( - StringIO(data), names=["a", "b"], header=None, usecols=[0, 1] +class TestParserUsecolsBasic: + def test_raise_on_mixed_dtype_usecols(self, all_parsers): + # See gh-12678 + data = """a,b,c + 1000,2000,3000 + 4000,5000,6000 + """ + usecols = [0, "b", 2] + parser = all_parsers + + with pytest.raises(ValueError, match=_msg_validate_usecols_arg): + parser.read_csv(StringIO(data), usecols=usecols) + + @pytest.mark.parametrize("usecols", [(1, 2), ("b", "c")]) + def test_usecols(self, all_parsers, usecols): + data = """\ + a,b,c + 1,2,3 + 4,5,6 + 7,8,9 + 10,11,12""" + parser = all_parsers + result = parser.read_csv(StringIO(data), usecols=usecols) + + expected = DataFrame([[2, 3], [5, 6], [8, 9], [11, 12]], columns=["b", "c"]) + tm.assert_frame_equal(result, expected) + + def test_usecols_with_names(self, all_parsers): + data = """\ + a,b,c + 1,2,3 + 4,5,6 + 7,8,9 + 10,11,12""" + parser = all_parsers + names = ["foo", "bar"] + result = parser.read_csv(StringIO(data), names=names, usecols=[1, 2], header=0) + + expected = DataFrame([[2, 3], [5, 6], [8, 9], [11, 12]], columns=names) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "names,usecols", [(["b", "c"], [1, 2]), (["a", "b", "c"], ["b", "c"])] ) + def test_usecols_relative_to_names(self, all_parsers, names, usecols): + data = """\ + 1,2,3 + 4,5,6 + 7,8,9 + 10,11,12""" + parser = all_parsers + result = parser.read_csv( + StringIO(data), names=names, header=None, usecols=usecols + ) + + expected = DataFrame([[2, 3], [5, 6], [8, 9], [11, 12]], columns=["b", "c"]) + tm.assert_frame_equal(result, expected) - expected = DataFrame([[1, 2], [4, 5], [7, 8], [10, 11]], columns=["a", "b"]) - tm.assert_frame_equal(result, expected) + def test_usecols_relative_to_names2(self, all_parsers): + # see gh-5766 + data = """\ + 1,2,3 + 4,5,6 + 7,8,9 + 10,11,12""" + parser = all_parsers + result = parser.read_csv( + StringIO(data), names=["a", "b"], header=None, usecols=[0, 1] + ) + + expected = DataFrame([[1, 2], [4, 5], [7, 8], [10, 11]], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + def test_usecols_name_length_conflict(self, all_parsers): + data = """\ + 1,2,3 + 4,5,6 + 7,8,9 + 10,11,12""" + parser = all_parsers + msg = "Number of passed names did not match number of header fields in the file" -def test_usecols_name_length_conflict(all_parsers): - data = """\ -1,2,3 -4,5,6 -7,8,9 -10,11,12""" - parser = all_parsers - msg = "Number of passed names did not match number of header fields in the file" + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), names=["a", "b"], header=None, usecols=[1]) - with pytest.raises(ValueError, match=msg): - parser.read_csv(StringIO(data), names=["a", "b"], header=None, usecols=[1]) + def test_usecols_single_string(self, all_parsers): + # see gh-20558 + parser = all_parsers + data = """foo, bar, baz + 1000, 2000, 3000 + 4000, 5000, 6000""" + with pytest.raises(ValueError, match=_msg_validate_usecols_arg): + parser.read_csv(StringIO(data), usecols="foo") -def test_usecols_single_string(all_parsers): - # see gh-20558 - parser = all_parsers - data = """foo, bar, baz -1000, 2000, 3000 -4000, 5000, 6000""" + @pytest.mark.parametrize( + "data", ["a,b,c,d\n1,2,3,4\n5,6,7,8", "a,b,c,d\n1,2,3,4,\n5,6,7,8,"] + ) + def test_usecols_index_col_false(self, all_parsers, data): + # see gh-9082 + parser = all_parsers + usecols = ["a", "c", "d"] + expected = DataFrame({"a": [1, 5], "c": [3, 7], "d": [4, 8]}) - with pytest.raises(ValueError, 
match=_msg_validate_usecols_arg): - parser.read_csv(StringIO(data), usecols="foo") + result = parser.read_csv(StringIO(data), usecols=usecols, index_col=False) + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("index_col", ["b", 0]) + @pytest.mark.parametrize("usecols", [["b", "c"], [1, 2]]) + def test_usecols_index_col_conflict(self, all_parsers, usecols, index_col): + # see gh-4201: test that index_col as integer reflects usecols + parser = all_parsers + data = "a,b,c,d\nA,a,1,one\nB,b,2,two" + expected = DataFrame({"c": [1, 2]}, index=Index(["a", "b"], name="b")) -@pytest.mark.parametrize( - "data", ["a,b,c,d\n1,2,3,4\n5,6,7,8", "a,b,c,d\n1,2,3,4,\n5,6,7,8,"] -) -def test_usecols_index_col_false(all_parsers, data): - # see gh-9082 - parser = all_parsers - usecols = ["a", "c", "d"] - expected = DataFrame({"a": [1, 5], "c": [3, 7], "d": [4, 8]}) + result = parser.read_csv(StringIO(data), usecols=usecols, index_col=index_col) + tm.assert_frame_equal(result, expected) - result = parser.read_csv(StringIO(data), usecols=usecols, index_col=False) - tm.assert_frame_equal(result, expected) + def test_usecols_index_col_conflict2(self, all_parsers): + # see gh-4201: test that index_col as integer reflects usecols + parser = all_parsers + data = "a,b,c,d\nA,a,1,one\nB,b,2,two" + expected = DataFrame({"b": ["a", "b"], "c": [1, 2], "d": ("one", "two")}) + expected = expected.set_index(["b", "c"]) -@pytest.mark.parametrize("index_col", ["b", 0]) -@pytest.mark.parametrize("usecols", [["b", "c"], [1, 2]]) -def test_usecols_index_col_conflict(all_parsers, usecols, index_col): - # see gh-4201: test that index_col as integer reflects usecols - parser = all_parsers - data = "a,b,c,d\nA,a,1,one\nB,b,2,two" - expected = DataFrame({"c": [1, 2]}, index=Index(["a", "b"], name="b")) + result = parser.read_csv( + StringIO(data), usecols=["b", "c", "d"], index_col=["b", "c"] + ) + tm.assert_frame_equal(result, expected) - result = parser.read_csv(StringIO(data), usecols=usecols, index_col=index_col) - tm.assert_frame_equal(result, expected) + def test_usecols_implicit_index_col(self, all_parsers): + # see gh-2654 + parser = all_parsers + data = "a,b,c\n4,apple,bat,5.7\n8,orange,cow,10" + result = parser.read_csv(StringIO(data), usecols=["a", "b"]) + expected = DataFrame( + {"a": ["apple", "orange"], "b": ["bat", "cow"]}, index=[4, 8] + ) + tm.assert_frame_equal(result, expected) -def test_usecols_index_col_conflict2(all_parsers): - # see gh-4201: test that index_col as integer reflects usecols - parser = all_parsers - data = "a,b,c,d\nA,a,1,one\nB,b,2,two" + def test_usecols_regex_sep(self, all_parsers): + # see gh-2733 + parser = all_parsers + data = "a b c\n4 apple bat 5.7\n8 orange cow 10" + result = parser.read_csv(StringIO(data), sep=r"\s+", usecols=("a", "b")) - expected = DataFrame({"b": ["a", "b"], "c": [1, 2], "d": ("one", "two")}) - expected = expected.set_index(["b", "c"]) + expected = DataFrame( + {"a": ["apple", "orange"], "b": ["bat", "cow"]}, index=[4, 8] + ) + tm.assert_frame_equal(result, expected) - result = parser.read_csv( - StringIO(data), usecols=["b", "c", "d"], index_col=["b", "c"] - ) - tm.assert_frame_equal(result, expected) + def test_usecols_with_whitespace(self, all_parsers): + parser = all_parsers + data = "a b c\n4 apple bat 5.7\n8 orange cow 10" + result = parser.read_csv( + StringIO(data), delim_whitespace=True, usecols=("a", "b") + ) + expected = DataFrame( + {"a": ["apple", "orange"], "b": ["bat", "cow"]}, index=[4, 8] + ) + tm.assert_frame_equal(result, 
expected) -def test_usecols_implicit_index_col(all_parsers): - # see gh-2654 - parser = all_parsers - data = "a,b,c\n4,apple,bat,5.7\n8,orange,cow,10" + @pytest.mark.parametrize( + "usecols,expected", + [ + # Column selection by index. + ([0, 1], DataFrame(data=[[1000, 2000], [4000, 5000]], columns=["2", "0"])), + # Column selection by name. + ( + ["0", "1"], + DataFrame(data=[[2000, 3000], [5000, 6000]], columns=["0", "1"]), + ), + ], + ) + def test_usecols_with_integer_like_header(self, all_parsers, usecols, expected): + parser = all_parsers + data = """2,0,1 + 1000,2000,3000 + 4000,5000,6000""" - result = parser.read_csv(StringIO(data), usecols=["a", "b"]) - expected = DataFrame({"a": ["apple", "orange"], "b": ["bat", "cow"]}, index=[4, 8]) - tm.assert_frame_equal(result, expected) + result = parser.read_csv(StringIO(data), usecols=usecols) + tm.assert_frame_equal(result, expected) + def test_empty_usecols(self, all_parsers): + data = "a,b,c\n1,2,3\n4,5,6" + expected = DataFrame() + parser = all_parsers -def test_usecols_regex_sep(all_parsers): - # see gh-2733 - parser = all_parsers - data = "a b c\n4 apple bat 5.7\n8 orange cow 10" - result = parser.read_csv(StringIO(data), sep=r"\s+", usecols=("a", "b")) + result = parser.read_csv(StringIO(data), usecols=set()) + tm.assert_frame_equal(result, expected) - expected = DataFrame({"a": ["apple", "orange"], "b": ["bat", "cow"]}, index=[4, 8]) - tm.assert_frame_equal(result, expected) + def test_np_array_usecols(self, all_parsers): + # see gh-12546 + parser = all_parsers + data = "a,b,c\n1,2,3" + usecols = np.array(["a", "b"]) + expected = DataFrame([[1, 2]], columns=usecols) + result = parser.read_csv(StringIO(data), usecols=usecols) + tm.assert_frame_equal(result, expected) -def test_usecols_with_whitespace(all_parsers): - parser = all_parsers - data = "a b c\n4 apple bat 5.7\n8 orange cow 10" + @pytest.mark.parametrize( + "usecols,expected", + [ + ( + lambda x: x.upper() in ["AAA", "BBB", "DDD"], + DataFrame( + { + "AaA": { + 0: 0.056674972999999997, + 1: 2.6132309819999997, + 2: 3.5689350380000002, + }, + "bBb": {0: 8, 1: 2, 2: 7}, + "ddd": {0: "a", 1: "b", 2: "a"}, + } + ), + ), + (lambda x: False, DataFrame()), + ], + ) + def test_callable_usecols(self, all_parsers, usecols, expected): + # see gh-14154 + data = """AaA,bBb,CCC,ddd + 0.056674973,8,True,a + 2.613230982,2,False,b + 3.568935038,7,False,a""" + parser = all_parsers + + result = parser.read_csv(StringIO(data), usecols=usecols) + tm.assert_frame_equal(result, expected) - result = parser.read_csv(StringIO(data), delim_whitespace=True, usecols=("a", "b")) - expected = DataFrame({"a": ["apple", "orange"], "b": ["bat", "cow"]}, index=[4, 8]) - tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("usecols", [["a", "c"], lambda x: x in ["a", "c"]]) + def test_incomplete_first_row(self, all_parsers, usecols): + # see gh-6710 + data = "1,2\n1,2,3" + parser = all_parsers + names = ["a", "b", "c"] + expected = DataFrame({"a": [1, 1], "c": [np.nan, 3]}) + result = parser.read_csv(StringIO(data), names=names, usecols=usecols) + tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize( - "usecols,expected", - [ - # Column selection by index. - ([0, 1], DataFrame(data=[[1000, 2000], [4000, 5000]], columns=["2", "0"])), - # Column selection by name. 
- (["0", "1"], DataFrame(data=[[2000, 3000], [5000, 6000]], columns=["0", "1"])), - ], -) -def test_usecols_with_integer_like_header(all_parsers, usecols, expected): - parser = all_parsers - data = """2,0,1 -1000,2000,3000 -4000,5000,6000""" - - result = parser.read_csv(StringIO(data), usecols=usecols) - tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize("usecols", [[0, 2, 3], [3, 0, 2]]) -def test_usecols_with_parse_dates(all_parsers, usecols): - # see gh-9755 - data = """a,b,c,d,e -0,1,20140101,0900,4 -0,1,20140102,1000,4""" - parser = all_parsers - parse_dates = [[1, 2]] - - cols = { - "a": [0, 0], - "c_d": [Timestamp("2014-01-01 09:00:00"), Timestamp("2014-01-02 10:00:00")], - } - expected = DataFrame(cols, columns=["c_d", "a"]) - result = parser.read_csv(StringIO(data), usecols=usecols, parse_dates=parse_dates) - tm.assert_frame_equal(result, expected) - - -def test_usecols_with_parse_dates2(all_parsers): - # see gh-13604 - parser = all_parsers - data = """2008-02-07 09:40,1032.43 -2008-02-07 09:50,1042.54 -2008-02-07 10:00,1051.65""" - - names = ["date", "values"] - usecols = names[:] - parse_dates = [0] - - index = Index( + @pytest.mark.parametrize( + "data,usecols,kwargs,expected", [ - Timestamp("2008-02-07 09:40"), - Timestamp("2008-02-07 09:50"), - Timestamp("2008-02-07 10:00"), + # see gh-8985 + ( + "19,29,39\n" * 2 + "10,20,30,40", + [0, 1, 2], + {"header": None}, + DataFrame([[19, 29, 39], [19, 29, 39], [10, 20, 30]]), + ), + # see gh-9549 + ( + ("A,B,C\n1,2,3\n3,4,5\n1,2,4,5,1,6\n1,2,3,,,1,\n1,2,3\n5,6,7"), + ["A", "B", "C"], + {}, + DataFrame( + { + "A": [1, 3, 1, 1, 1, 5], + "B": [2, 4, 2, 2, 2, 6], + "C": [3, 5, 4, 3, 3, 7], + } + ), + ), ], - name="date", ) - cols = {"values": [1032.43, 1042.54, 1051.65]} - expected = DataFrame(cols, index=index) - - result = parser.read_csv( - StringIO(data), - parse_dates=parse_dates, - index_col=0, - usecols=usecols, - header=None, - names=names, - ) - tm.assert_frame_equal(result, expected) - - -def test_usecols_with_parse_dates3(all_parsers): - # see gh-14792 - parser = all_parsers - data = """a,b,c,d,e,f,g,h,i,j -2016/09/21,1,1,2,3,4,5,6,7,8""" - - usecols = list("abcdefghij") - parse_dates = [0] - - cols = { - "a": Timestamp("2016-09-21"), - "b": [1], - "c": [1], - "d": [2], - "e": [3], - "f": [4], - "g": [5], - "h": [6], - "i": [7], - "j": [8], - } - expected = DataFrame(cols, columns=usecols) - - result = parser.read_csv(StringIO(data), usecols=usecols, parse_dates=parse_dates) - tm.assert_frame_equal(result, expected) - - -def test_usecols_with_parse_dates4(all_parsers): - data = "a,b,c,d,e,f,g,h,i,j\n2016/09/21,1,1,2,3,4,5,6,7,8" - usecols = list("abcdefghij") - parse_dates = [[0, 1]] - parser = all_parsers - - cols = { - "a_b": "2016/09/21 1", - "c": [1], - "d": [2], - "e": [3], - "f": [4], - "g": [5], - "h": [6], - "i": [7], - "j": [8], - } - expected = DataFrame(cols, columns=["a_b"] + list("cdefghij")) - - result = parser.read_csv(StringIO(data), usecols=usecols, parse_dates=parse_dates) - tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize("usecols", [[0, 2, 3], [3, 0, 2]]) -@pytest.mark.parametrize( - "names", - [ - list("abcde"), # Names span all columns in original data. - list("acd"), # Names span only the selected columns. 
- ], -) -def test_usecols_with_parse_dates_and_names(all_parsers, usecols, names): - # see gh-9755 - s = """0,1,20140101,0900,4 -0,1,20140102,1000,4""" - parse_dates = [[1, 2]] - parser = all_parsers - - cols = { - "a": [0, 0], - "c_d": [Timestamp("2014-01-01 09:00:00"), Timestamp("2014-01-02 10:00:00")], - } - expected = DataFrame(cols, columns=["c_d", "a"]) - - result = parser.read_csv( - StringIO(s), names=names, parse_dates=parse_dates, usecols=usecols - ) - tm.assert_frame_equal(result, expected) - - -def test_usecols_with_unicode_strings(all_parsers): - # see gh-13219 - data = """AAA,BBB,CCC,DDD -0.056674973,8,True,a -2.613230982,2,False,b -3.568935038,7,False,a""" - parser = all_parsers - - exp_data = { - "AAA": {0: 0.056674972999999997, 1: 2.6132309819999997, 2: 3.5689350380000002}, - "BBB": {0: 8, 1: 2, 2: 7}, - } - expected = DataFrame(exp_data) - - result = parser.read_csv(StringIO(data), usecols=["AAA", "BBB"]) - tm.assert_frame_equal(result, expected) - - -def test_usecols_with_single_byte_unicode_strings(all_parsers): - # see gh-13219 - data = """A,B,C,D -0.056674973,8,True,a -2.613230982,2,False,b -3.568935038,7,False,a""" - parser = all_parsers - - exp_data = { - "A": {0: 0.056674972999999997, 1: 2.6132309819999997, 2: 3.5689350380000002}, - "B": {0: 8, 1: 2, 2: 7}, - } - expected = DataFrame(exp_data) - - result = parser.read_csv(StringIO(data), usecols=["A", "B"]) - tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize("usecols", [["AAA", b"BBB"], [b"AAA", "BBB"]]) -def test_usecols_with_mixed_encoding_strings(all_parsers, usecols): - data = """AAA,BBB,CCC,DDD -0.056674973,8,True,a -2.613230982,2,False,b -3.568935038,7,False,a""" - parser = all_parsers - - with pytest.raises(ValueError, match=_msg_validate_usecols_arg): - parser.read_csv(StringIO(data), usecols=usecols) - - -@pytest.mark.parametrize("usecols", [["あああ", "いい"], ["あああ", "いい"]]) -def test_usecols_with_multi_byte_characters(all_parsers, usecols): - data = """あああ,いい,ううう,ええええ -0.056674973,8,True,a -2.613230982,2,False,b -3.568935038,7,False,a""" - parser = all_parsers - - exp_data = { - "あああ": {0: 0.056674972999999997, 1: 2.6132309819999997, 2: 3.5689350380000002}, - "いい": {0: 8, 1: 2, 2: 7}, - } - expected = DataFrame(exp_data) - - result = parser.read_csv(StringIO(data), usecols=usecols) - tm.assert_frame_equal(result, expected) - - -def test_empty_usecols(all_parsers): - data = "a,b,c\n1,2,3\n4,5,6" - expected = DataFrame() - parser = all_parsers - - result = parser.read_csv(StringIO(data), usecols=set()) - tm.assert_frame_equal(result, expected) - - -def test_np_array_usecols(all_parsers): - # see gh-12546 - parser = all_parsers - data = "a,b,c\n1,2,3" - usecols = np.array(["a", "b"]) - - expected = DataFrame([[1, 2]], columns=usecols) - result = parser.read_csv(StringIO(data), usecols=usecols) - tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize( - "usecols,expected", - [ - ( - lambda x: x.upper() in ["AAA", "BBB", "DDD"], - DataFrame( - { - "AaA": { - 0: 0.056674972999999997, - 1: 2.6132309819999997, - 2: 3.5689350380000002, - }, - "bBb": {0: 8, 1: 2, 2: 7}, - "ddd": {0: "a", 1: "b", 2: "a"}, - } - ), - ), - (lambda x: False, DataFrame()), - ], -) -def test_callable_usecols(all_parsers, usecols, expected): - # see gh-14154 - data = """AaA,bBb,CCC,ddd -0.056674973,8,True,a -2.613230982,2,False,b -3.568935038,7,False,a""" - parser = all_parsers + def test_uneven_length_cols(self, all_parsers, data, usecols, kwargs, expected): + # see gh-8985 + parser = all_parsers + result 
= parser.read_csv(StringIO(data), usecols=usecols, **kwargs) + tm.assert_frame_equal(result, expected) - result = parser.read_csv(StringIO(data), usecols=usecols) - tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( + "usecols,kwargs,expected,msg", + [ + ( + ["a", "b", "c", "d"], + {}, + DataFrame({"a": [1, 5], "b": [2, 6], "c": [3, 7], "d": [4, 8]}), + None, + ), + ( + ["a", "b", "c", "f"], + {}, + None, + _msg_validate_usecols_names.format(r"\['f'\]"), + ), + (["a", "b", "f"], {}, None, _msg_validate_usecols_names.format(r"\['f'\]")), + ( + ["a", "b", "f", "g"], + {}, + None, + _msg_validate_usecols_names.format(r"\[('f', 'g'|'g', 'f')\]"), + ), + # see gh-14671 + ( + None, + {"header": 0, "names": ["A", "B", "C", "D"]}, + DataFrame({"A": [1, 5], "B": [2, 6], "C": [3, 7], "D": [4, 8]}), + None, + ), + ( + ["A", "B", "C", "f"], + {"header": 0, "names": ["A", "B", "C", "D"]}, + None, + _msg_validate_usecols_names.format(r"\['f'\]"), + ), + ( + ["A", "B", "f"], + {"names": ["A", "B", "C", "D"]}, + None, + _msg_validate_usecols_names.format(r"\['f'\]"), + ), + ], + ) + def test_raises_on_usecols_names_mismatch( + self, all_parsers, usecols, kwargs, expected, msg + ): + data = "a,b,c,d\n1,2,3,4\n5,6,7,8" + kwargs.update(usecols=usecols) + parser = all_parsers + + if expected is None: + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), **kwargs) + else: + result = parser.read_csv(StringIO(data), **kwargs) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("usecols", [["A", "C"], [0, 2]]) + def test_usecols_subset_names_mismatch_orig_columns(self, all_parsers, usecols): + data = "a,b,c,d\n1,2,3,4\n5,6,7,8" + names = ["A", "B", "C", "D"] + parser = all_parsers + + result = parser.read_csv(StringIO(data), header=0, names=names, usecols=usecols) + expected = DataFrame({"A": [1, 5], "C": [3, 7]}) + tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("usecols", [["a", "c"], lambda x: x in ["a", "c"]]) -def test_incomplete_first_row(all_parsers, usecols): - # see gh-6710 - data = "1,2\n1,2,3" - parser = all_parsers - names = ["a", "b", "c"] - expected = DataFrame({"a": [1, 1], "c": [np.nan, 3]}) +class TestUsecolsParseDates: + @pytest.mark.parametrize("usecols", [[0, 2, 3], [3, 0, 2]]) + def test_usecols_with_parse_dates(self, all_parsers, usecols): + # see gh-9755 + data = """a,b,c,d,e + 0,1,20140101,0900,4 + 0,1,20140102,1000,4""" + parser = all_parsers + parse_dates = [[1, 2]] + + cols = { + "a": [0, 0], + "c_d": [Timestamp("2014-01-01 09:00:00"), Timestamp("2014-01-02 10:00:00")], + } + expected = DataFrame(cols, columns=["c_d", "a"]) + result = parser.read_csv( + StringIO(data), usecols=usecols, parse_dates=parse_dates + ) + tm.assert_frame_equal(result, expected) - result = parser.read_csv(StringIO(data), names=names, usecols=usecols) - tm.assert_frame_equal(result, expected) + def test_usecols_with_parse_dates2(self, all_parsers): + # see gh-13604 + parser = all_parsers + data = """2008-02-07 09:40,1032.43 + 2008-02-07 09:50,1042.54 + 2008-02-07 10:00,1051.65""" + + names = ["date", "values"] + usecols = names[:] + parse_dates = [0] + + index = Index( + [ + Timestamp("2008-02-07 09:40"), + Timestamp("2008-02-07 09:50"), + Timestamp("2008-02-07 10:00"), + ], + name="date", + ) + cols = {"values": [1032.43, 1042.54, 1051.65]} + expected = DataFrame(cols, index=index) + + result = parser.read_csv( + StringIO(data), + parse_dates=parse_dates, + index_col=0, + usecols=usecols, + header=None, + names=names, + ) + 
tm.assert_frame_equal(result, expected) + def test_usecols_with_parse_dates3(self, all_parsers): + # see gh-14792 + parser = all_parsers + data = """a,b,c,d,e,f,g,h,i,j + 2016/09/21,1,1,2,3,4,5,6,7,8""" + + usecols = list("abcdefghij") + parse_dates = [0] + + cols = { + "a": Timestamp("2016-09-21"), + "b": [1], + "c": [1], + "d": [2], + "e": [3], + "f": [4], + "g": [5], + "h": [6], + "i": [7], + "j": [8], + } + expected = DataFrame(cols, columns=usecols) + + result = parser.read_csv( + StringIO(data), usecols=usecols, parse_dates=parse_dates + ) + tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize( - "data,usecols,kwargs,expected", - [ - # see gh-8985 - ( - "19,29,39\n" * 2 + "10,20,30,40", - [0, 1, 2], - {"header": None}, - DataFrame([[19, 29, 39], [19, 29, 39], [10, 20, 30]]), - ), - # see gh-9549 - ( - ("A,B,C\n1,2,3\n3,4,5\n1,2,4,5,1,6\n1,2,3,,,1,\n1,2,3\n5,6,7"), - ["A", "B", "C"], - {}, - DataFrame( - { - "A": [1, 3, 1, 1, 1, 5], - "B": [2, 4, 2, 2, 2, 6], - "C": [3, 5, 4, 3, 3, 7], - } - ), - ), - ], -) -def test_uneven_length_cols(all_parsers, data, usecols, kwargs, expected): - # see gh-8985 - parser = all_parsers - result = parser.read_csv(StringIO(data), usecols=usecols, **kwargs) - tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize( - "usecols,kwargs,expected,msg", - [ - ( - ["a", "b", "c", "d"], - {}, - DataFrame({"a": [1, 5], "b": [2, 6], "c": [3, 7], "d": [4, 8]}), - None, - ), - ( - ["a", "b", "c", "f"], - {}, - None, - _msg_validate_usecols_names.format(r"\['f'\]"), - ), - (["a", "b", "f"], {}, None, _msg_validate_usecols_names.format(r"\['f'\]")), - ( - ["a", "b", "f", "g"], - {}, - None, - _msg_validate_usecols_names.format(r"\[('f', 'g'|'g', 'f')\]"), - ), - # see gh-14671 - ( - None, - {"header": 0, "names": ["A", "B", "C", "D"]}, - DataFrame({"A": [1, 5], "B": [2, 6], "C": [3, 7], "D": [4, 8]}), - None, - ), - ( - ["A", "B", "C", "f"], - {"header": 0, "names": ["A", "B", "C", "D"]}, - None, - _msg_validate_usecols_names.format(r"\['f'\]"), - ), - ( - ["A", "B", "f"], - {"names": ["A", "B", "C", "D"]}, - None, - _msg_validate_usecols_names.format(r"\['f'\]"), - ), - ], -) -def test_raises_on_usecols_names_mismatch(all_parsers, usecols, kwargs, expected, msg): - data = "a,b,c,d\n1,2,3,4\n5,6,7,8" - kwargs.update(usecols=usecols) - parser = all_parsers + def test_usecols_with_parse_dates4(self, all_parsers): + data = "a,b,c,d,e,f,g,h,i,j\n2016/09/21,1,1,2,3,4,5,6,7,8" + usecols = list("abcdefghij") + parse_dates = [[0, 1]] + parser = all_parsers + + cols = { + "a_b": "2016/09/21 1", + "c": [1], + "d": [2], + "e": [3], + "f": [4], + "g": [5], + "h": [6], + "i": [7], + "j": [8], + } + expected = DataFrame(cols, columns=["a_b"] + list("cdefghij")) + + result = parser.read_csv( + StringIO(data), usecols=usecols, parse_dates=parse_dates + ) + tm.assert_frame_equal(result, expected) - if expected is None: - with pytest.raises(ValueError, match=msg): - parser.read_csv(StringIO(data), **kwargs) - else: - result = parser.read_csv(StringIO(data), **kwargs) + @pytest.mark.parametrize("usecols", [[0, 2, 3], [3, 0, 2]]) + @pytest.mark.parametrize( + "names", + [ + list("abcde"), # Names span all columns in original data. + list("acd"), # Names span only the selected columns. 
+ ], + ) + def test_usecols_with_parse_dates_and_names(self, all_parsers, usecols, names): + # see gh-9755 + s = """0,1,20140101,0900,4 + 0,1,20140102,1000,4""" + parse_dates = [[1, 2]] + parser = all_parsers + + cols = { + "a": [0, 0], + "c_d": [Timestamp("2014-01-01 09:00:00"), Timestamp("2014-01-02 10:00:00")], + } + expected = DataFrame(cols, columns=["c_d", "a"]) + + result = parser.read_csv( + StringIO(s), names=names, parse_dates=parse_dates, usecols=usecols + ) tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("usecols", [["A", "C"], [0, 2]]) -def test_usecols_subset_names_mismatch_orig_columns(all_parsers, usecols): - data = "a,b,c,d\n1,2,3,4\n5,6,7,8" - names = ["A", "B", "C", "D"] - parser = all_parsers +class TestUsecolsStrings: + def test_usecols_with_unicode_strings(self, all_parsers): + # see gh-13219 + data = """AAA,BBB,CCC,DDD + 0.056674973,8,True,a + 2.613230982,2,False,b + 3.568935038,7,False,a""" + parser = all_parsers + + exp_data = { + "AAA": { + 0: 0.056674972999999997, + 1: 2.6132309819999997, + 2: 3.5689350380000002, + }, + "BBB": {0: 8, 1: 2, 2: 7}, + } + expected = DataFrame(exp_data) + + result = parser.read_csv(StringIO(data), usecols=["AAA", "BBB"]) + tm.assert_frame_equal(result, expected) + + def test_usecols_with_single_byte_unicode_strings(self, all_parsers): + # see gh-13219 + data = """A,B,C,D + 0.056674973,8,True,a + 2.613230982,2,False,b + 3.568935038,7,False,a""" + parser = all_parsers + + exp_data = { + "A": { + 0: 0.056674972999999997, + 1: 2.6132309819999997, + 2: 3.5689350380000002, + }, + "B": {0: 8, 1: 2, 2: 7}, + } + expected = DataFrame(exp_data) + + result = parser.read_csv(StringIO(data), usecols=["A", "B"]) + tm.assert_frame_equal(result, expected) - result = parser.read_csv(StringIO(data), header=0, names=names, usecols=usecols) - expected = DataFrame({"A": [1, 5], "C": [3, 7]}) - tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("usecols", [["AAA", b"BBB"], [b"AAA", "BBB"]]) + def test_usecols_with_mixed_encoding_strings(self, all_parsers, usecols): + data = """AAA,BBB,CCC,DDD + 0.056674973,8,True,a + 2.613230982,2,False,b + 3.568935038,7,False,a""" + parser = all_parsers + + with pytest.raises(ValueError, match=_msg_validate_usecols_arg): + parser.read_csv(StringIO(data), usecols=usecols) + + @pytest.mark.parametrize("usecols", [["あああ", "いい"], ["あああ", "いい"]]) + def test_usecols_with_multi_byte_characters(self, all_parsers, usecols): + data = """あああ,いい,ううう,ええええ + 0.056674973,8,True,a + 2.613230982,2,False,b + 3.568935038,7,False,a""" + parser = all_parsers + + exp_data = { + "あああ": { + 0: 0.056674972999999997, + 1: 2.6132309819999997, + 2: 3.5689350380000002, + }, + "いい": {0: 8, 1: 2, 2: 7}, + } + expected = DataFrame(exp_data) + + result = parser.read_csv(StringIO(data), usecols=usecols) + tm.assert_frame_equal(result, expected) From 6f92c2399fa96cbb38d362eb1ff2cd9c0afc44e3 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Wed, 30 Dec 2020 23:31:09 -0500 Subject: [PATCH 3/6] split test_dtypes.py into multiple files --- pandas/tests/io/parser/dtypes/test_basic.py | 167 +++++ .../io/parser/dtypes/test_categorical.py | 294 +++++++++ pandas/tests/io/parser/dtypes/test_empty.py | 172 +++++ pandas/tests/io/parser/test_dtypes.py | 593 ------------------ 4 files changed, 633 insertions(+), 593 deletions(-) create mode 100644 pandas/tests/io/parser/dtypes/test_basic.py create mode 100644 pandas/tests/io/parser/dtypes/test_categorical.py create mode 100644 pandas/tests/io/parser/dtypes/test_empty.py delete mode 
100644 pandas/tests/io/parser/test_dtypes.py diff --git a/pandas/tests/io/parser/dtypes/test_basic.py b/pandas/tests/io/parser/dtypes/test_basic.py new file mode 100644 index 0000000000000..e416d8dcdd905 --- /dev/null +++ b/pandas/tests/io/parser/dtypes/test_basic.py @@ -0,0 +1,167 @@ +""" +Tests dtype specification during parsing +for all of the parsers defined in parsers.py +""" +from io import StringIO + +import numpy as np +import pytest + +from pandas.errors import ParserWarning + +import pandas as pd +from pandas import DataFrame +import pandas._testing as tm + + +@pytest.mark.parametrize("dtype", [str, object]) +@pytest.mark.parametrize("check_orig", [True, False]) +def test_dtype_all_columns(all_parsers, dtype, check_orig): + # see gh-3795, gh-6607 + parser = all_parsers + + df = DataFrame( + np.random.rand(5, 2).round(4), + columns=list("AB"), + index=["1A", "1B", "1C", "1D", "1E"], + ) + + with tm.ensure_clean("__passing_str_as_dtype__.csv") as path: + df.to_csv(path) + + result = parser.read_csv(path, dtype=dtype, index_col=0) + + if check_orig: + expected = df.copy() + result = result.astype(float) + else: + expected = df.astype(str) + + tm.assert_frame_equal(result, expected) + + +def test_dtype_per_column(all_parsers): + parser = all_parsers + data = """\ +one,two +1,2.5 +2,3.5 +3,4.5 +4,5.5""" + expected = DataFrame( + [[1, "2.5"], [2, "3.5"], [3, "4.5"], [4, "5.5"]], columns=["one", "two"] + ) + expected["one"] = expected["one"].astype(np.float64) + expected["two"] = expected["two"].astype(object) + + result = parser.read_csv(StringIO(data), dtype={"one": np.float64, 1: str}) + tm.assert_frame_equal(result, expected) + + +def test_invalid_dtype_per_column(all_parsers): + parser = all_parsers + data = """\ +one,two +1,2.5 +2,3.5 +3,4.5 +4,5.5""" + + with pytest.raises(TypeError, match="data type [\"']foo[\"'] not understood"): + parser.read_csv(StringIO(data), dtype={"one": "foo", 1: "int"}) + + +def test_raise_on_passed_int_dtype_with_nas(all_parsers): + # see gh-2631 + parser = all_parsers + data = """YEAR, DOY, a +2001,106380451,10 +2001,,11 +2001,106380451,67""" + + msg = ( + "Integer column has NA values" + if parser.engine == "c" + else "Unable to convert column DOY" + ) + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), dtype={"DOY": np.int64}, skipinitialspace=True) + + +def test_dtype_with_converters(all_parsers): + parser = all_parsers + data = """a,b +1.1,2.2 +1.2,2.3""" + + # Dtype spec ignored if converted specified. 
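# A minimal standalone sketch of the behaviour exercised by this test: when
# both ``dtype`` and ``converters`` target the same column, pandas applies the
# converter, emits a ParserWarning, and the dtype request is ignored.
import warnings
from io import StringIO
import pandas as pd

csv = "a,b\n1.1,2.2\n1.2,2.3"
with warnings.catch_warnings():
    warnings.simplefilter("ignore")  # suppress the expected ParserWarning
    frame = pd.read_csv(StringIO(csv), dtype={"a": "i8"}, converters={"a": str})
assert list(frame["a"]) == ["1.1", "1.2"]  # converter output (strings), not int64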
+ with tm.assert_produces_warning(ParserWarning): + result = parser.read_csv( + StringIO(data), dtype={"a": "i8"}, converters={"a": lambda x: str(x)} + ) + expected = DataFrame({"a": ["1.1", "1.2"], "b": [2.2, 2.3]}) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "dtype", list(np.typecodes["AllInteger"] + np.typecodes["Float"]) +) +def test_numeric_dtype(all_parsers, dtype): + data = "0\n1" + parser = all_parsers + expected = DataFrame([0, 1], dtype=dtype) + + result = parser.read_csv(StringIO(data), header=None, dtype=dtype) + tm.assert_frame_equal(expected, result) + + +def test_boolean_dtype(all_parsers): + parser = all_parsers + data = "\n".join( + [ + "a", + "True", + "TRUE", + "true", + "1", + "1.0", + "False", + "FALSE", + "false", + "0", + "0.0", + "NaN", + "nan", + "NA", + "null", + "NULL", + ] + ) + + result = parser.read_csv(StringIO(data), dtype="boolean") + expected = DataFrame( + { + "a": pd.array( + [ + True, + True, + True, + True, + True, + False, + False, + False, + False, + False, + None, + None, + None, + None, + None, + ], + dtype="boolean", + ) + } + ) + + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/dtypes/test_categorical.py b/pandas/tests/io/parser/dtypes/test_categorical.py new file mode 100644 index 0000000000000..2f569424a82f5 --- /dev/null +++ b/pandas/tests/io/parser/dtypes/test_categorical.py @@ -0,0 +1,294 @@ +""" +Tests dtype specification during parsing +for all of the parsers defined in parsers.py +""" +from io import StringIO +import os + +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import CategoricalDtype + +import pandas as pd +from pandas import Categorical, DataFrame, Timestamp +import pandas._testing as tm + + +@pytest.mark.parametrize( + "dtype", + [ + "category", + CategoricalDtype(), + {"a": "category", "b": "category", "c": CategoricalDtype()}, + ], +) +def test_categorical_dtype(all_parsers, dtype): + # see gh-10153 + parser = all_parsers + data = """a,b,c +1,a,3.4 +1,a,3.4 +2,b,4.5""" + expected = DataFrame( + { + "a": Categorical(["1", "1", "2"]), + "b": Categorical(["a", "a", "b"]), + "c": Categorical(["3.4", "3.4", "4.5"]), + } + ) + actual = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(actual, expected) + + +@pytest.mark.parametrize("dtype", [{"b": "category"}, {1: "category"}]) +def test_categorical_dtype_single(all_parsers, dtype): + # see gh-10153 + parser = all_parsers + data = """a,b,c +1,a,3.4 +1,a,3.4 +2,b,4.5""" + expected = DataFrame( + {"a": [1, 1, 2], "b": Categorical(["a", "a", "b"]), "c": [3.4, 3.4, 4.5]} + ) + actual = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(actual, expected) + + +def test_categorical_dtype_unsorted(all_parsers): + # see gh-10153 + parser = all_parsers + data = """a,b,c +1,b,3.4 +1,b,3.4 +2,a,4.5""" + expected = DataFrame( + { + "a": Categorical(["1", "1", "2"]), + "b": Categorical(["b", "b", "a"]), + "c": Categorical(["3.4", "3.4", "4.5"]), + } + ) + actual = parser.read_csv(StringIO(data), dtype="category") + tm.assert_frame_equal(actual, expected) + + +def test_categorical_dtype_missing(all_parsers): + # see gh-10153 + parser = all_parsers + data = """a,b,c +1,b,3.4 +1,nan,3.4 +2,a,4.5""" + expected = DataFrame( + { + "a": Categorical(["1", "1", "2"]), + "b": Categorical(["b", np.nan, "a"]), + "c": Categorical(["3.4", "3.4", "4.5"]), + } + ) + actual = parser.read_csv(StringIO(data), dtype="category") + tm.assert_frame_equal(actual, expected) + + +@pytest.mark.slow +def 
test_categorical_dtype_high_cardinality_numeric(all_parsers): + # see gh-18186 + parser = all_parsers + data = np.sort([str(i) for i in range(524289)]) + expected = DataFrame({"a": Categorical(data, ordered=True)}) + + actual = parser.read_csv(StringIO("a\n" + "\n".join(data)), dtype="category") + actual["a"] = actual["a"].cat.reorder_categories( + np.sort(actual.a.cat.categories), ordered=True + ) + tm.assert_frame_equal(actual, expected) + + +def test_categorical_dtype_utf16(all_parsers, csv_dir_path): + # see gh-10153 + pth = os.path.join(csv_dir_path, "utf16_ex.txt") + parser = all_parsers + encoding = "utf-16" + sep = "\t" + + expected = parser.read_csv(pth, sep=sep, encoding=encoding) + expected = expected.apply(Categorical) + + actual = parser.read_csv(pth, sep=sep, encoding=encoding, dtype="category") + tm.assert_frame_equal(actual, expected) + + +def test_categorical_dtype_chunksize_infer_categories(all_parsers): + # see gh-10153 + parser = all_parsers + data = """a,b +1,a +1,b +1,b +2,c""" + expecteds = [ + DataFrame({"a": [1, 1], "b": Categorical(["a", "b"])}), + DataFrame({"a": [1, 2], "b": Categorical(["b", "c"])}, index=[2, 3]), + ] + with parser.read_csv( + StringIO(data), dtype={"b": "category"}, chunksize=2 + ) as actuals: + for actual, expected in zip(actuals, expecteds): + tm.assert_frame_equal(actual, expected) + + +def test_categorical_dtype_chunksize_explicit_categories(all_parsers): + # see gh-10153 + parser = all_parsers + data = """a,b +1,a +1,b +1,b +2,c""" + cats = ["a", "b", "c"] + expecteds = [ + DataFrame({"a": [1, 1], "b": Categorical(["a", "b"], categories=cats)}), + DataFrame( + {"a": [1, 2], "b": Categorical(["b", "c"], categories=cats)}, + index=[2, 3], + ), + ] + dtype = CategoricalDtype(cats) + with parser.read_csv(StringIO(data), dtype={"b": dtype}, chunksize=2) as actuals: + for actual, expected in zip(actuals, expecteds): + tm.assert_frame_equal(actual, expected) + + +def test_categorical_dtype_latin1(all_parsers, csv_dir_path): + # see gh-10153 + pth = os.path.join(csv_dir_path, "unicode_series.csv") + parser = all_parsers + encoding = "latin-1" + + expected = parser.read_csv(pth, header=None, encoding=encoding) + expected[1] = Categorical(expected[1]) + + actual = parser.read_csv(pth, header=None, encoding=encoding, dtype={1: "category"}) + tm.assert_frame_equal(actual, expected) + + +@pytest.mark.parametrize("ordered", [False, True]) +@pytest.mark.parametrize( + "categories", + [["a", "b", "c"], ["a", "c", "b"], ["a", "b", "c", "d"], ["c", "b", "a"]], +) +def test_categorical_category_dtype(all_parsers, categories, ordered): + parser = all_parsers + data = """a,b +1,a +1,b +1,b +2,c""" + expected = DataFrame( + { + "a": [1, 1, 1, 2], + "b": Categorical( + ["a", "b", "b", "c"], categories=categories, ordered=ordered + ), + } + ) + + dtype = {"b": CategoricalDtype(categories=categories, ordered=ordered)} + result = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) + + +def test_categorical_category_dtype_unsorted(all_parsers): + parser = all_parsers + data = """a,b +1,a +1,b +1,b +2,c""" + dtype = CategoricalDtype(["c", "b", "a"]) + expected = DataFrame( + { + "a": [1, 1, 1, 2], + "b": Categorical(["a", "b", "b", "c"], categories=["c", "b", "a"]), + } + ) + + result = parser.read_csv(StringIO(data), dtype={"b": dtype}) + tm.assert_frame_equal(result, expected) + + +def test_categorical_coerces_numeric(all_parsers): + parser = all_parsers + dtype = {"b": CategoricalDtype([1, 2, 3])} + + data = "b\n1\n1\n2\n3" + 
expected = DataFrame({"b": Categorical([1, 1, 2, 3])}) + + result = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) + + +def test_categorical_coerces_datetime(all_parsers): + parser = all_parsers + dti = pd.DatetimeIndex(["2017-01-01", "2018-01-01", "2019-01-01"], freq=None) + dtype = {"b": CategoricalDtype(dti)} + + data = "b\n2017-01-01\n2018-01-01\n2019-01-01" + expected = DataFrame({"b": Categorical(dtype["b"].categories)}) + + result = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) + + +def test_categorical_coerces_timestamp(all_parsers): + parser = all_parsers + dtype = {"b": CategoricalDtype([Timestamp("2014")])} + + data = "b\n2014-01-01\n2014-01-01T00:00:00" + expected = DataFrame({"b": Categorical([Timestamp("2014")] * 2)}) + + result = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) + + +def test_categorical_coerces_timedelta(all_parsers): + parser = all_parsers + dtype = {"b": CategoricalDtype(pd.to_timedelta(["1H", "2H", "3H"]))} + + data = "b\n1H\n2H\n3H" + expected = DataFrame({"b": Categorical(dtype["b"].categories)}) + + result = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data", + [ + "b\nTrue\nFalse\nNA\nFalse", + "b\ntrue\nfalse\nNA\nfalse", + "b\nTRUE\nFALSE\nNA\nFALSE", + "b\nTrue\nFalse\nNA\nFALSE", + ], +) +def test_categorical_dtype_coerces_boolean(all_parsers, data): + # see gh-20498 + parser = all_parsers + dtype = {"b": CategoricalDtype([False, True])} + expected = DataFrame({"b": Categorical([True, False, None, False])}) + + result = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) + + +def test_categorical_unexpected_categories(all_parsers): + parser = all_parsers + dtype = {"b": CategoricalDtype(["a", "b", "d", "e"])} + + data = "b\nd\na\nc\nd" # Unexpected c + expected = DataFrame({"b": Categorical(list("dacd"), dtype=dtype["b"])}) + + result = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/dtypes/test_empty.py b/pandas/tests/io/parser/dtypes/test_empty.py new file mode 100644 index 0000000000000..57d729fb4b7fc --- /dev/null +++ b/pandas/tests/io/parser/dtypes/test_empty.py @@ -0,0 +1,172 @@ +""" +Tests dtype specification during parsing +for all of the parsers defined in parsers.py +""" +from io import StringIO + +import numpy as np +import pytest + +from pandas import Categorical, DataFrame, Index, MultiIndex, Series, concat +import pandas._testing as tm + + +def test_dtype_all_columns_empty(all_parsers): + # see gh-12048 + parser = all_parsers + result = parser.read_csv(StringIO("A,B"), dtype=str) + + expected = DataFrame({"A": [], "B": []}, index=[], dtype=str) + tm.assert_frame_equal(result, expected) + + +def test_empty_pass_dtype(all_parsers): + parser = all_parsers + + data = "one,two" + result = parser.read_csv(StringIO(data), dtype={"one": "u1"}) + + expected = DataFrame( + {"one": np.empty(0, dtype="u1"), "two": np.empty(0, dtype=object)}, + index=Index([], dtype=object), + ) + tm.assert_frame_equal(result, expected) + + +def test_empty_with_index_pass_dtype(all_parsers): + parser = all_parsers + + data = "one,two" + result = parser.read_csv( + StringIO(data), index_col=["one"], dtype={"one": "u1", 1: "f"} + ) + + expected = DataFrame( + {"two": np.empty(0, dtype="f")}, index=Index([], dtype="u1", name="one") + ) + tm.assert_frame_equal(result, 
expected) + + +def test_empty_with_multi_index_pass_dtype(all_parsers): + parser = all_parsers + + data = "one,two,three" + result = parser.read_csv( + StringIO(data), index_col=["one", "two"], dtype={"one": "u1", 1: "f8"} + ) + + exp_idx = MultiIndex.from_arrays( + [np.empty(0, dtype="u1"), np.empty(0, dtype=np.float64)], + names=["one", "two"], + ) + expected = DataFrame({"three": np.empty(0, dtype=object)}, index=exp_idx) + tm.assert_frame_equal(result, expected) + + +def test_empty_with_mangled_column_pass_dtype_by_names(all_parsers): + parser = all_parsers + + data = "one,one" + result = parser.read_csv(StringIO(data), dtype={"one": "u1", "one.1": "f"}) + + expected = DataFrame( + {"one": np.empty(0, dtype="u1"), "one.1": np.empty(0, dtype="f")}, + index=Index([], dtype=object), + ) + tm.assert_frame_equal(result, expected) + + +def test_empty_with_mangled_column_pass_dtype_by_indexes(all_parsers): + parser = all_parsers + + data = "one,one" + result = parser.read_csv(StringIO(data), dtype={0: "u1", 1: "f"}) + + expected = DataFrame( + {"one": np.empty(0, dtype="u1"), "one.1": np.empty(0, dtype="f")}, + index=Index([], dtype=object), + ) + tm.assert_frame_equal(result, expected) + + +def test_empty_with_dup_column_pass_dtype_by_indexes(all_parsers): + # see gh-9424 + parser = all_parsers + expected = concat( + [Series([], name="one", dtype="u1"), Series([], name="one.1", dtype="f")], + axis=1, + ) + expected.index = expected.index.astype(object) + + data = "one,one" + result = parser.read_csv(StringIO(data), dtype={0: "u1", 1: "f"}) + tm.assert_frame_equal(result, expected) + + +def test_empty_with_dup_column_pass_dtype_by_indexes_raises(all_parsers): + # see gh-9424 + parser = all_parsers + expected = concat( + [Series([], name="one", dtype="u1"), Series([], name="one.1", dtype="f")], + axis=1, + ) + expected.index = expected.index.astype(object) + + with pytest.raises(ValueError, match="Duplicate names"): + data = "" + parser.read_csv(StringIO(data), names=["one", "one"], dtype={0: "u1", 1: "f"}) + + +@pytest.mark.parametrize( + "dtype,expected", + [ + (np.float64, DataFrame(columns=["a", "b"], dtype=np.float64)), + ( + "category", + DataFrame({"a": Categorical([]), "b": Categorical([])}, index=[]), + ), + ( + {"a": "category", "b": "category"}, + DataFrame({"a": Categorical([]), "b": Categorical([])}, index=[]), + ), + ("datetime64[ns]", DataFrame(columns=["a", "b"], dtype="datetime64[ns]")), + ( + "timedelta64[ns]", + DataFrame( + { + "a": Series([], dtype="timedelta64[ns]"), + "b": Series([], dtype="timedelta64[ns]"), + }, + index=[], + ), + ), + ( + {"a": np.int64, "b": np.int32}, + DataFrame( + {"a": Series([], dtype=np.int64), "b": Series([], dtype=np.int32)}, + index=[], + ), + ), + ( + {0: np.int64, 1: np.int32}, + DataFrame( + {"a": Series([], dtype=np.int64), "b": Series([], dtype=np.int32)}, + index=[], + ), + ), + ( + {"a": np.int64, 1: np.int32}, + DataFrame( + {"a": Series([], dtype=np.int64), "b": Series([], dtype=np.int32)}, + index=[], + ), + ), + ], +) +def test_empty_dtype(all_parsers, dtype, expected): + # see gh-14712 + parser = all_parsers + data = "a,b" + + result = parser.read_csv(StringIO(data), header=0, dtype=dtype) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_dtypes.py b/pandas/tests/io/parser/test_dtypes.py deleted file mode 100644 index d8f819d896e55..0000000000000 --- a/pandas/tests/io/parser/test_dtypes.py +++ /dev/null @@ -1,593 +0,0 @@ -""" -Tests dtype specification during parsing -for all of the parsers 
defined in parsers.py -""" -from io import StringIO -import os - -import numpy as np -import pytest - -from pandas.errors import ParserWarning - -from pandas.core.dtypes.dtypes import CategoricalDtype - -import pandas as pd -from pandas import Categorical, DataFrame, Index, MultiIndex, Series, Timestamp, concat -import pandas._testing as tm - - -class TestParserDtypesBasic: - @pytest.mark.parametrize("dtype", [str, object]) - @pytest.mark.parametrize("check_orig", [True, False]) - def test_dtype_all_columns(self, all_parsers, dtype, check_orig): - # see gh-3795, gh-6607 - parser = all_parsers - - df = DataFrame( - np.random.rand(5, 2).round(4), - columns=list("AB"), - index=["1A", "1B", "1C", "1D", "1E"], - ) - - with tm.ensure_clean("__passing_str_as_dtype__.csv") as path: - df.to_csv(path) - - result = parser.read_csv(path, dtype=dtype, index_col=0) - - if check_orig: - expected = df.copy() - result = result.astype(float) - else: - expected = df.astype(str) - - tm.assert_frame_equal(result, expected) - - def test_dtype_per_column(self, all_parsers): - parser = all_parsers - data = """\ -one,two -1,2.5 -2,3.5 -3,4.5 -4,5.5""" - expected = DataFrame( - [[1, "2.5"], [2, "3.5"], [3, "4.5"], [4, "5.5"]], columns=["one", "two"] - ) - expected["one"] = expected["one"].astype(np.float64) - expected["two"] = expected["two"].astype(object) - - result = parser.read_csv(StringIO(data), dtype={"one": np.float64, 1: str}) - tm.assert_frame_equal(result, expected) - - def test_invalid_dtype_per_column(self, all_parsers): - parser = all_parsers - data = """\ -one,two -1,2.5 -2,3.5 -3,4.5 -4,5.5""" - - with pytest.raises(TypeError, match="data type [\"']foo[\"'] not understood"): - parser.read_csv(StringIO(data), dtype={"one": "foo", 1: "int"}) - - def test_raise_on_passed_int_dtype_with_nas(self, all_parsers): - # see gh-2631 - parser = all_parsers - data = """YEAR, DOY, a - 2001,106380451,10 - 2001,,11 - 2001,106380451,67""" - - msg = ( - "Integer column has NA values" - if parser.engine == "c" - else "Unable to convert column DOY" - ) - with pytest.raises(ValueError, match=msg): - parser.read_csv( - StringIO(data), dtype={"DOY": np.int64}, skipinitialspace=True - ) - - def test_dtype_with_converters(self, all_parsers): - parser = all_parsers - data = """a,b -1.1,2.2 -1.2,2.3""" - - # Dtype spec ignored if converted specified. 
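# A minimal standalone sketch of the failure mode covered by
# test_raise_on_passed_int_dtype_with_nas above: a NumPy integer dtype cannot
# represent missing values, so read_csv raises ValueError when such a column
# contains NAs (a nullable extension dtype such as "Int64" may avoid this,
# depending on the pandas version).
from io import StringIO
import numpy as np
import pandas as pd

csv = "YEAR,DOY,a\n2001,106380451,10\n2001,,11\n2001,106380451,67"
try:
    pd.read_csv(StringIO(csv), dtype={"DOY": np.int64})
except ValueError as err:
    print(err)  # "Integer column has NA values ..." with the C engine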
- with tm.assert_produces_warning(ParserWarning): - result = parser.read_csv( - StringIO(data), dtype={"a": "i8"}, converters={"a": lambda x: str(x)} - ) - expected = DataFrame({"a": ["1.1", "1.2"], "b": [2.2, 2.3]}) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( - "dtype", list(np.typecodes["AllInteger"] + np.typecodes["Float"]) - ) - def test_numeric_dtype(self, all_parsers, dtype): - data = "0\n1" - parser = all_parsers - expected = DataFrame([0, 1], dtype=dtype) - - result = parser.read_csv(StringIO(data), header=None, dtype=dtype) - tm.assert_frame_equal(expected, result) - - def test_boolean_dtype(self, all_parsers): - parser = all_parsers - data = "\n".join( - [ - "a", - "True", - "TRUE", - "true", - "1", - "1.0", - "False", - "FALSE", - "false", - "0", - "0.0", - "NaN", - "nan", - "NA", - "null", - "NULL", - ] - ) - - result = parser.read_csv(StringIO(data), dtype="boolean") - expected = DataFrame( - { - "a": pd.array( - [ - True, - True, - True, - True, - True, - False, - False, - False, - False, - False, - None, - None, - None, - None, - None, - ], - dtype="boolean", - ) - } - ) - - tm.assert_frame_equal(result, expected) - - -class TestParserDtypesCategorical1: - @pytest.mark.parametrize( - "dtype", - [ - "category", - CategoricalDtype(), - {"a": "category", "b": "category", "c": CategoricalDtype()}, - ], - ) - def test_categorical_dtype(self, all_parsers, dtype): - # see gh-10153 - parser = all_parsers - data = """a,b,c -1,a,3.4 -1,a,3.4 -2,b,4.5""" - expected = DataFrame( - { - "a": Categorical(["1", "1", "2"]), - "b": Categorical(["a", "a", "b"]), - "c": Categorical(["3.4", "3.4", "4.5"]), - } - ) - actual = parser.read_csv(StringIO(data), dtype=dtype) - tm.assert_frame_equal(actual, expected) - - @pytest.mark.parametrize("dtype", [{"b": "category"}, {1: "category"}]) - def test_categorical_dtype_single(self, all_parsers, dtype): - # see gh-10153 - parser = all_parsers - data = """a,b,c -1,a,3.4 -1,a,3.4 -2,b,4.5""" - expected = DataFrame( - {"a": [1, 1, 2], "b": Categorical(["a", "a", "b"]), "c": [3.4, 3.4, 4.5]} - ) - actual = parser.read_csv(StringIO(data), dtype=dtype) - tm.assert_frame_equal(actual, expected) - - def test_categorical_dtype_unsorted(self, all_parsers): - # see gh-10153 - parser = all_parsers - data = """a,b,c -1,b,3.4 -1,b,3.4 -2,a,4.5""" - expected = DataFrame( - { - "a": Categorical(["1", "1", "2"]), - "b": Categorical(["b", "b", "a"]), - "c": Categorical(["3.4", "3.4", "4.5"]), - } - ) - actual = parser.read_csv(StringIO(data), dtype="category") - tm.assert_frame_equal(actual, expected) - - def test_categorical_dtype_missing(self, all_parsers): - # see gh-10153 - parser = all_parsers - data = """a,b,c -1,b,3.4 -1,nan,3.4 -2,a,4.5""" - expected = DataFrame( - { - "a": Categorical(["1", "1", "2"]), - "b": Categorical(["b", np.nan, "a"]), - "c": Categorical(["3.4", "3.4", "4.5"]), - } - ) - actual = parser.read_csv(StringIO(data), dtype="category") - tm.assert_frame_equal(actual, expected) - - @pytest.mark.slow - def test_categorical_dtype_high_cardinality_numeric(self, all_parsers): - # see gh-18186 - parser = all_parsers - data = np.sort([str(i) for i in range(524289)]) - expected = DataFrame({"a": Categorical(data, ordered=True)}) - - actual = parser.read_csv(StringIO("a\n" + "\n".join(data)), dtype="category") - actual["a"] = actual["a"].cat.reorder_categories( - np.sort(actual.a.cat.categories), ordered=True - ) - tm.assert_frame_equal(actual, expected) - - def test_categorical_dtype_utf16(self, all_parsers, csv_dir_path): - 
# see gh-10153 - pth = os.path.join(csv_dir_path, "utf16_ex.txt") - parser = all_parsers - encoding = "utf-16" - sep = "\t" - - expected = parser.read_csv(pth, sep=sep, encoding=encoding) - expected = expected.apply(Categorical) - - actual = parser.read_csv(pth, sep=sep, encoding=encoding, dtype="category") - tm.assert_frame_equal(actual, expected) - - def test_categorical_dtype_chunksize_infer_categories(self, all_parsers): - # see gh-10153 - parser = all_parsers - data = """a,b -1,a -1,b -1,b -2,c""" - expecteds = [ - DataFrame({"a": [1, 1], "b": Categorical(["a", "b"])}), - DataFrame({"a": [1, 2], "b": Categorical(["b", "c"])}, index=[2, 3]), - ] - with parser.read_csv( - StringIO(data), dtype={"b": "category"}, chunksize=2 - ) as actuals: - for actual, expected in zip(actuals, expecteds): - tm.assert_frame_equal(actual, expected) - - def test_categorical_dtype_chunksize_explicit_categories(self, all_parsers): - # see gh-10153 - parser = all_parsers - data = """a,b -1,a -1,b -1,b -2,c""" - cats = ["a", "b", "c"] - expecteds = [ - DataFrame({"a": [1, 1], "b": Categorical(["a", "b"], categories=cats)}), - DataFrame( - {"a": [1, 2], "b": Categorical(["b", "c"], categories=cats)}, - index=[2, 3], - ), - ] - dtype = CategoricalDtype(cats) - with parser.read_csv( - StringIO(data), dtype={"b": dtype}, chunksize=2 - ) as actuals: - for actual, expected in zip(actuals, expecteds): - tm.assert_frame_equal(actual, expected) - - -class TestParserDtypesCategorical2: - def test_categorical_dtype_latin1(self, all_parsers, csv_dir_path): - # see gh-10153 - pth = os.path.join(csv_dir_path, "unicode_series.csv") - parser = all_parsers - encoding = "latin-1" - - expected = parser.read_csv(pth, header=None, encoding=encoding) - expected[1] = Categorical(expected[1]) - - actual = parser.read_csv( - pth, header=None, encoding=encoding, dtype={1: "category"} - ) - tm.assert_frame_equal(actual, expected) - - @pytest.mark.parametrize("ordered", [False, True]) - @pytest.mark.parametrize( - "categories", - [["a", "b", "c"], ["a", "c", "b"], ["a", "b", "c", "d"], ["c", "b", "a"]], - ) - def test_categorical_category_dtype(self, all_parsers, categories, ordered): - parser = all_parsers - data = """a,b -1,a -1,b -1,b -2,c""" - expected = DataFrame( - { - "a": [1, 1, 1, 2], - "b": Categorical( - ["a", "b", "b", "c"], categories=categories, ordered=ordered - ), - } - ) - - dtype = {"b": CategoricalDtype(categories=categories, ordered=ordered)} - result = parser.read_csv(StringIO(data), dtype=dtype) - tm.assert_frame_equal(result, expected) - - def test_categorical_category_dtype_unsorted(self, all_parsers): - parser = all_parsers - data = """a,b -1,a -1,b -1,b -2,c""" - dtype = CategoricalDtype(["c", "b", "a"]) - expected = DataFrame( - { - "a": [1, 1, 1, 2], - "b": Categorical(["a", "b", "b", "c"], categories=["c", "b", "a"]), - } - ) - - result = parser.read_csv(StringIO(data), dtype={"b": dtype}) - tm.assert_frame_equal(result, expected) - - def test_categorical_coerces_numeric(self, all_parsers): - parser = all_parsers - dtype = {"b": CategoricalDtype([1, 2, 3])} - - data = "b\n1\n1\n2\n3" - expected = DataFrame({"b": Categorical([1, 1, 2, 3])}) - - result = parser.read_csv(StringIO(data), dtype=dtype) - tm.assert_frame_equal(result, expected) - - def test_categorical_coerces_datetime(self, all_parsers): - parser = all_parsers - dti = pd.DatetimeIndex(["2017-01-01", "2018-01-01", "2019-01-01"], freq=None) - dtype = {"b": CategoricalDtype(dti)} - - data = "b\n2017-01-01\n2018-01-01\n2019-01-01" - expected = 
DataFrame({"b": Categorical(dtype["b"].categories)}) - - result = parser.read_csv(StringIO(data), dtype=dtype) - tm.assert_frame_equal(result, expected) - - def test_categorical_coerces_timestamp(self, all_parsers): - parser = all_parsers - dtype = {"b": CategoricalDtype([Timestamp("2014")])} - - data = "b\n2014-01-01\n2014-01-01T00:00:00" - expected = DataFrame({"b": Categorical([Timestamp("2014")] * 2)}) - - result = parser.read_csv(StringIO(data), dtype=dtype) - tm.assert_frame_equal(result, expected) - - def test_categorical_coerces_timedelta(self, all_parsers): - parser = all_parsers - dtype = {"b": CategoricalDtype(pd.to_timedelta(["1H", "2H", "3H"]))} - - data = "b\n1H\n2H\n3H" - expected = DataFrame({"b": Categorical(dtype["b"].categories)}) - - result = parser.read_csv(StringIO(data), dtype=dtype) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( - "data", - [ - "b\nTrue\nFalse\nNA\nFalse", - "b\ntrue\nfalse\nNA\nfalse", - "b\nTRUE\nFALSE\nNA\nFALSE", - "b\nTrue\nFalse\nNA\nFALSE", - ], - ) - def test_categorical_dtype_coerces_boolean(self, all_parsers, data): - # see gh-20498 - parser = all_parsers - dtype = {"b": CategoricalDtype([False, True])} - expected = DataFrame({"b": Categorical([True, False, None, False])}) - - result = parser.read_csv(StringIO(data), dtype=dtype) - tm.assert_frame_equal(result, expected) - - def test_categorical_unexpected_categories(self, all_parsers): - parser = all_parsers - dtype = {"b": CategoricalDtype(["a", "b", "d", "e"])} - - data = "b\nd\na\nc\nd" # Unexpected c - expected = DataFrame({"b": Categorical(list("dacd"), dtype=dtype["b"])}) - - result = parser.read_csv(StringIO(data), dtype=dtype) - tm.assert_frame_equal(result, expected) - - -class TestParserDtypesEmpty: - def test_dtype_all_columns_empty(self, all_parsers): - # see gh-12048 - parser = all_parsers - result = parser.read_csv(StringIO("A,B"), dtype=str) - - expected = DataFrame({"A": [], "B": []}, index=[], dtype=str) - tm.assert_frame_equal(result, expected) - - def test_empty_pass_dtype(self, all_parsers): - parser = all_parsers - - data = "one,two" - result = parser.read_csv(StringIO(data), dtype={"one": "u1"}) - - expected = DataFrame( - {"one": np.empty(0, dtype="u1"), "two": np.empty(0, dtype=object)}, - index=Index([], dtype=object), - ) - tm.assert_frame_equal(result, expected) - - def test_empty_with_index_pass_dtype(self, all_parsers): - parser = all_parsers - - data = "one,two" - result = parser.read_csv( - StringIO(data), index_col=["one"], dtype={"one": "u1", 1: "f"} - ) - - expected = DataFrame( - {"two": np.empty(0, dtype="f")}, index=Index([], dtype="u1", name="one") - ) - tm.assert_frame_equal(result, expected) - - def test_empty_with_multi_index_pass_dtype(self, all_parsers): - parser = all_parsers - - data = "one,two,three" - result = parser.read_csv( - StringIO(data), index_col=["one", "two"], dtype={"one": "u1", 1: "f8"} - ) - - exp_idx = MultiIndex.from_arrays( - [np.empty(0, dtype="u1"), np.empty(0, dtype=np.float64)], - names=["one", "two"], - ) - expected = DataFrame({"three": np.empty(0, dtype=object)}, index=exp_idx) - tm.assert_frame_equal(result, expected) - - def test_empty_with_mangled_column_pass_dtype_by_names(self, all_parsers): - parser = all_parsers - - data = "one,one" - result = parser.read_csv(StringIO(data), dtype={"one": "u1", "one.1": "f"}) - - expected = DataFrame( - {"one": np.empty(0, dtype="u1"), "one.1": np.empty(0, dtype="f")}, - index=Index([], dtype=object), - ) - tm.assert_frame_equal(result, expected) - - def 
test_empty_with_mangled_column_pass_dtype_by_indexes(self, all_parsers): - parser = all_parsers - - data = "one,one" - result = parser.read_csv(StringIO(data), dtype={0: "u1", 1: "f"}) - - expected = DataFrame( - {"one": np.empty(0, dtype="u1"), "one.1": np.empty(0, dtype="f")}, - index=Index([], dtype=object), - ) - tm.assert_frame_equal(result, expected) - - def test_empty_with_dup_column_pass_dtype_by_indexes(self, all_parsers): - # see gh-9424 - parser = all_parsers - expected = concat( - [Series([], name="one", dtype="u1"), Series([], name="one.1", dtype="f")], - axis=1, - ) - expected.index = expected.index.astype(object) - - data = "one,one" - result = parser.read_csv(StringIO(data), dtype={0: "u1", 1: "f"}) - tm.assert_frame_equal(result, expected) - - def test_empty_with_dup_column_pass_dtype_by_indexes_raises(self, all_parsers): - # see gh-9424 - parser = all_parsers - expected = concat( - [Series([], name="one", dtype="u1"), Series([], name="one.1", dtype="f")], - axis=1, - ) - expected.index = expected.index.astype(object) - - with pytest.raises(ValueError, match="Duplicate names"): - data = "" - parser.read_csv( - StringIO(data), names=["one", "one"], dtype={0: "u1", 1: "f"} - ) - - @pytest.mark.parametrize( - "dtype,expected", - [ - (np.float64, DataFrame(columns=["a", "b"], dtype=np.float64)), - ( - "category", - DataFrame({"a": Categorical([]), "b": Categorical([])}, index=[]), - ), - ( - {"a": "category", "b": "category"}, - DataFrame({"a": Categorical([]), "b": Categorical([])}, index=[]), - ), - ("datetime64[ns]", DataFrame(columns=["a", "b"], dtype="datetime64[ns]")), - ( - "timedelta64[ns]", - DataFrame( - { - "a": Series([], dtype="timedelta64[ns]"), - "b": Series([], dtype="timedelta64[ns]"), - }, - index=[], - ), - ), - ( - {"a": np.int64, "b": np.int32}, - DataFrame( - {"a": Series([], dtype=np.int64), "b": Series([], dtype=np.int32)}, - index=[], - ), - ), - ( - {0: np.int64, 1: np.int32}, - DataFrame( - {"a": Series([], dtype=np.int64), "b": Series([], dtype=np.int32)}, - index=[], - ), - ), - ( - {"a": np.int64, 1: np.int32}, - DataFrame( - {"a": Series([], dtype=np.int64), "b": Series([], dtype=np.int32)}, - index=[], - ), - ), - ], - ) - def test_empty_dtype(self, all_parsers, dtype, expected): - # see gh-14712 - parser = all_parsers - data = "a,b" - - result = parser.read_csv(StringIO(data), header=0, dtype=dtype) - tm.assert_frame_equal(result, expected) From e17ddfddcf22eb0ad790d5cc44e640599d1ef826 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Wed, 30 Dec 2020 23:56:58 -0500 Subject: [PATCH 4/6] split test_usecols.py into multiple files --- pandas/tests/io/parser/test_usecols.py | 574 ------------------ pandas/tests/io/parser/usecols/test_basic.py | 372 ++++++++++++ .../io/parser/usecols/test_parse_dates.py | 149 +++++ .../tests/io/parser/usecols/test_strings.py | 97 +++ 4 files changed, 618 insertions(+), 574 deletions(-) delete mode 100644 pandas/tests/io/parser/test_usecols.py create mode 100644 pandas/tests/io/parser/usecols/test_basic.py create mode 100644 pandas/tests/io/parser/usecols/test_parse_dates.py create mode 100644 pandas/tests/io/parser/usecols/test_strings.py diff --git a/pandas/tests/io/parser/test_usecols.py b/pandas/tests/io/parser/test_usecols.py deleted file mode 100644 index e75046d3017f2..0000000000000 --- a/pandas/tests/io/parser/test_usecols.py +++ /dev/null @@ -1,574 +0,0 @@ -""" -Tests the usecols functionality during parsing -for all of the parsers defined in parsers.py -""" -from io import StringIO - -import numpy as np 
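# A minimal standalone sketch of the three forms of ``usecols`` that the
# usecols tests below exercise: a list of column labels, a list of positional
# indices, or a callable applied to each column name.
from io import StringIO
import pandas as pd

csv = "a,b,c\n1,2,3\n4,5,6"
by_name = pd.read_csv(StringIO(csv), usecols=["a", "c"])
by_position = pd.read_csv(StringIO(csv), usecols=[0, 2])
by_callable = pd.read_csv(StringIO(csv), usecols=lambda name: name != "b")
assert (
    list(by_name.columns)
    == list(by_position.columns)
    == list(by_callable.columns)
    == ["a", "c"]
)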
-import pytest - -from pandas._libs.tslib import Timestamp - -from pandas import DataFrame, Index -import pandas._testing as tm - -_msg_validate_usecols_arg = ( - "'usecols' must either be list-like " - "of all strings, all unicode, all " - "integers or a callable." -) -_msg_validate_usecols_names = ( - "Usecols do not match columns, columns expected but not found: {0}" -) - - -class TestParserUsecolsBasic: - def test_raise_on_mixed_dtype_usecols(self, all_parsers): - # See gh-12678 - data = """a,b,c - 1000,2000,3000 - 4000,5000,6000 - """ - usecols = [0, "b", 2] - parser = all_parsers - - with pytest.raises(ValueError, match=_msg_validate_usecols_arg): - parser.read_csv(StringIO(data), usecols=usecols) - - @pytest.mark.parametrize("usecols", [(1, 2), ("b", "c")]) - def test_usecols(self, all_parsers, usecols): - data = """\ - a,b,c - 1,2,3 - 4,5,6 - 7,8,9 - 10,11,12""" - parser = all_parsers - result = parser.read_csv(StringIO(data), usecols=usecols) - - expected = DataFrame([[2, 3], [5, 6], [8, 9], [11, 12]], columns=["b", "c"]) - tm.assert_frame_equal(result, expected) - - def test_usecols_with_names(self, all_parsers): - data = """\ - a,b,c - 1,2,3 - 4,5,6 - 7,8,9 - 10,11,12""" - parser = all_parsers - names = ["foo", "bar"] - result = parser.read_csv(StringIO(data), names=names, usecols=[1, 2], header=0) - - expected = DataFrame([[2, 3], [5, 6], [8, 9], [11, 12]], columns=names) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( - "names,usecols", [(["b", "c"], [1, 2]), (["a", "b", "c"], ["b", "c"])] - ) - def test_usecols_relative_to_names(self, all_parsers, names, usecols): - data = """\ - 1,2,3 - 4,5,6 - 7,8,9 - 10,11,12""" - parser = all_parsers - result = parser.read_csv( - StringIO(data), names=names, header=None, usecols=usecols - ) - - expected = DataFrame([[2, 3], [5, 6], [8, 9], [11, 12]], columns=["b", "c"]) - tm.assert_frame_equal(result, expected) - - def test_usecols_relative_to_names2(self, all_parsers): - # see gh-5766 - data = """\ - 1,2,3 - 4,5,6 - 7,8,9 - 10,11,12""" - parser = all_parsers - result = parser.read_csv( - StringIO(data), names=["a", "b"], header=None, usecols=[0, 1] - ) - - expected = DataFrame([[1, 2], [4, 5], [7, 8], [10, 11]], columns=["a", "b"]) - tm.assert_frame_equal(result, expected) - - def test_usecols_name_length_conflict(self, all_parsers): - data = """\ - 1,2,3 - 4,5,6 - 7,8,9 - 10,11,12""" - parser = all_parsers - msg = "Number of passed names did not match number of header fields in the file" - - with pytest.raises(ValueError, match=msg): - parser.read_csv(StringIO(data), names=["a", "b"], header=None, usecols=[1]) - - def test_usecols_single_string(self, all_parsers): - # see gh-20558 - parser = all_parsers - data = """foo, bar, baz - 1000, 2000, 3000 - 4000, 5000, 6000""" - - with pytest.raises(ValueError, match=_msg_validate_usecols_arg): - parser.read_csv(StringIO(data), usecols="foo") - - @pytest.mark.parametrize( - "data", ["a,b,c,d\n1,2,3,4\n5,6,7,8", "a,b,c,d\n1,2,3,4,\n5,6,7,8,"] - ) - def test_usecols_index_col_false(self, all_parsers, data): - # see gh-9082 - parser = all_parsers - usecols = ["a", "c", "d"] - expected = DataFrame({"a": [1, 5], "c": [3, 7], "d": [4, 8]}) - - result = parser.read_csv(StringIO(data), usecols=usecols, index_col=False) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize("index_col", ["b", 0]) - @pytest.mark.parametrize("usecols", [["b", "c"], [1, 2]]) - def test_usecols_index_col_conflict(self, all_parsers, usecols, index_col): - # see gh-4201: test that 
index_col as integer reflects usecols - parser = all_parsers - data = "a,b,c,d\nA,a,1,one\nB,b,2,two" - expected = DataFrame({"c": [1, 2]}, index=Index(["a", "b"], name="b")) - - result = parser.read_csv(StringIO(data), usecols=usecols, index_col=index_col) - tm.assert_frame_equal(result, expected) - - def test_usecols_index_col_conflict2(self, all_parsers): - # see gh-4201: test that index_col as integer reflects usecols - parser = all_parsers - data = "a,b,c,d\nA,a,1,one\nB,b,2,two" - - expected = DataFrame({"b": ["a", "b"], "c": [1, 2], "d": ("one", "two")}) - expected = expected.set_index(["b", "c"]) - - result = parser.read_csv( - StringIO(data), usecols=["b", "c", "d"], index_col=["b", "c"] - ) - tm.assert_frame_equal(result, expected) - - def test_usecols_implicit_index_col(self, all_parsers): - # see gh-2654 - parser = all_parsers - data = "a,b,c\n4,apple,bat,5.7\n8,orange,cow,10" - - result = parser.read_csv(StringIO(data), usecols=["a", "b"]) - expected = DataFrame( - {"a": ["apple", "orange"], "b": ["bat", "cow"]}, index=[4, 8] - ) - tm.assert_frame_equal(result, expected) - - def test_usecols_regex_sep(self, all_parsers): - # see gh-2733 - parser = all_parsers - data = "a b c\n4 apple bat 5.7\n8 orange cow 10" - result = parser.read_csv(StringIO(data), sep=r"\s+", usecols=("a", "b")) - - expected = DataFrame( - {"a": ["apple", "orange"], "b": ["bat", "cow"]}, index=[4, 8] - ) - tm.assert_frame_equal(result, expected) - - def test_usecols_with_whitespace(self, all_parsers): - parser = all_parsers - data = "a b c\n4 apple bat 5.7\n8 orange cow 10" - - result = parser.read_csv( - StringIO(data), delim_whitespace=True, usecols=("a", "b") - ) - expected = DataFrame( - {"a": ["apple", "orange"], "b": ["bat", "cow"]}, index=[4, 8] - ) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( - "usecols,expected", - [ - # Column selection by index. - ([0, 1], DataFrame(data=[[1000, 2000], [4000, 5000]], columns=["2", "0"])), - # Column selection by name. 
- ( - ["0", "1"], - DataFrame(data=[[2000, 3000], [5000, 6000]], columns=["0", "1"]), - ), - ], - ) - def test_usecols_with_integer_like_header(self, all_parsers, usecols, expected): - parser = all_parsers - data = """2,0,1 - 1000,2000,3000 - 4000,5000,6000""" - - result = parser.read_csv(StringIO(data), usecols=usecols) - tm.assert_frame_equal(result, expected) - - def test_empty_usecols(self, all_parsers): - data = "a,b,c\n1,2,3\n4,5,6" - expected = DataFrame() - parser = all_parsers - - result = parser.read_csv(StringIO(data), usecols=set()) - tm.assert_frame_equal(result, expected) - - def test_np_array_usecols(self, all_parsers): - # see gh-12546 - parser = all_parsers - data = "a,b,c\n1,2,3" - usecols = np.array(["a", "b"]) - - expected = DataFrame([[1, 2]], columns=usecols) - result = parser.read_csv(StringIO(data), usecols=usecols) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( - "usecols,expected", - [ - ( - lambda x: x.upper() in ["AAA", "BBB", "DDD"], - DataFrame( - { - "AaA": { - 0: 0.056674972999999997, - 1: 2.6132309819999997, - 2: 3.5689350380000002, - }, - "bBb": {0: 8, 1: 2, 2: 7}, - "ddd": {0: "a", 1: "b", 2: "a"}, - } - ), - ), - (lambda x: False, DataFrame()), - ], - ) - def test_callable_usecols(self, all_parsers, usecols, expected): - # see gh-14154 - data = """AaA,bBb,CCC,ddd - 0.056674973,8,True,a - 2.613230982,2,False,b - 3.568935038,7,False,a""" - parser = all_parsers - - result = parser.read_csv(StringIO(data), usecols=usecols) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize("usecols", [["a", "c"], lambda x: x in ["a", "c"]]) - def test_incomplete_first_row(self, all_parsers, usecols): - # see gh-6710 - data = "1,2\n1,2,3" - parser = all_parsers - names = ["a", "b", "c"] - expected = DataFrame({"a": [1, 1], "c": [np.nan, 3]}) - - result = parser.read_csv(StringIO(data), names=names, usecols=usecols) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( - "data,usecols,kwargs,expected", - [ - # see gh-8985 - ( - "19,29,39\n" * 2 + "10,20,30,40", - [0, 1, 2], - {"header": None}, - DataFrame([[19, 29, 39], [19, 29, 39], [10, 20, 30]]), - ), - # see gh-9549 - ( - ("A,B,C\n1,2,3\n3,4,5\n1,2,4,5,1,6\n1,2,3,,,1,\n1,2,3\n5,6,7"), - ["A", "B", "C"], - {}, - DataFrame( - { - "A": [1, 3, 1, 1, 1, 5], - "B": [2, 4, 2, 2, 2, 6], - "C": [3, 5, 4, 3, 3, 7], - } - ), - ), - ], - ) - def test_uneven_length_cols(self, all_parsers, data, usecols, kwargs, expected): - # see gh-8985 - parser = all_parsers - result = parser.read_csv(StringIO(data), usecols=usecols, **kwargs) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( - "usecols,kwargs,expected,msg", - [ - ( - ["a", "b", "c", "d"], - {}, - DataFrame({"a": [1, 5], "b": [2, 6], "c": [3, 7], "d": [4, 8]}), - None, - ), - ( - ["a", "b", "c", "f"], - {}, - None, - _msg_validate_usecols_names.format(r"\['f'\]"), - ), - (["a", "b", "f"], {}, None, _msg_validate_usecols_names.format(r"\['f'\]")), - ( - ["a", "b", "f", "g"], - {}, - None, - _msg_validate_usecols_names.format(r"\[('f', 'g'|'g', 'f')\]"), - ), - # see gh-14671 - ( - None, - {"header": 0, "names": ["A", "B", "C", "D"]}, - DataFrame({"A": [1, 5], "B": [2, 6], "C": [3, 7], "D": [4, 8]}), - None, - ), - ( - ["A", "B", "C", "f"], - {"header": 0, "names": ["A", "B", "C", "D"]}, - None, - _msg_validate_usecols_names.format(r"\['f'\]"), - ), - ( - ["A", "B", "f"], - {"names": ["A", "B", "C", "D"]}, - None, - _msg_validate_usecols_names.format(r"\['f'\]"), - ), - ], - ) - def 
test_raises_on_usecols_names_mismatch( - self, all_parsers, usecols, kwargs, expected, msg - ): - data = "a,b,c,d\n1,2,3,4\n5,6,7,8" - kwargs.update(usecols=usecols) - parser = all_parsers - - if expected is None: - with pytest.raises(ValueError, match=msg): - parser.read_csv(StringIO(data), **kwargs) - else: - result = parser.read_csv(StringIO(data), **kwargs) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize("usecols", [["A", "C"], [0, 2]]) - def test_usecols_subset_names_mismatch_orig_columns(self, all_parsers, usecols): - data = "a,b,c,d\n1,2,3,4\n5,6,7,8" - names = ["A", "B", "C", "D"] - parser = all_parsers - - result = parser.read_csv(StringIO(data), header=0, names=names, usecols=usecols) - expected = DataFrame({"A": [1, 5], "C": [3, 7]}) - tm.assert_frame_equal(result, expected) - - -class TestUsecolsParseDates: - @pytest.mark.parametrize("usecols", [[0, 2, 3], [3, 0, 2]]) - def test_usecols_with_parse_dates(self, all_parsers, usecols): - # see gh-9755 - data = """a,b,c,d,e - 0,1,20140101,0900,4 - 0,1,20140102,1000,4""" - parser = all_parsers - parse_dates = [[1, 2]] - - cols = { - "a": [0, 0], - "c_d": [Timestamp("2014-01-01 09:00:00"), Timestamp("2014-01-02 10:00:00")], - } - expected = DataFrame(cols, columns=["c_d", "a"]) - result = parser.read_csv( - StringIO(data), usecols=usecols, parse_dates=parse_dates - ) - tm.assert_frame_equal(result, expected) - - def test_usecols_with_parse_dates2(self, all_parsers): - # see gh-13604 - parser = all_parsers - data = """2008-02-07 09:40,1032.43 - 2008-02-07 09:50,1042.54 - 2008-02-07 10:00,1051.65""" - - names = ["date", "values"] - usecols = names[:] - parse_dates = [0] - - index = Index( - [ - Timestamp("2008-02-07 09:40"), - Timestamp("2008-02-07 09:50"), - Timestamp("2008-02-07 10:00"), - ], - name="date", - ) - cols = {"values": [1032.43, 1042.54, 1051.65]} - expected = DataFrame(cols, index=index) - - result = parser.read_csv( - StringIO(data), - parse_dates=parse_dates, - index_col=0, - usecols=usecols, - header=None, - names=names, - ) - tm.assert_frame_equal(result, expected) - - def test_usecols_with_parse_dates3(self, all_parsers): - # see gh-14792 - parser = all_parsers - data = """a,b,c,d,e,f,g,h,i,j - 2016/09/21,1,1,2,3,4,5,6,7,8""" - - usecols = list("abcdefghij") - parse_dates = [0] - - cols = { - "a": Timestamp("2016-09-21"), - "b": [1], - "c": [1], - "d": [2], - "e": [3], - "f": [4], - "g": [5], - "h": [6], - "i": [7], - "j": [8], - } - expected = DataFrame(cols, columns=usecols) - - result = parser.read_csv( - StringIO(data), usecols=usecols, parse_dates=parse_dates - ) - tm.assert_frame_equal(result, expected) - - def test_usecols_with_parse_dates4(self, all_parsers): - data = "a,b,c,d,e,f,g,h,i,j\n2016/09/21,1,1,2,3,4,5,6,7,8" - usecols = list("abcdefghij") - parse_dates = [[0, 1]] - parser = all_parsers - - cols = { - "a_b": "2016/09/21 1", - "c": [1], - "d": [2], - "e": [3], - "f": [4], - "g": [5], - "h": [6], - "i": [7], - "j": [8], - } - expected = DataFrame(cols, columns=["a_b"] + list("cdefghij")) - - result = parser.read_csv( - StringIO(data), usecols=usecols, parse_dates=parse_dates - ) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize("usecols", [[0, 2, 3], [3, 0, 2]]) - @pytest.mark.parametrize( - "names", - [ - list("abcde"), # Names span all columns in original data. - list("acd"), # Names span only the selected columns. 
- ], - ) - def test_usecols_with_parse_dates_and_names(self, all_parsers, usecols, names): - # see gh-9755 - s = """0,1,20140101,0900,4 - 0,1,20140102,1000,4""" - parse_dates = [[1, 2]] - parser = all_parsers - - cols = { - "a": [0, 0], - "c_d": [Timestamp("2014-01-01 09:00:00"), Timestamp("2014-01-02 10:00:00")], - } - expected = DataFrame(cols, columns=["c_d", "a"]) - - result = parser.read_csv( - StringIO(s), names=names, parse_dates=parse_dates, usecols=usecols - ) - tm.assert_frame_equal(result, expected) - - -class TestUsecolsStrings: - def test_usecols_with_unicode_strings(self, all_parsers): - # see gh-13219 - data = """AAA,BBB,CCC,DDD - 0.056674973,8,True,a - 2.613230982,2,False,b - 3.568935038,7,False,a""" - parser = all_parsers - - exp_data = { - "AAA": { - 0: 0.056674972999999997, - 1: 2.6132309819999997, - 2: 3.5689350380000002, - }, - "BBB": {0: 8, 1: 2, 2: 7}, - } - expected = DataFrame(exp_data) - - result = parser.read_csv(StringIO(data), usecols=["AAA", "BBB"]) - tm.assert_frame_equal(result, expected) - - def test_usecols_with_single_byte_unicode_strings(self, all_parsers): - # see gh-13219 - data = """A,B,C,D - 0.056674973,8,True,a - 2.613230982,2,False,b - 3.568935038,7,False,a""" - parser = all_parsers - - exp_data = { - "A": { - 0: 0.056674972999999997, - 1: 2.6132309819999997, - 2: 3.5689350380000002, - }, - "B": {0: 8, 1: 2, 2: 7}, - } - expected = DataFrame(exp_data) - - result = parser.read_csv(StringIO(data), usecols=["A", "B"]) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize("usecols", [["AAA", b"BBB"], [b"AAA", "BBB"]]) - def test_usecols_with_mixed_encoding_strings(self, all_parsers, usecols): - data = """AAA,BBB,CCC,DDD - 0.056674973,8,True,a - 2.613230982,2,False,b - 3.568935038,7,False,a""" - parser = all_parsers - - with pytest.raises(ValueError, match=_msg_validate_usecols_arg): - parser.read_csv(StringIO(data), usecols=usecols) - - @pytest.mark.parametrize("usecols", [["あああ", "いい"], ["あああ", "いい"]]) - def test_usecols_with_multi_byte_characters(self, all_parsers, usecols): - data = """あああ,いい,ううう,ええええ - 0.056674973,8,True,a - 2.613230982,2,False,b - 3.568935038,7,False,a""" - parser = all_parsers - - exp_data = { - "あああ": { - 0: 0.056674972999999997, - 1: 2.6132309819999997, - 2: 3.5689350380000002, - }, - "いい": {0: 8, 1: 2, 2: 7}, - } - expected = DataFrame(exp_data) - - result = parser.read_csv(StringIO(data), usecols=usecols) - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/usecols/test_basic.py b/pandas/tests/io/parser/usecols/test_basic.py new file mode 100644 index 0000000000000..7d81a88e09012 --- /dev/null +++ b/pandas/tests/io/parser/usecols/test_basic.py @@ -0,0 +1,372 @@ +""" +Tests the usecols functionality during parsing +for all of the parsers defined in parsers.py +""" +from io import StringIO + +import numpy as np +import pytest + +from pandas import DataFrame, Index +import pandas._testing as tm + +_msg_validate_usecols_arg = ( + "'usecols' must either be list-like " + "of all strings, all unicode, all " + "integers or a callable." 
+) +_msg_validate_usecols_names = ( + "Usecols do not match columns, columns expected but not found: {0}" +) + + +def test_raise_on_mixed_dtype_usecols(all_parsers): + # See gh-12678 + data = """a,b,c + 1000,2000,3000 + 4000,5000,6000 + """ + usecols = [0, "b", 2] + parser = all_parsers + + with pytest.raises(ValueError, match=_msg_validate_usecols_arg): + parser.read_csv(StringIO(data), usecols=usecols) + + +@pytest.mark.parametrize("usecols", [(1, 2), ("b", "c")]) +def test_usecols(all_parsers, usecols): + data = """\ +a,b,c +1,2,3 +4,5,6 +7,8,9 +10,11,12""" + parser = all_parsers + result = parser.read_csv(StringIO(data), usecols=usecols) + + expected = DataFrame([[2, 3], [5, 6], [8, 9], [11, 12]], columns=["b", "c"]) + tm.assert_frame_equal(result, expected) + + +def test_usecols_with_names(all_parsers): + data = """\ +a,b,c +1,2,3 +4,5,6 +7,8,9 +10,11,12""" + parser = all_parsers + names = ["foo", "bar"] + result = parser.read_csv(StringIO(data), names=names, usecols=[1, 2], header=0) + + expected = DataFrame([[2, 3], [5, 6], [8, 9], [11, 12]], columns=names) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "names,usecols", [(["b", "c"], [1, 2]), (["a", "b", "c"], ["b", "c"])] +) +def test_usecols_relative_to_names(all_parsers, names, usecols): + data = """\ +1,2,3 +4,5,6 +7,8,9 +10,11,12""" + parser = all_parsers + result = parser.read_csv(StringIO(data), names=names, header=None, usecols=usecols) + + expected = DataFrame([[2, 3], [5, 6], [8, 9], [11, 12]], columns=["b", "c"]) + tm.assert_frame_equal(result, expected) + + +def test_usecols_relative_to_names2(all_parsers): + # see gh-5766 + data = """\ +1,2,3 +4,5,6 +7,8,9 +10,11,12""" + parser = all_parsers + result = parser.read_csv( + StringIO(data), names=["a", "b"], header=None, usecols=[0, 1] + ) + + expected = DataFrame([[1, 2], [4, 5], [7, 8], [10, 11]], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + +def test_usecols_name_length_conflict(all_parsers): + data = """\ +1,2,3 +4,5,6 +7,8,9 +10,11,12""" + parser = all_parsers + msg = "Number of passed names did not match number of header fields in the file" + + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), names=["a", "b"], header=None, usecols=[1]) + + +def test_usecols_single_string(all_parsers): + # see gh-20558 + parser = all_parsers + data = """foo, bar, baz +1000, 2000, 3000 +4000, 5000, 6000""" + + with pytest.raises(ValueError, match=_msg_validate_usecols_arg): + parser.read_csv(StringIO(data), usecols="foo") + + +@pytest.mark.parametrize( + "data", ["a,b,c,d\n1,2,3,4\n5,6,7,8", "a,b,c,d\n1,2,3,4,\n5,6,7,8,"] +) +def test_usecols_index_col_false(all_parsers, data): + # see gh-9082 + parser = all_parsers + usecols = ["a", "c", "d"] + expected = DataFrame({"a": [1, 5], "c": [3, 7], "d": [4, 8]}) + + result = parser.read_csv(StringIO(data), usecols=usecols, index_col=False) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("index_col", ["b", 0]) +@pytest.mark.parametrize("usecols", [["b", "c"], [1, 2]]) +def test_usecols_index_col_conflict(all_parsers, usecols, index_col): + # see gh-4201: test that index_col as integer reflects usecols + parser = all_parsers + data = "a,b,c,d\nA,a,1,one\nB,b,2,two" + expected = DataFrame({"c": [1, 2]}, index=Index(["a", "b"], name="b")) + + result = parser.read_csv(StringIO(data), usecols=usecols, index_col=index_col) + tm.assert_frame_equal(result, expected) + + +def test_usecols_index_col_conflict2(all_parsers): + # see gh-4201: test that 
index_col as integer reflects usecols + parser = all_parsers + data = "a,b,c,d\nA,a,1,one\nB,b,2,two" + + expected = DataFrame({"b": ["a", "b"], "c": [1, 2], "d": ("one", "two")}) + expected = expected.set_index(["b", "c"]) + + result = parser.read_csv( + StringIO(data), usecols=["b", "c", "d"], index_col=["b", "c"] + ) + tm.assert_frame_equal(result, expected) + + +def test_usecols_implicit_index_col(all_parsers): + # see gh-2654 + parser = all_parsers + data = "a,b,c\n4,apple,bat,5.7\n8,orange,cow,10" + + result = parser.read_csv(StringIO(data), usecols=["a", "b"]) + expected = DataFrame({"a": ["apple", "orange"], "b": ["bat", "cow"]}, index=[4, 8]) + tm.assert_frame_equal(result, expected) + + +def test_usecols_regex_sep(all_parsers): + # see gh-2733 + parser = all_parsers + data = "a b c\n4 apple bat 5.7\n8 orange cow 10" + result = parser.read_csv(StringIO(data), sep=r"\s+", usecols=("a", "b")) + + expected = DataFrame({"a": ["apple", "orange"], "b": ["bat", "cow"]}, index=[4, 8]) + tm.assert_frame_equal(result, expected) + + +def test_usecols_with_whitespace(all_parsers): + parser = all_parsers + data = "a b c\n4 apple bat 5.7\n8 orange cow 10" + + result = parser.read_csv(StringIO(data), delim_whitespace=True, usecols=("a", "b")) + expected = DataFrame({"a": ["apple", "orange"], "b": ["bat", "cow"]}, index=[4, 8]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "usecols,expected", + [ + # Column selection by index. + ([0, 1], DataFrame(data=[[1000, 2000], [4000, 5000]], columns=["2", "0"])), + # Column selection by name. + ( + ["0", "1"], + DataFrame(data=[[2000, 3000], [5000, 6000]], columns=["0", "1"]), + ), + ], +) +def test_usecols_with_integer_like_header(all_parsers, usecols, expected): + parser = all_parsers + data = """2,0,1 +1000,2000,3000 +4000,5000,6000""" + + result = parser.read_csv(StringIO(data), usecols=usecols) + tm.assert_frame_equal(result, expected) + + +def test_empty_usecols(all_parsers): + data = "a,b,c\n1,2,3\n4,5,6" + expected = DataFrame() + parser = all_parsers + + result = parser.read_csv(StringIO(data), usecols=set()) + tm.assert_frame_equal(result, expected) + + +def test_np_array_usecols(all_parsers): + # see gh-12546 + parser = all_parsers + data = "a,b,c\n1,2,3" + usecols = np.array(["a", "b"]) + + expected = DataFrame([[1, 2]], columns=usecols) + result = parser.read_csv(StringIO(data), usecols=usecols) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "usecols,expected", + [ + ( + lambda x: x.upper() in ["AAA", "BBB", "DDD"], + DataFrame( + { + "AaA": { + 0: 0.056674972999999997, + 1: 2.6132309819999997, + 2: 3.5689350380000002, + }, + "bBb": {0: 8, 1: 2, 2: 7}, + "ddd": {0: "a", 1: "b", 2: "a"}, + } + ), + ), + (lambda x: False, DataFrame()), + ], +) +def test_callable_usecols(all_parsers, usecols, expected): + # see gh-14154 + data = """AaA,bBb,CCC,ddd +0.056674973,8,True,a +2.613230982,2,False,b +3.568935038,7,False,a""" + parser = all_parsers + + result = parser.read_csv(StringIO(data), usecols=usecols) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("usecols", [["a", "c"], lambda x: x in ["a", "c"]]) +def test_incomplete_first_row(all_parsers, usecols): + # see gh-6710 + data = "1,2\n1,2,3" + parser = all_parsers + names = ["a", "b", "c"] + expected = DataFrame({"a": [1, 1], "c": [np.nan, 3]}) + + result = parser.read_csv(StringIO(data), names=names, usecols=usecols) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + 
"data,usecols,kwargs,expected", + [ + # see gh-8985 + ( + "19,29,39\n" * 2 + "10,20,30,40", + [0, 1, 2], + {"header": None}, + DataFrame([[19, 29, 39], [19, 29, 39], [10, 20, 30]]), + ), + # see gh-9549 + ( + ("A,B,C\n1,2,3\n3,4,5\n1,2,4,5,1,6\n1,2,3,,,1,\n1,2,3\n5,6,7"), + ["A", "B", "C"], + {}, + DataFrame( + { + "A": [1, 3, 1, 1, 1, 5], + "B": [2, 4, 2, 2, 2, 6], + "C": [3, 5, 4, 3, 3, 7], + } + ), + ), + ], +) +def test_uneven_length_cols(all_parsers, data, usecols, kwargs, expected): + # see gh-8985 + parser = all_parsers + result = parser.read_csv(StringIO(data), usecols=usecols, **kwargs) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "usecols,kwargs,expected,msg", + [ + ( + ["a", "b", "c", "d"], + {}, + DataFrame({"a": [1, 5], "b": [2, 6], "c": [3, 7], "d": [4, 8]}), + None, + ), + ( + ["a", "b", "c", "f"], + {}, + None, + _msg_validate_usecols_names.format(r"\['f'\]"), + ), + (["a", "b", "f"], {}, None, _msg_validate_usecols_names.format(r"\['f'\]")), + ( + ["a", "b", "f", "g"], + {}, + None, + _msg_validate_usecols_names.format(r"\[('f', 'g'|'g', 'f')\]"), + ), + # see gh-14671 + ( + None, + {"header": 0, "names": ["A", "B", "C", "D"]}, + DataFrame({"A": [1, 5], "B": [2, 6], "C": [3, 7], "D": [4, 8]}), + None, + ), + ( + ["A", "B", "C", "f"], + {"header": 0, "names": ["A", "B", "C", "D"]}, + None, + _msg_validate_usecols_names.format(r"\['f'\]"), + ), + ( + ["A", "B", "f"], + {"names": ["A", "B", "C", "D"]}, + None, + _msg_validate_usecols_names.format(r"\['f'\]"), + ), + ], +) +def test_raises_on_usecols_names_mismatch(all_parsers, usecols, kwargs, expected, msg): + data = "a,b,c,d\n1,2,3,4\n5,6,7,8" + kwargs.update(usecols=usecols) + parser = all_parsers + + if expected is None: + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), **kwargs) + else: + result = parser.read_csv(StringIO(data), **kwargs) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("usecols", [["A", "C"], [0, 2]]) +def test_usecols_subset_names_mismatch_orig_columns(all_parsers, usecols): + data = "a,b,c,d\n1,2,3,4\n5,6,7,8" + names = ["A", "B", "C", "D"] + parser = all_parsers + + result = parser.read_csv(StringIO(data), header=0, names=names, usecols=usecols) + expected = DataFrame({"A": [1, 5], "C": [3, 7]}) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/usecols/test_parse_dates.py b/pandas/tests/io/parser/usecols/test_parse_dates.py new file mode 100644 index 0000000000000..c6b700c0adfff --- /dev/null +++ b/pandas/tests/io/parser/usecols/test_parse_dates.py @@ -0,0 +1,149 @@ +""" +Tests the usecols functionality during parsing +for all of the parsers defined in parsers.py +""" +from io import StringIO + +import pytest + +from pandas._libs.tslib import Timestamp + +from pandas import DataFrame, Index +import pandas._testing as tm + +_msg_validate_usecols_arg = ( + "'usecols' must either be list-like " + "of all strings, all unicode, all " + "integers or a callable." 
+) +_msg_validate_usecols_names = ( + "Usecols do not match columns, columns expected but not found: {0}" +) + + +@pytest.mark.parametrize("usecols", [[0, 2, 3], [3, 0, 2]]) +def test_usecols_with_parse_dates(all_parsers, usecols): + # see gh-9755 + data = """a,b,c,d,e +0,1,20140101,0900,4 +0,1,20140102,1000,4""" + parser = all_parsers + parse_dates = [[1, 2]] + + cols = { + "a": [0, 0], + "c_d": [Timestamp("2014-01-01 09:00:00"), Timestamp("2014-01-02 10:00:00")], + } + expected = DataFrame(cols, columns=["c_d", "a"]) + result = parser.read_csv(StringIO(data), usecols=usecols, parse_dates=parse_dates) + tm.assert_frame_equal(result, expected) + + +def test_usecols_with_parse_dates2(all_parsers): + # see gh-13604 + parser = all_parsers + data = """2008-02-07 09:40,1032.43 +2008-02-07 09:50,1042.54 +2008-02-07 10:00,1051.65""" + + names = ["date", "values"] + usecols = names[:] + parse_dates = [0] + + index = Index( + [ + Timestamp("2008-02-07 09:40"), + Timestamp("2008-02-07 09:50"), + Timestamp("2008-02-07 10:00"), + ], + name="date", + ) + cols = {"values": [1032.43, 1042.54, 1051.65]} + expected = DataFrame(cols, index=index) + + result = parser.read_csv( + StringIO(data), + parse_dates=parse_dates, + index_col=0, + usecols=usecols, + header=None, + names=names, + ) + tm.assert_frame_equal(result, expected) + + +def test_usecols_with_parse_dates3(all_parsers): + # see gh-14792 + parser = all_parsers + data = """a,b,c,d,e,f,g,h,i,j +2016/09/21,1,1,2,3,4,5,6,7,8""" + + usecols = list("abcdefghij") + parse_dates = [0] + + cols = { + "a": Timestamp("2016-09-21"), + "b": [1], + "c": [1], + "d": [2], + "e": [3], + "f": [4], + "g": [5], + "h": [6], + "i": [7], + "j": [8], + } + expected = DataFrame(cols, columns=usecols) + + result = parser.read_csv(StringIO(data), usecols=usecols, parse_dates=parse_dates) + tm.assert_frame_equal(result, expected) + + +def test_usecols_with_parse_dates4(all_parsers): + data = "a,b,c,d,e,f,g,h,i,j\n2016/09/21,1,1,2,3,4,5,6,7,8" + usecols = list("abcdefghij") + parse_dates = [[0, 1]] + parser = all_parsers + + cols = { + "a_b": "2016/09/21 1", + "c": [1], + "d": [2], + "e": [3], + "f": [4], + "g": [5], + "h": [6], + "i": [7], + "j": [8], + } + expected = DataFrame(cols, columns=["a_b"] + list("cdefghij")) + + result = parser.read_csv(StringIO(data), usecols=usecols, parse_dates=parse_dates) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("usecols", [[0, 2, 3], [3, 0, 2]]) +@pytest.mark.parametrize( + "names", + [ + list("abcde"), # Names span all columns in original data. + list("acd"), # Names span only the selected columns. 
+    ],
+)
+def test_usecols_with_parse_dates_and_names(all_parsers, usecols, names):
+    # see gh-9755
+    s = """0,1,20140101,0900,4
+0,1,20140102,1000,4"""
+    parse_dates = [[1, 2]]
+    parser = all_parsers
+
+    cols = {
+        "a": [0, 0],
+        "c_d": [Timestamp("2014-01-01 09:00:00"), Timestamp("2014-01-02 10:00:00")],
+    }
+    expected = DataFrame(cols, columns=["c_d", "a"])
+
+    result = parser.read_csv(
+        StringIO(s), names=names, parse_dates=parse_dates, usecols=usecols
+    )
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/parser/usecols/test_strings.py b/pandas/tests/io/parser/usecols/test_strings.py
new file mode 100644
index 0000000000000..8cecf1fc981ee
--- /dev/null
+++ b/pandas/tests/io/parser/usecols/test_strings.py
@@ -0,0 +1,97 @@
+"""
+Tests the usecols functionality during parsing
+for all of the parsers defined in parsers.py
+"""
+from io import StringIO
+
+import pytest
+
+from pandas import DataFrame
+import pandas._testing as tm
+
+_msg_validate_usecols_arg = (
+    "'usecols' must either be list-like "
+    "of all strings, all unicode, all "
+    "integers or a callable."
+)
+_msg_validate_usecols_names = (
+    "Usecols do not match columns, columns expected but not found: {0}"
+)
+
+
+def test_usecols_with_unicode_strings(all_parsers):
+    # see gh-13219
+    data = """AAA,BBB,CCC,DDD
+0.056674973,8,True,a
+2.613230982,2,False,b
+3.568935038,7,False,a"""
+    parser = all_parsers
+
+    exp_data = {
+        "AAA": {
+            0: 0.056674972999999997,
+            1: 2.6132309819999997,
+            2: 3.5689350380000002,
+        },
+        "BBB": {0: 8, 1: 2, 2: 7},
+    }
+    expected = DataFrame(exp_data)
+
+    result = parser.read_csv(StringIO(data), usecols=["AAA", "BBB"])
+    tm.assert_frame_equal(result, expected)
+
+
+def test_usecols_with_single_byte_unicode_strings(all_parsers):
+    # see gh-13219
+    data = """A,B,C,D
+0.056674973,8,True,a
+2.613230982,2,False,b
+3.568935038,7,False,a"""
+    parser = all_parsers
+
+    exp_data = {
+        "A": {
+            0: 0.056674972999999997,
+            1: 2.6132309819999997,
+            2: 3.5689350380000002,
+        },
+        "B": {0: 8, 1: 2, 2: 7},
+    }
+    expected = DataFrame(exp_data)
+
+    result = parser.read_csv(StringIO(data), usecols=["A", "B"])
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("usecols", [["AAA", b"BBB"], [b"AAA", "BBB"]])
+def test_usecols_with_mixed_encoding_strings(all_parsers, usecols):
+    data = """AAA,BBB,CCC,DDD
+0.056674973,8,True,a
+2.613230982,2,False,b
+3.568935038,7,False,a"""
+    parser = all_parsers
+
+    with pytest.raises(ValueError, match=_msg_validate_usecols_arg):
+        parser.read_csv(StringIO(data), usecols=usecols)
+
+
+@pytest.mark.parametrize("usecols", [["あああ", "いい"], ["あああ", "いい"]])
+def test_usecols_with_multi_byte_characters(all_parsers, usecols):
+    data = """あああ,いい,ううう,ええええ
+0.056674973,8,True,a
+2.613230982,2,False,b
+3.568935038,7,False,a"""
+    parser = all_parsers
+
+    exp_data = {
+        "あああ": {
+            0: 0.056674972999999997,
+            1: 2.6132309819999997,
+            2: 3.5689350380000002,
+        },
+        "いい": {0: 8, 1: 2, 2: 7},
+    }
+    expected = DataFrame(exp_data)
+
+    result = parser.read_csv(StringIO(data), usecols=usecols)
+    tm.assert_frame_equal(result, expected)
From a88314d3d867506ee8fc943df166ae5416950d2a Mon Sep 17 00:00:00 2001
From: Andrew Wieteska
Date: Thu, 31 Dec 2020 14:18:49 -0500
Subject: [PATCH 5/6] deduplicate base filenames

---
 .../io/parser/dtypes/test_dtypes_basic.py | 167 ++++++++++++++++++
 .../{test_basic.py => test_usecols_basic.py} | 0
 2 files changed, 167 insertions(+)
 create mode 100644 pandas/tests/io/parser/dtypes/test_dtypes_basic.py
 rename
pandas/tests/io/parser/usecols/{test_basic.py => test_usecols_basic.py} (100%) diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py new file mode 100644 index 0000000000000..e416d8dcdd905 --- /dev/null +++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py @@ -0,0 +1,167 @@ +""" +Tests dtype specification during parsing +for all of the parsers defined in parsers.py +""" +from io import StringIO + +import numpy as np +import pytest + +from pandas.errors import ParserWarning + +import pandas as pd +from pandas import DataFrame +import pandas._testing as tm + + +@pytest.mark.parametrize("dtype", [str, object]) +@pytest.mark.parametrize("check_orig", [True, False]) +def test_dtype_all_columns(all_parsers, dtype, check_orig): + # see gh-3795, gh-6607 + parser = all_parsers + + df = DataFrame( + np.random.rand(5, 2).round(4), + columns=list("AB"), + index=["1A", "1B", "1C", "1D", "1E"], + ) + + with tm.ensure_clean("__passing_str_as_dtype__.csv") as path: + df.to_csv(path) + + result = parser.read_csv(path, dtype=dtype, index_col=0) + + if check_orig: + expected = df.copy() + result = result.astype(float) + else: + expected = df.astype(str) + + tm.assert_frame_equal(result, expected) + + +def test_dtype_per_column(all_parsers): + parser = all_parsers + data = """\ +one,two +1,2.5 +2,3.5 +3,4.5 +4,5.5""" + expected = DataFrame( + [[1, "2.5"], [2, "3.5"], [3, "4.5"], [4, "5.5"]], columns=["one", "two"] + ) + expected["one"] = expected["one"].astype(np.float64) + expected["two"] = expected["two"].astype(object) + + result = parser.read_csv(StringIO(data), dtype={"one": np.float64, 1: str}) + tm.assert_frame_equal(result, expected) + + +def test_invalid_dtype_per_column(all_parsers): + parser = all_parsers + data = """\ +one,two +1,2.5 +2,3.5 +3,4.5 +4,5.5""" + + with pytest.raises(TypeError, match="data type [\"']foo[\"'] not understood"): + parser.read_csv(StringIO(data), dtype={"one": "foo", 1: "int"}) + + +def test_raise_on_passed_int_dtype_with_nas(all_parsers): + # see gh-2631 + parser = all_parsers + data = """YEAR, DOY, a +2001,106380451,10 +2001,,11 +2001,106380451,67""" + + msg = ( + "Integer column has NA values" + if parser.engine == "c" + else "Unable to convert column DOY" + ) + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), dtype={"DOY": np.int64}, skipinitialspace=True) + + +def test_dtype_with_converters(all_parsers): + parser = all_parsers + data = """a,b +1.1,2.2 +1.2,2.3""" + + # Dtype spec ignored if converted specified. 
+ with tm.assert_produces_warning(ParserWarning): + result = parser.read_csv( + StringIO(data), dtype={"a": "i8"}, converters={"a": lambda x: str(x)} + ) + expected = DataFrame({"a": ["1.1", "1.2"], "b": [2.2, 2.3]}) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "dtype", list(np.typecodes["AllInteger"] + np.typecodes["Float"]) +) +def test_numeric_dtype(all_parsers, dtype): + data = "0\n1" + parser = all_parsers + expected = DataFrame([0, 1], dtype=dtype) + + result = parser.read_csv(StringIO(data), header=None, dtype=dtype) + tm.assert_frame_equal(expected, result) + + +def test_boolean_dtype(all_parsers): + parser = all_parsers + data = "\n".join( + [ + "a", + "True", + "TRUE", + "true", + "1", + "1.0", + "False", + "FALSE", + "false", + "0", + "0.0", + "NaN", + "nan", + "NA", + "null", + "NULL", + ] + ) + + result = parser.read_csv(StringIO(data), dtype="boolean") + expected = DataFrame( + { + "a": pd.array( + [ + True, + True, + True, + True, + True, + False, + False, + False, + False, + False, + None, + None, + None, + None, + None, + ], + dtype="boolean", + ) + } + ) + + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/usecols/test_basic.py b/pandas/tests/io/parser/usecols/test_usecols_basic.py similarity index 100% rename from pandas/tests/io/parser/usecols/test_basic.py rename to pandas/tests/io/parser/usecols/test_usecols_basic.py From bc43d16c10d7eb88e24d7f6b6c5dc6242365f0b4 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Thu, 31 Dec 2020 14:55:39 -0500 Subject: [PATCH 6/6] complete file renaming --- pandas/tests/io/parser/dtypes/test_basic.py | 167 -------------------- 1 file changed, 167 deletions(-) delete mode 100644 pandas/tests/io/parser/dtypes/test_basic.py diff --git a/pandas/tests/io/parser/dtypes/test_basic.py b/pandas/tests/io/parser/dtypes/test_basic.py deleted file mode 100644 index e416d8dcdd905..0000000000000 --- a/pandas/tests/io/parser/dtypes/test_basic.py +++ /dev/null @@ -1,167 +0,0 @@ -""" -Tests dtype specification during parsing -for all of the parsers defined in parsers.py -""" -from io import StringIO - -import numpy as np -import pytest - -from pandas.errors import ParserWarning - -import pandas as pd -from pandas import DataFrame -import pandas._testing as tm - - -@pytest.mark.parametrize("dtype", [str, object]) -@pytest.mark.parametrize("check_orig", [True, False]) -def test_dtype_all_columns(all_parsers, dtype, check_orig): - # see gh-3795, gh-6607 - parser = all_parsers - - df = DataFrame( - np.random.rand(5, 2).round(4), - columns=list("AB"), - index=["1A", "1B", "1C", "1D", "1E"], - ) - - with tm.ensure_clean("__passing_str_as_dtype__.csv") as path: - df.to_csv(path) - - result = parser.read_csv(path, dtype=dtype, index_col=0) - - if check_orig: - expected = df.copy() - result = result.astype(float) - else: - expected = df.astype(str) - - tm.assert_frame_equal(result, expected) - - -def test_dtype_per_column(all_parsers): - parser = all_parsers - data = """\ -one,two -1,2.5 -2,3.5 -3,4.5 -4,5.5""" - expected = DataFrame( - [[1, "2.5"], [2, "3.5"], [3, "4.5"], [4, "5.5"]], columns=["one", "two"] - ) - expected["one"] = expected["one"].astype(np.float64) - expected["two"] = expected["two"].astype(object) - - result = parser.read_csv(StringIO(data), dtype={"one": np.float64, 1: str}) - tm.assert_frame_equal(result, expected) - - -def test_invalid_dtype_per_column(all_parsers): - parser = all_parsers - data = """\ -one,two -1,2.5 -2,3.5 -3,4.5 -4,5.5""" - - with pytest.raises(TypeError, 
match="data type [\"']foo[\"'] not understood"): - parser.read_csv(StringIO(data), dtype={"one": "foo", 1: "int"}) - - -def test_raise_on_passed_int_dtype_with_nas(all_parsers): - # see gh-2631 - parser = all_parsers - data = """YEAR, DOY, a -2001,106380451,10 -2001,,11 -2001,106380451,67""" - - msg = ( - "Integer column has NA values" - if parser.engine == "c" - else "Unable to convert column DOY" - ) - with pytest.raises(ValueError, match=msg): - parser.read_csv(StringIO(data), dtype={"DOY": np.int64}, skipinitialspace=True) - - -def test_dtype_with_converters(all_parsers): - parser = all_parsers - data = """a,b -1.1,2.2 -1.2,2.3""" - - # Dtype spec ignored if converted specified. - with tm.assert_produces_warning(ParserWarning): - result = parser.read_csv( - StringIO(data), dtype={"a": "i8"}, converters={"a": lambda x: str(x)} - ) - expected = DataFrame({"a": ["1.1", "1.2"], "b": [2.2, 2.3]}) - tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize( - "dtype", list(np.typecodes["AllInteger"] + np.typecodes["Float"]) -) -def test_numeric_dtype(all_parsers, dtype): - data = "0\n1" - parser = all_parsers - expected = DataFrame([0, 1], dtype=dtype) - - result = parser.read_csv(StringIO(data), header=None, dtype=dtype) - tm.assert_frame_equal(expected, result) - - -def test_boolean_dtype(all_parsers): - parser = all_parsers - data = "\n".join( - [ - "a", - "True", - "TRUE", - "true", - "1", - "1.0", - "False", - "FALSE", - "false", - "0", - "0.0", - "NaN", - "nan", - "NA", - "null", - "NULL", - ] - ) - - result = parser.read_csv(StringIO(data), dtype="boolean") - expected = DataFrame( - { - "a": pd.array( - [ - True, - True, - True, - True, - True, - False, - False, - False, - False, - False, - None, - None, - None, - None, - None, - ], - dtype="boolean", - ) - } - ) - - tm.assert_frame_equal(result, expected)