From 99b002e8148c6a453e83143bca8f152aaa77a793 Mon Sep 17 00:00:00 2001
From: Thomas Li <thomasli1234567890@gmail.com>
Date: Wed, 22 Sep 2021 17:40:43 -0700
Subject: [PATCH 1/3] BUG: Fix some index_col tests for pyarrow read_csv

---
 pandas/io/parsers/arrow_parser_wrapper.py   | 13 +++++++++++++
 pandas/tests/io/parser/common/test_index.py |  1 -
 pandas/tests/io/parser/test_index_col.py    |  2 --
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py
index 033cd88da9687..4a50f99d1ca11 100644
--- a/pandas/io/parsers/arrow_parser_wrapper.py
+++ b/pandas/io/parsers/arrow_parser_wrapper.py
@@ -91,12 +91,18 @@ def _finalize_output(self, frame: DataFrame) -> DataFrame:
             The processed DataFrame.
         """
         num_cols = len(frame.columns)
+        multi_index_named = True
         if self.header is None:
             if self.names is None:
                 if self.prefix is not None:
                     self.names = [f"{self.prefix}{i}" for i in range(num_cols)]
                 elif self.header is None:
                     self.names = range(num_cols)
+            if len(self.names) != num_cols:
+                # usecols is passed through to pyarrow, we only handle index col here
+                # pretty much we just pad names to the expected length
+                self.names = range(num_cols - len(self.names)) + self.names
+                multi_index_named = False
             frame.columns = self.names
         # we only need the frame not the names
         frame.columns, frame = self._do_date_conversions(frame.columns, frame)
@@ -104,7 +110,14 @@ def _finalize_output(self, frame: DataFrame) -> DataFrame:
             for i, item in enumerate(self.index_col):
                 if is_integer(item):
                     self.index_col[i] = frame.columns[item]
+                else:
+                    # String case
+                    if item not in frame.columns:
+                        raise ValueError(f"Index {item} invalid")
             frame.set_index(self.index_col, drop=True, inplace=True)
+            # Clear names if headerless and no name given
+            if self.header is None and not multi_index_named:
+                frame.index.names = [None] * len(frame.index.names)
 
         if self.kwds.get("dtype") is not None:
             frame = frame.astype(self.kwds.get("dtype"))
diff --git a/pandas/tests/io/parser/common/test_index.py b/pandas/tests/io/parser/common/test_index.py
index a37bd010d0e1b..082bb466d5dd2 100644
--- a/pandas/tests/io/parser/common/test_index.py
+++ b/pandas/tests/io/parser/common/test_index.py
@@ -80,7 +80,6 @@ def test_pass_names_with_index(all_parsers, data, kwargs, expected):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
 @pytest.mark.parametrize("index_col", [[0, 1], [1, 0]])
 def test_multi_index_no_level_names(all_parsers, index_col):
     data = """index1,index2,A,B,C,D
diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py
index 6be82af5349ed..646cb2029919d 100644
--- a/pandas/tests/io/parser/test_index_col.py
+++ b/pandas/tests/io/parser/test_index_col.py
@@ -19,7 +19,6 @@
 skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
 
 
-@skip_pyarrow
 @pytest.mark.parametrize("with_header", [True, False])
 def test_index_col_named(all_parsers, with_header):
     parser = all_parsers
@@ -228,7 +227,6 @@ def test_header_with_index_col(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@skip_pyarrow
 @pytest.mark.slow
 def test_index_col_large_csv(all_parsers):
     # https://github.com/pandas-dev/pandas/issues/37094

From 47d5b3367307a9c3eaf5a6acba2ef828111aa0cb Mon Sep 17 00:00:00 2001
From: Thomas Li <thomasli1234567890@gmail.com>
Date: Wed, 22 Sep 2021 20:04:17 -0700
Subject: [PATCH 2/3] fixes

---
 pandas/io/parsers/arrow_parser_wrapper.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py
index 4a50f99d1ca11..5b1b178c4f610 100644
--- a/pandas/io/parsers/arrow_parser_wrapper.py
+++ b/pandas/io/parsers/arrow_parser_wrapper.py
@@ -100,8 +100,10 @@ def _finalize_output(self, frame: DataFrame) -> DataFrame:
                     self.names = range(num_cols)
             if len(self.names) != num_cols:
                 # usecols is passed through to pyarrow, we only handle index col here
-                # pretty much we just pad names to the expected length
-                self.names = range(num_cols - len(self.names)) + self.names
+                # The only way self.names differs in length from the number of cols is
+                # if we have an int index_col. We should just pad the names (they will
+                # get removed anyway) to the expected length then.
+                self.names = list(range(num_cols - len(self.names))) + self.names
                 multi_index_named = False
             frame.columns = self.names
         # we only need the frame not the names

From bc500ed2b91a74859bdc077af9f7ae1e890caf35 Mon Sep 17 00:00:00 2001
From: Thomas Li <47963215+lithomas1@users.noreply.github.com>
Date: Mon, 27 Sep 2021 13:46:46 -0700
Subject: [PATCH 3/3] Update v1.4.0.rst

---
 doc/source/whatsnew/v1.4.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index f18b3b75ca3d2..ff92f55c67b12 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -92,7 +92,7 @@ Multithreaded CSV reading with a new CSV Engine based on pyarrow
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 :func:`pandas.read_csv` now accepts ``engine="pyarrow"`` (requires at least ``pyarrow`` 0.17.0) as an argument, allowing for faster csv parsing on multicore machines
-with pyarrow installed. See the :doc:`I/O docs </user_guide/io>` for more info. (:issue:`23697`)
+with pyarrow installed. See the :doc:`I/O docs </user_guide/io>` for more info. (:issue:`23697`, :issue:`43706`)
 
 .. _whatsnew_140.enhancements.window_rank: