From 9a82d19d8f945d6e5e4e8b8745f91273be335678 Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Fri, 23 Apr 2021 23:40:10 +0200
Subject: [PATCH 1/7] Bug in read_csv raising list index out of range instead
 of ParserError

---
 doc/source/whatsnew/v1.3.0.rst                    |  1 +
 pandas/io/parsers/python_parser.py                |  7 +++++++
 pandas/tests/io/parser/test_python_parser_only.py | 12 ++++++++++++
 3 files changed, 20 insertions(+)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 842b50ce53b21..2f50db55b4d96 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -797,6 +797,7 @@ I/O
 - Bug in :func:`read_excel` raising ``AttributeError`` with ``MultiIndex`` header followed by two empty rows and no index, and bug affecting :func:`read_excel`, :func:`read_csv`, :func:`read_table`, :func:`read_fwf`, and :func:`read_clipboard` where one blank row after a ``MultiIndex`` header with no index would be dropped (:issue:`40442`)
 - Bug in :meth:`DataFrame.to_string` misplacing the truncation column when ``index=False`` (:issue:`40907`)
 - Bug in :func:`read_orc` always raising ``AttributeError`` (:issue:`40918`)
+- Bug in :func:`read_csv` raising uncontrolled ``ValueError`` when ``usecols`` index is ouf of bounds, now raising ``ParserError`` (:issue:`25623`)
 
 Period
 ^^^^^^
diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
index a6d38eab99977..89722e1c76503 100644
--- a/pandas/io/parsers/python_parser.py
+++ b/pandas/io/parsers/python_parser.py
@@ -537,6 +537,13 @@ def _handle_usecols(self, columns, usecols_key):
                     else:
                         col_indices.append(col)
             else:
+                missing_usecols = [
+                    col for col in self.usecols if col > len(usecols_key)
+                ]
+                if missing_usecols:
+                    raise ParserError(
+                        f"Usecols indices {missing_usecols} are out of bounds!"
+                    )
                 col_indices = self.usecols
 
             columns = [
diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py
index cf6866946ab76..76d1bd470532e 100644
--- a/pandas/tests/io/parser/test_python_parser_only.py
+++ b/pandas/tests/io/parser/test_python_parser_only.py
@@ -312,3 +312,15 @@ def test_malformed_skipfooter(python_parser_only):
     msg = "Expected 3 fields in line 4, saw 5"
     with pytest.raises(ParserError, match=msg):
         parser.read_csv(StringIO(data), header=1, comment="#", skipfooter=1)
+
+
+def test_usecols_indices_out_of_bounds(python_parser_only):
+    # GH#25623
+    parser = python_parser_only
+    data = """
+    a,b
+    1,2
+    """
+    msg = r"Usecols indices \[10\] are out of bounds!"
+    with pytest.raises(ParserError, match=msg):
+        parser.read_csv(StringIO(data), usecols=[0, 10])

From 0287dd98a20a18275b9d89ccddfc2375b1b59a19 Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Sat, 24 Apr 2021 00:39:43 +0200
Subject: [PATCH 2/7] Deprecated usecols with out of bounds indices in read_csv
 with c engine

---
 doc/source/whatsnew/v1.3.0.rst               |  1 +
 pandas/_libs/parsers.pyx                     | 11 +++++++++++
 pandas/tests/io/parser/test_c_parser_only.py | 13 +++++++++++++
 3 files changed, 25 insertions(+)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 842b50ce53b21..bc301fad730df 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -611,6 +611,7 @@ Deprecations
 - Deprecated the ``level`` keyword for :class:`DataFrame` and :class:`Series` aggregations; use groupby instead (:issue:`39983`)
 - The ``inplace`` parameter of :meth:`Categorical.remove_categories`, :meth:`Categorical.add_categories` is deprecated and will be removed in a future version (:issue:`37643`)
 - Deprecated :func:`merge` producing duplicated columns through the ``suffixes`` keyword  and already existing columns (:issue:`22818`)
+- Deprecated using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index 1a5ac31cc821b..5aa9f1db59e37 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -941,6 +941,17 @@ cdef class TextReader:
                               f"{self.table_width - self.leading_cols} "
                               f"and found {num_cols}")
 
+        if (self.usecols is not None and not callable(self.usecols) and
+                all(isinstance(u, int) for u in self.usecols)):
+            missing_usecols = [col for col in self.usecols if col >= num_cols]
+            if missing_usecols:
+                warnings.warn(
+                    "Defining usecols with out of bounds indices is deprecated "
+                    "and will raise a ParserError in a future version.",
+                    FutureWarning,
+                    stacklevel=6,
+                )
+
         results = {}
         nused = 0
         for i in range(self.table_width):
diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py
index f8aff3ad3696a..9060ca65b48c4 100644
--- a/pandas/tests/io/parser/test_c_parser_only.py
+++ b/pandas/tests/io/parser/test_c_parser_only.py
@@ -680,3 +680,16 @@ def test_float_precision_options(c_parser_only):
 
     with pytest.raises(ValueError, match=msg):
         parser.read_csv(StringIO(s), float_precision="junk")
+
+
+def test_usecols_indices_out_of_bounds(c_parser_only):
+    # GH#25623
+    parser = c_parser_only
+    data = """
+a,b
+1,2
+    """
+    with tm.assert_produces_warning(FutureWarning):
+        result = parser.read_csv(StringIO(data), usecols=[0, 2])
+    expected = DataFrame({"a": [1], "b": None})
+    tm.assert_frame_equal(result, expected)

From 97158edc69233c2d534a8fd005ef3c5c91e2a0fe Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Sat, 24 Apr 2021 00:41:41 +0200
Subject: [PATCH 3/7] Adjust test

---
 pandas/io/parsers/python_parser.py                | 2 +-
 pandas/tests/io/parser/test_python_parser_only.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
index 89722e1c76503..205800d5ceaaa 100644
--- a/pandas/io/parsers/python_parser.py
+++ b/pandas/io/parsers/python_parser.py
@@ -538,7 +538,7 @@ def _handle_usecols(self, columns, usecols_key):
                         col_indices.append(col)
             else:
                 missing_usecols = [
-                    col for col in self.usecols if col > len(usecols_key)
+                    col for col in self.usecols if col >= len(usecols_key)
                 ]
                 if missing_usecols:
                     raise ParserError(
diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py
index 76d1bd470532e..231780224ee12 100644
--- a/pandas/tests/io/parser/test_python_parser_only.py
+++ b/pandas/tests/io/parser/test_python_parser_only.py
@@ -321,6 +321,6 @@ def test_usecols_indices_out_of_bounds(python_parser_only):
     a,b
     1,2
     """
-    msg = r"Usecols indices \[10\] are out of bounds!"
+    msg = r"Usecols indices \[2\] are out of bounds!"
     with pytest.raises(ParserError, match=msg):
-        parser.read_csv(StringIO(data), usecols=[0, 10])
+        parser.read_csv(StringIO(data), usecols=[0, 2])

From f446e4f113849cdad0a28f6ffeb8a722e4bdd03b Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Sat, 24 Apr 2021 17:19:18 +0200
Subject: [PATCH 4/7] Fix bug for names and add tests

---
 pandas/io/parsers/python_parser.py             | 18 +++++++++++-------
 .../tests/io/parser/test_python_parser_only.py |  8 ++++++--
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
index 205800d5ceaaa..21bfc9ee88efe 100644
--- a/pandas/io/parsers/python_parser.py
+++ b/pandas/io/parsers/python_parser.py
@@ -470,12 +470,14 @@ def _infer_columns(self):
                 if self.usecols is not None:
                     # Set _use_cols. We don't store columns because they are
                     # overwritten.
-                    self._handle_usecols(columns, names)
+                    self._handle_usecols(columns, names, num_original_columns)
                 else:
                     num_original_columns = len(names)
                 columns = [names]
             else:
-                columns = self._handle_usecols(columns, columns[0])
+                columns = self._handle_usecols(
+                    columns, columns[0], num_original_columns
+                )
         else:
             try:
                 line = self._buffered_line()
@@ -494,10 +496,12 @@ def _infer_columns(self):
                     columns = [[f"{self.prefix}{i}" for i in range(ncols)]]
                 else:
                     columns = [list(range(ncols))]
-                columns = self._handle_usecols(columns, columns[0])
+                columns = self._handle_usecols(
+                    columns, columns[0], num_original_columns
+                )
             else:
                 if self.usecols is None or len(names) >= num_original_columns:
-                    columns = self._handle_usecols([names], names)
+                    columns = self._handle_usecols([names], names, num_original_columns)
                     num_original_columns = len(names)
                 else:
                     if not callable(self.usecols) and len(names) != len(self.usecols):
@@ -506,13 +510,13 @@ def _infer_columns(self):
                             "header fields in the file"
                         )
                     # Ignore output but set used columns.
-                    self._handle_usecols([names], names)
+                    self._handle_usecols([names], names, ncols)
                     columns = [names]
                     num_original_columns = ncols
 
         return columns, num_original_columns, unnamed_cols
 
-    def _handle_usecols(self, columns, usecols_key):
+    def _handle_usecols(self, columns, usecols_key, num_original_columns):
         """
         Sets self._col_indices
 
@@ -538,7 +542,7 @@ def _handle_usecols(self, columns, usecols_key):
                         col_indices.append(col)
             else:
                 missing_usecols = [
-                    col for col in self.usecols if col >= len(usecols_key)
+                    col for col in self.usecols if col >= num_original_columns
                 ]
                 if missing_usecols:
                     raise ParserError(
diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py
index 231780224ee12..49303658a63f9 100644
--- a/pandas/tests/io/parser/test_python_parser_only.py
+++ b/pandas/tests/io/parser/test_python_parser_only.py
@@ -314,8 +314,12 @@ def test_malformed_skipfooter(python_parser_only):
         parser.read_csv(StringIO(data), header=1, comment="#", skipfooter=1)
 
 
-def test_usecols_indices_out_of_bounds(python_parser_only):
+@pytest.mark.parametrize("header", [0, None])
+@pytest.mark.parametrize("names", [None, ["a", "b"], ["a", "b", "c"]])
+def test_usecols_indices_out_of_bounds(python_parser_only, names, header):
     # GH#25623
+    if header == 0 and names == ["a", "b", "c"]:
+        pytest.skip("This case is not valid")
     parser = python_parser_only
     data = """
     a,b
@@ -323,4 +327,4 @@ def test_usecols_indices_out_of_bounds(python_parser_only):
     """
     msg = r"Usecols indices \[2\] are out of bounds!"
     with pytest.raises(ParserError, match=msg):
-        parser.read_csv(StringIO(data), usecols=[0, 2])
+        parser.read_csv(StringIO(data), usecols=[0, 2], names=names, header=header)

From 21b496bf662bc80d5258b40ad8d0f143b93b0323 Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Wed, 12 May 2021 19:54:32 +0200
Subject: [PATCH 5/7] Rebase after fixing bug on master

---
 pandas/io/parsers/python_parser.py                   | 8 ++++++--
 pandas/tests/io/parser/test_python_parser_only.py    | 3 +--
 pandas/tests/io/parser/usecols/test_usecols_basic.py | 3 ++-
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
index da876eb71a0fc..c5784b8a0b01c 100644
--- a/pandas/io/parsers/python_parser.py
+++ b/pandas/io/parsers/python_parser.py
@@ -15,6 +15,7 @@
     Tuple,
     cast,
 )
+import warnings
 
 import numpy as np
 
@@ -557,8 +558,11 @@ def _handle_usecols(self, columns, usecols_key, num_original_columns):
                     col for col in self.usecols if col >= num_original_columns
                 ]
                 if missing_usecols:
-                    raise ParserError(
-                        f"Usecols indices {missing_usecols} are out of bounds!"
+                    warnings.warn(
+                        "Defining usecols with out of bounds indices is deprecated "
+                        "and will raise a ParserError in a future version.",
+                        FutureWarning,
+                        stacklevel=8,
                     )
                 col_indices = self.usecols
 
diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py
index 49303658a63f9..a163490637343 100644
--- a/pandas/tests/io/parser/test_python_parser_only.py
+++ b/pandas/tests/io/parser/test_python_parser_only.py
@@ -325,6 +325,5 @@ def test_usecols_indices_out_of_bounds(python_parser_only, names, header):
     a,b
     1,2
     """
-    msg = r"Usecols indices \[2\] are out of bounds!"
-    with pytest.raises(ParserError, match=msg):
+    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
         parser.read_csv(StringIO(data), usecols=[0, 2], names=names, header=header)
diff --git a/pandas/tests/io/parser/usecols/test_usecols_basic.py b/pandas/tests/io/parser/usecols/test_usecols_basic.py
index 371b8bea7def2..b86dc5ef85fc6 100644
--- a/pandas/tests/io/parser/usecols/test_usecols_basic.py
+++ b/pandas/tests/io/parser/usecols/test_usecols_basic.py
@@ -383,7 +383,8 @@ def test_usecols_indices_out_of_bounds(all_parsers, names):
 a,b
 1,2
     """
-    result = parser.read_csv(StringIO(data), usecols=[0, 2], names=names, header=0)
+    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+        result = parser.read_csv(StringIO(data), usecols=[0, 2], names=names, header=0)
     expected = DataFrame({"a": [1], "b": [None]})
     if names is None and parser.engine == "python":
         expected = DataFrame({"a": [1]})

From 41e3310d40b06ba3719188a886cde34d5bb43a48 Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Wed, 12 May 2021 20:02:58 +0200
Subject: [PATCH 6/7] Remove no longer necessary test

---
 pandas/tests/io/parser/test_c_parser_only.py      | 13 -------------
 pandas/tests/io/parser/test_python_parser_only.py | 15 ---------------
 2 files changed, 28 deletions(-)

diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py
index d5f21f2a2636b..044af57f49240 100644
--- a/pandas/tests/io/parser/test_c_parser_only.py
+++ b/pandas/tests/io/parser/test_c_parser_only.py
@@ -682,16 +682,3 @@ def test_float_precision_options(c_parser_only):
 
     with pytest.raises(ValueError, match=msg):
         parser.read_csv(StringIO(s), float_precision="junk")
-
-
-def test_usecols_indices_out_of_bounds(c_parser_only):
-    # GH#25623
-    parser = c_parser_only
-    data = """
-a,b
-1,2
-    """
-    with tm.assert_produces_warning(FutureWarning):
-        result = parser.read_csv(StringIO(data), usecols=[0, 2])
-    expected = DataFrame({"a": [1], "b": None})
-    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py
index a163490637343..cf6866946ab76 100644
--- a/pandas/tests/io/parser/test_python_parser_only.py
+++ b/pandas/tests/io/parser/test_python_parser_only.py
@@ -312,18 +312,3 @@ def test_malformed_skipfooter(python_parser_only):
     msg = "Expected 3 fields in line 4, saw 5"
     with pytest.raises(ParserError, match=msg):
         parser.read_csv(StringIO(data), header=1, comment="#", skipfooter=1)
-
-
-@pytest.mark.parametrize("header", [0, None])
-@pytest.mark.parametrize("names", [None, ["a", "b"], ["a", "b", "c"]])
-def test_usecols_indices_out_of_bounds(python_parser_only, names, header):
-    # GH#25623
-    if header == 0 and names == ["a", "b", "c"]:
-        pytest.skip("This case is not valid")
-    parser = python_parser_only
-    data = """
-    a,b
-    1,2
-    """
-    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-        parser.read_csv(StringIO(data), usecols=[0, 2], names=names, header=header)

From 5bef6766ce02e397c23a2f6792151be5ec16c524 Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Thu, 13 May 2021 19:59:29 +0200
Subject: [PATCH 7/7] Type function

---
 pandas/io/parsers/python_parser.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
index c5784b8a0b01c..9082e41698913 100644
--- a/pandas/io/parsers/python_parser.py
+++ b/pandas/io/parsers/python_parser.py
@@ -529,7 +529,12 @@ def _infer_columns(self):
 
         return columns, num_original_columns, unnamed_cols
 
-    def _handle_usecols(self, columns, usecols_key, num_original_columns):
+    def _handle_usecols(
+        self,
+        columns: List[List[Union[Optional[str], Optional[int]]]],
+        usecols_key: List[Union[Optional[str], Optional[int]]],
+        num_original_columns: int,
+    ):
         """
         Sets self._col_indices