diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 842b50ce53b21..bc301fad730df 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -611,6 +611,7 @@ Deprecations - Deprecated the ``level`` keyword for :class:`DataFrame` and :class:`Series` aggregations; use groupby instead (:issue:`39983`) - The ``inplace`` parameter of :meth:`Categorical.remove_categories`, :meth:`Categorical.add_categories` is deprecated and will be removed in a future version (:issue:`37643`) - Deprecated :func:`merge` producing duplicated columns through the ``suffixes`` keyword and already existing columns (:issue:`22818`) +- Deprecated using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`) .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 1a5ac31cc821b..5aa9f1db59e37 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -941,6 +941,17 @@ cdef class TextReader: f"{self.table_width - self.leading_cols} " f"and found {num_cols}") + if (self.usecols is not None and not callable(self.usecols) and + all(isinstance(u, int) for u in self.usecols)): + missing_usecols = [col for col in self.usecols if col >= num_cols] + if missing_usecols: + warnings.warn( + "Defining usecols with out of bounds indices is deprecated " + "and will raise a ParserError in a future version.", + FutureWarning, + stacklevel=6, + ) + results = {} nused = 0 for i in range(self.table_width): diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py index f8aff3ad3696a..9060ca65b48c4 100644 --- a/pandas/tests/io/parser/test_c_parser_only.py +++ b/pandas/tests/io/parser/test_c_parser_only.py @@ -680,3 +680,16 @@ def test_float_precision_options(c_parser_only): with pytest.raises(ValueError, match=msg): parser.read_csv(StringIO(s), float_precision="junk") + + 
+def test_usecols_indices_out_of_bounds(c_parser_only):
+    # GH#25623: integer ``usecols`` entries that point past the last parsed
+    # column must make the C engine emit a FutureWarning.
+    csv_text = """
+a,b
+1,2
+    """
+    with tm.assert_produces_warning(FutureWarning):
+        frame = c_parser_only.read_csv(StringIO(csv_text), usecols=[0, 2])
+    # Pinned result: "a" keeps its parsed value while "b" comes back as None.
+    tm.assert_frame_equal(frame, DataFrame({"a": [1], "b": None}))